diff --git a/.gitignore b/.gitignore
index 1293051..fb95c5d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,12 @@
-**/__pycache__
-**/.ipynb_checkpoints
-**/.pytest_cache
-**/.DS_Store
-**/*.pyc
+__pycache__
+.ipynb_checkpoints
+.pytest_cache
+.DS_Store
+*.pyc
 docs/_build
 build
 dist
 .idea
+.vscode
 src/kyupy.egg-info
+*nogit*
diff --git a/Demo.ipynb b/Demo.ipynb
deleted file mode 100644
index 6ba4c1f..0000000
--- a/Demo.ipynb
+++ /dev/null
@@ -1,1348 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Loading and Exploring Gate-Level Circuits"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Example of parsing the bench data format to make simple gate-level circuits."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from kyupy import bench\n",
-    "\n",
-    "# load a file\n",
-    "b01 = bench.load('tests/b01.bench')\n",
-    "\n",
-    "# ... or specify the circuit as string \n",
-    "mycircuit = bench.parse('input(a,b) output(o1,o2,o3) x=buf(a) o1=not(x) o2=buf(x) o3=buf(x)')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Circuits are objects of the class `Circuit`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<Circuit tests/b01.bench cells=45 forks=47 lines=130 ports=4>"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "b01"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<Circuit cells=4 forks=6 lines=8 ports=5>"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mycircuit"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Circuits are containers for two types of elements: nodes and lines.\n",
-    "* A `Node` is a named entity in a circuit (e.g. a gate, a standard cell, a named signal, or a fan-out point) that has connections to other nodes.\n",
-    "* A `Line` is a directional 1:1 connection between two Nodes.\n",
-    "\n",
-    "Use the `dump()` method to get a string representation of all nodes and their connections."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "None(0,1,2,3,4)\n",
-      "0:__fork__\"a\"  >1\n",
-      "1:__fork__\"b\"  \n",
-      "2:__fork__\"o1\" <2 \n",
-      "3:__fork__\"o2\" <4 \n",
-      "4:__fork__\"o3\" <6 \n",
-      "5:buf\"x\" <1 >0\n",
-      "6:__fork__\"x\" <0 >3 >5 >7\n",
-      "7:not\"o1\" <3 >2\n",
-      "8:buf\"o2\" <5 >4\n",
-      "9:buf\"o3\" <7 >6\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(mycircuit.dump())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The first line of the dump starts with the circuit name (\"None\" for `mycircuit`), followed by the node-IDs of all the ports (inputs and outputs) of the circuit.\n",
-    "\n",
-    "Each of the following lines describes one node.\n",
-    "Each node in the circuit has a unique ID, a type, a name, and line-connections. This information is given on each line in that order.\n",
-    "\n",
-    "A line in the circuit has a unique ID, a driver node and a receiver node. The connections in the dump show the direction (\">\" for output, \"<\" for input) and the line-ID. For example in `mycircuit`: Node-0 has one output connected to Line-1, and this Line-1 is connected to the input of Node-5.\n",
-    "\n",
-    "The `interface` is the list of nodes forming the ports (inputs and outputs):"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[0:__fork__\"a\"  >1,\n",
-       " 1:__fork__\"b\"  ,\n",
-       " 2:__fork__\"o1\" <2 ,\n",
-       " 3:__fork__\"o2\" <4 ,\n",
-       " 4:__fork__\"o3\" <6 ]"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mycircuit.interface"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Nodes\n",
-    "\n",
-    "There are two types of nodes: __forks__ and __cells__.\n",
-    "\n",
-    "Forks have the special type `__fork__` while cells can be of various types (`buf`, `not`, `and`, `nor`, etc.).\n",
-    "Forks are used to label signals with names and to connect a one cell to multiple other cells (fan-out).\n",
-    "The names among all forks and among all cells within a circuit are unique.\n",
-    "Thus, a fork and a cell are allowed to share the same name.\n",
-    "\n",
-    "Nodes in circuits can be accessed by ID or by name."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "7:not\"o1\" <3 >2"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mycircuit.nodes[7]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "6:__fork__\"x\" <0 >3 >5 >7"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mycircuit.forks['x']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "5:buf\"x\" <1 >0"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mycircuit.cells['x']"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Nodes have an `index` (the node ID), a `kind` (the type), a `name`, as well as `ins` (input pins) and `outs` (output pins)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(6, '__fork__', 'x', [0], [3, 5, 7])"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "n = mycircuit.nodes[6]\n",
-    "n.index, n.kind, n.name, n.ins, n.outs"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The inputs and outputs of a node are lists containing `Line` objects."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "kyupy.circuit.Line"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "type(n.ins[0])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Lines\n",
-    "\n",
-    "A line is a directional connection between one driving node (`driver`) and one reading node (`reader`).\n",
-    "\n",
-    "A line also knows to which node pins it is connected to: `driver_pin`, `reader_pin`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(5, 6:__fork__\"x\" <0 >3 >5 >7, 8:buf\"o2\" <5 >4, 1, 0)"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "l = mycircuit.nodes[6].outs[1]\n",
-    "l.index, l.driver, l.reader, l.driver_pin, l.reader_pin"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Basic Analysis Examples\n",
-    "### Cell type statistics"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "defaultdict(<class 'int'>, {'DFF': 5, 'AND': 1, 'NAND': 28, 'OR': 1, 'NOT': 10})\n"
-     ]
-    }
-   ],
-   "source": [
-    "from collections import defaultdict\n",
-    "\n",
-    "counts = defaultdict(int)\n",
-    "\n",
-    "for n in b01.cells.values():\n",
-    "    counts[n.kind] += 1\n",
-    "\n",
-    "print(counts)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Tracing a scan chain"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<Circuit b14 cells=15873 forks=15842 lines=46891 ports=91>"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from kyupy import verilog\n",
-    "\n",
-    "b14 = verilog.load('tests/b14.v.gz')\n",
-    "b14"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "chain length 287\n",
-      "output test_so000\n",
-      "NBUFFX8_RVT HFSBUF_36_76\n",
-      "SDFFARX1_RVT wr_reg\n",
-      "INVX4_RVT HFSINV_691_254\n",
-      "INVX0_RVT HFSINV_2682_255\n",
-      "SDFFARX1_RVT state_reg\n",
-      "NBUFFX2_RVT ZBUF_55_inst_860\n",
-      "SDFFARX1_RVT reg3_reg_28_\n",
-      "SDFFARX1_RVT reg3_reg_27_\n",
-      "SDFFARX1_RVT reg3_reg_26_\n",
-      "...\n",
-      "NBUFFX2_RVT ZBUF_1656_inst_2160\n",
-      "SDFFARX1_RVT IR_reg_3_\n",
-      "NBUFFX2_RVT ZBUF_85_inst_865\n",
-      "SDFFARX1_RVT IR_reg_2_\n",
-      "SDFFARX1_RVT IR_reg_1_\n",
-      "SDFFARX1_RVT IR_reg_0_\n",
-      "NBUFFX2_RVT ZBUF_17_inst_905\n",
-      "NBUFFX4_RVT ZBUF_275_inst_906\n",
-      "SDFFARX1_RVT B_reg\n",
-      "input test_si000\n"
-     ]
-    }
-   ],
-   "source": [
-    "chain = []\n",
-    "cell = b14.cells['test_so000']\n",
-    "chain.append(cell)\n",
-    "while len(cell.ins) > 0:\n",
-    "    cell = cell.ins[2 if 'SDFF' in cell.kind else 0].driver\n",
-    "    if '__fork__' not in cell.kind:\n",
-    "        chain.append(cell)\n",
-    "        \n",
-    "print('chain length', len(chain))\n",
-    "for c in chain[:10]:\n",
-    "    print(c.kind, c.name)\n",
-    "print('...')\n",
-    "for c in chain[-10:]:\n",
-    "    print(c.kind, c.name)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Determining Logic Depth of Nodes"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<Circuit b14 cells=15873 forks=15842 lines=46891 ports=91>"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from kyupy import verilog\n",
-    "\n",
-    "b14 = verilog.load('tests/b14.v.gz')\n",
-    "b14"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Calculate logic level (logic depth, distance from inputs or scan flip-flops) for each node in the circuit.\n",
-    "Inputs and flip-flops themselves are level 0, **cells** driven by just inputs and flip-flops are level 1, and so on.\n",
-    "**Fork** nodes have the same level as their driver, because they do not increase the logic depth."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Maximum logic depth: 112\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "\n",
-    "levels = np.zeros(len(b14.nodes), dtype='uint16')  # store level for each node.\n",
-    "\n",
-    "for cell in b14.topological_order():\n",
-    "    if 'DFF' in cell.kind or 'input' == cell.kind:\n",
-    "        levels[cell] = 0\n",
-    "    elif '__fork__' == cell.kind:\n",
-    "        levels[cell] = levels[cell.ins[0].driver]  # forks only have exactly one driver\n",
-    "    else:\n",
-    "        levels[cell] = max([levels[line.driver] for line in cell.ins]) + 1\n",
-    "        \n",
-    "print(f'Maximum logic depth: {np.max(levels)}')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "List nodes with the highest depth and which nodes they are driving."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "depth: 112 node: __fork__     n2692  driving: SDFFARX1_RVT reg1_reg_29_  \n",
-      "depth: 112 node: NAND2X0_RVT  U465   driving: __fork__     n2692         \n",
-      "depth: 112 node: NAND2X0_RVT  U562   driving: __fork__     n2724         \n",
-      "depth: 112 node: __fork__     n2724  driving: SDFFARX1_RVT reg0_reg_29_  \n",
-      "depth: 112 node: __fork__     n2608  driving: SDFFARX1_RVT B_reg         \n",
-      "depth: 112 node: NAND2X0_RVT  U170   driving: __fork__     n2608         \n",
-      "depth: 111 node: NAND2X0_RVT  U5550  driving: __fork__     n2693         \n",
-      "depth: 111 node: __fork__     n2660  driving: SDFFARX1_RVT reg2_reg_29_  \n",
-      "depth: 111 node: AND2X2_RVT   U5560  driving: __fork__     n2660         \n",
-      "depth: 111 node: __fork__     n2725  driving: SDFFARX1_RVT reg0_reg_28_  \n",
-      "depth: 111 node: __fork__     n2693  driving: SDFFARX1_RVT reg1_reg_28_  \n",
-      "depth: 111 node: __fork__     n362   driving: NAND2X0_RVT  U170          \n",
-      "depth: 111 node: NAND2X0_RVT  U173   driving: __fork__     n362          \n",
-      "depth: 111 node: __fork__     n600   driving: NAND2X0_RVT  U562          \n",
-      "depth: 111 node: NAND2X0_RVT  U563   driving: __fork__     n600          \n",
-      "depth: 111 node: NAND2X0_RVT  U565   driving: __fork__     n2725         \n",
-      "depth: 111 node: NAND2X0_RVT  U466   driving: __fork__     n535          \n",
-      "depth: 111 node: __fork__     n535   driving: NAND2X0_RVT  U465          \n",
-      "depth: 110 node: __fork__     n4691  driving: AND2X2_RVT   U5560         \n",
-      "depth: 110 node: NAND2X0_RVT  U5736  driving: __fork__     n790          \n"
-     ]
-    }
-   ],
-   "source": [
-    "nodes_by_depth = np.argsort(levels)[::-1]\n",
-    "\n",
-    "for n_idx in nodes_by_depth[:20]:\n",
-    "    n = b14.nodes[n_idx]\n",
-    "    readers = ', '.join([f'{l.reader.kind:12s} {l.reader.name:14s}' for l in n.outs])\n",
-    "    print(f'depth: {levels[n_idx]} node: {n.kind:12s} {n.name:6s} driving: {readers}')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Working With Test Data and Logic Simulation"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Load a stuck-at fault test pattern set and expected fault-free responses from a STIL file."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from kyupy import verilog, stil\n",
-    "from kyupy.logic import MVArray, BPArray\n",
-    "from kyupy.logic_sim import LogicSim\n",
-    "\n",
-    "b14 = verilog.load('tests/b14.v.gz')\n",
-    "s = stil.load('tests/b14.stuck.stil.gz')\n",
-    "stuck_tests = s.tests(b14)\n",
-    "stuck_responses = s.responses(b14)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Tests and responses are instances of `MVArray`. Its `length` is the number of test vectors stored, its `width` is the number of values in a vector. By default, the stil parser returns 8-valued test vectors (`m=8`)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<MVArray length=1081 width=306 m=8 mem=323.0kiB>"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "stuck_tests"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The internal storage (an `ndarray` of `uint8`) is accessible via `data`. The first axis is the width, and the last axis goes along the test set."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(306, 1081)"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "stuck_tests.data.shape"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The subscript accessor returns a string representation of the given test vector number. Possible values are '0', '1', '-', 'X', 'R', 'F', 'P', and 'N'."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'P0--------------------11011111011001100111010101011101----------------------------------00-10111011010110011101110010111010111011101100010000110101111111011010101001010101010101010101001010110101001010101010101010110100000111111111111111011010100100101010010010101101010101001010100111010001010010000011100'"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "stuck_tests[1]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'--10000010010100010111--------------------------------0101010010101010110101001001010100--011111110011011111000111010101010111011101100010000110101111111011010101001010101010101010101001010110101001010101010101010110100000111111111111111011010100100101010010010101101010101001010101000111111111111111011101'"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "stuck_responses[1]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The order of values in the vectors correspond to the circuit's interface followed by the scan flip-flops as they appear in `b14.cells`.\n",
-    "The test data can be used directly in the simulators as they use the same ordering convention.\n",
-    "\n",
-    "The logic simulator uses bit-parallel storage of logic values, but our loaded test data uses one `uint8` per logic value.\n",
-    "To convert the storage layout, we instanciate a `BPArray` for the input stimuli.\n",
-    "The storage layout is more compact, but individual values cannot be easily accessed anymore."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<BPArray length=1081 width=306 m=8 mem=121.9kiB>"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "stuck_tests_bp = BPArray(stuck_tests)\n",
-    "stuck_tests_bp"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(306, 3, 136)"
-      ]
-     },
-     "execution_count": 24,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "stuck_tests_bp.data.shape"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The following code performs a 8-valued logic simulation and stores the results in a new instance of `BPArray`.\n",
-    "The packed array is unpacked into an `MVArray` for value access."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "responses_bp = BPArray((stuck_tests_bp.width, len(stuck_tests_bp)))\n",
-    "simulator = LogicSim(b14, sims=len(stuck_tests_bp))\n",
-    "simulator.assign(stuck_tests_bp)\n",
-    "simulator.propagate()\n",
-    "simulator.capture(responses_bp)\n",
-    "responses = MVArray(responses_bp)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'--10000010010100010111--------------------------------0101010010101010110101001001010100--011111110011011111000111010101010111011101100010000110101111111011010101001010101010101010101001010110101001010101010101010110100000111111111111111011010100100101010010010101101010101001010101000111111111111111011101'"
-      ]
-     },
-     "execution_count": 26,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "responses[1]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Compare simulation results to expected fault-free responses loaded from STIL. The first test fails, because it is a flush test while simulation implicitly assumes a standard test with a capture clock."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "mismatch for test pattern 0\n",
-      "1080 of 1081 responses matched with simulator\n"
-     ]
-    }
-   ],
-   "source": [
-    "matches = 0\n",
-    "for i in range(len(responses)):\n",
-    "    if responses[i] == stuck_responses[i]:\n",
-    "        matches += 1\n",
-    "    else:\n",
-    "        print(f'mismatch for test pattern {i}')\n",
-    "print(f'{matches} of {len(responses)} responses matched with simulator')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Transition faults require test vector pairs for testing. These pairs are generated by `tests_loc`, assuming a launch-on-capture scheme (two functional clock cycles after scan-in)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "s = stil.load('tests/b14.transition.stil.gz')\n",
-    "trans_tests = s.tests_loc(b14)\n",
-    "trans_responses = s.responses(b14)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<MVArray length=1392 width=306 m=8 mem=416.0kiB>"
-      ]
-     },
-     "execution_count": 29,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "trans_tests"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Possible values in the string representation are: '0', '1', '-', 'X', 'R' (rising transition), 'F' (falling transition), 'P' (positive pulse(s), 010), 'N' (negative pulse(s), 101)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'00--------------------RRRRRRFRRRRRRRRRRRFFRFRRRRRRRRRR----------------------------------00-00000001110100011111011010000000000000000011001001100101111110101110110001000100010100110111111101101000000111110011100010111000111R1111111111111111111111110001100100000110100000111010101110RFF00F000F0F00F00000FF01F'"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "trans_tests[1]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We validate these patterns with an 8-valued logic simulation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "trans_tests_bp = BPArray(trans_tests)\n",
-    "responses_bp = BPArray((trans_tests_bp.width, len(trans_tests_bp)))\n",
-    "simulator = LogicSim(b14, sims=len(trans_tests_bp))\n",
-    "simulator.assign(trans_tests_bp)\n",
-    "simulator.propagate()\n",
-    "simulator.capture(responses_bp)\n",
-    "responses = MVArray(responses_bp)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'--F00000F00F0F000F00FF--------------------------------01110101011100000101100000100110R0--0RRRRRRRNNNRNRPRNNNNNRFFRFRRRRRRR000000000011001001100101111110101110110001000100010100110111111101101000000111110011100010111000NNNNNNNNNNNNNNNNNNNNNNNNNNNNP0011001000001101000001110101011101RRRRRRRRRRRRRRRRRRRRP01R'"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "responses[1]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The responses loaded from STIL only contain the final logic values. Use simple character replacements before comparing these. First test is again a flush test."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "mismatch for test pattern 0\n",
-      "1391 of 1392 responses matched with simulator\n"
-     ]
-    }
-   ],
-   "source": [
-    "matches = 0\n",
-    "for i in range(len(responses)):\n",
-    "    if trans_responses[i] == responses[i].replace('P','0').replace('N','1').replace('R','1').replace('F','0'):\n",
-    "        matches += 1\n",
-    "    else:\n",
-    "        print(f'mismatch for test pattern {i}')\n",
-    "print(f'{matches} of {len(responses)} responses matched with simulator')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Working With Delay Information and Timing Simulation"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Delay data for gates and interconnect can be loaded from SDF files. In kyupy's timing simulators, delays are associated with the lines between nodes, not with the nodes themselves. Each line in the circuit has a rising delay, a falling delay, a negative pulse threshold, and a positive pulse threshold. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from kyupy import sdf\n",
-    "\n",
-    "df = sdf.load('tests/b14.sdf.gz')\n",
-    "lt = df.annotation(b14, dataset=0, interconnect=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The returned delay information is an `ndarray` with a set of delay values for each line in the circuit."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(46891, 2, 2)"
-      ]
-     },
-     "execution_count": 35,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "lt.shape"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Number of non-0 values loaded:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "120628"
-      ]
-     },
-     "execution_count": 36,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "(lt != 0).sum()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The available timing simulators are `WaveSim` and `WaveSimCuda`.\n",
-    "They work similarly to `LogicSim` in that they evaluate all cells in topological order.\n",
-    "Instead of propagating a logic value, however, they propagate waveforms.\n",
-    "\n",
-    "`WaveSim` uses the numba just-in-time compiler for acceleration on CPU.\n",
-    "It falls back to pure python if numba is not available. `WaveSimCuda` uses numba for GPU acceleration.\n",
-    "If no CUDA card is available, it will fall back to pure python (not jit-compiled for CPU!).\n",
-    "Pure python is too slow for most purposes.\n",
-    "\n",
-    "Both simulators operate data-parallel.\n",
-    "The following instanciates a new engine for 32 independent timing simulations and each signal line in the circuit can carry at most 16 transitions. All simulators share the same circuit and the same line delay specification."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from kyupy.wave_sim import WaveSimCuda, TMAX\n",
-    "import numpy as np\n",
-    "\n",
-    "wsim = WaveSimCuda(b14, lt, sims=32, wavecaps=16)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "These are various memories allocated, with waveforms usually being the largest. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Waveforms              : 93908.5 kiB\n",
-      "State Allocation Table : 1113.4 kiB\n",
-      "Circuit Timing         : 1484.5 kiB\n",
-      "Circuit Netlist        : 732.7 kiB\n",
-      "Capture Data           : 267.8 kiB\n",
-      "Test Stimuli Data      : 3.6 kiB\n"
-     ]
-    }
-   ],
-   "source": [
-    "def print_mem(name, arr):\n",
-    "    print(f'{name}: {arr.size * arr.itemsize / 1024:.1f} kiB')\n",
-    "    \n",
-    "print_mem('Waveforms              ', wsim.state)\n",
-    "print_mem('State Allocation Table ', wsim.sat)\n",
-    "print_mem('Circuit Timing         ', wsim.timing)\n",
-    "print_mem('Circuit Netlist        ', wsim.ops)\n",
-    "print_mem('Capture Data           ', wsim.cdata)\n",
-    "print_mem('Test Stimuli Data      ', wsim.tdata)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This is a typical simulation loop where the number of patterns is larger than the number of simulators available.\n",
-    "We simulate `trans_tests_bp`.\n",
-    "The timing simulator accepts 8-valued `BPArray`s, but it will return response (capture) data in a different format."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sims = 128  # len(trans_tests_bp)  # Feel free to simulate all tests if CUDA is set up correctly.\n",
-    "\n",
-    "cdata = np.zeros((len(wsim.interface), sims, 7))  # space to store all capture data\n",
-    "\n",
-    "for offset in range(0, sims, wsim.sims):\n",
-    "    wsim.assign(trans_tests_bp, offset=offset)\n",
-    "    wsim.propagate(sims=sims-offset)\n",
-    "    wsim.capture(time=2.5, cdata=cdata, offset=offset)  # capture at time 2.5"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The capture data contains for each PI, PO, and scan flip-flop (axis 0), and each test (axis 1) seven values:\n",
-    "\n",
-    "0. Probability of capturing a 1 at the given capture time (same as next value, if no standard deviation given).\n",
-    "1. A capture value decided by random sampling according to above probability.\n",
-    "2. The final value (assume a very late capture time).\n",
-    "3. True, if there was a premature capture (capture error), i.e. final value is different from captured value.\n",
-    "4. Earliest arrival time. The time at which the output transitioned from its initial value.\n",
-    "5. Latest stabilization time. The time at which the output transitioned to its final value.\n",
-    "6. Overflow indicator. If non-zero, some signals in the input cone of this output had more transitions than specified in `wavecaps`. Some transitions have been discarded, the final values in the waveforms are still valid."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(306, 128, 7)"
-      ]
-     },
-     "execution_count": 40,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "cdata.shape"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "For validating against known logic values, take `cdata[...,1]`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "mismatch for test pattern 0\n",
-      "127 of 128 responses matched with simulator\n"
-     ]
-    }
-   ],
-   "source": [
-    "matches = 0\n",
-    "\n",
-    "for i in range(cdata.shape[1]):\n",
-    "    response = ''.join('1' if x > 0.5 else '0' for x in cdata[..., i, 1])\n",
-    "    if trans_responses[i].replace('-','0') == response:\n",
-    "        matches += 1\n",
-    "    else:\n",
-    "        print(f'mismatch for test pattern {i}')\n",
-    "print(f'{matches} of {cdata.shape[1]} responses matched with simulator')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The circuit delay is the maximum among all latest stabilization times:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "2.17240047454834"
-      ]
-     },
-     "execution_count": 42,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "cdata[...,5].max()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Check for overflows. If too many of them occur, increase `wavecaps` during engine instanciation:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "2.0"
-      ]
-     },
-     "execution_count": 43,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "cdata[...,6].sum()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Check for capture failures:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.0"
-      ]
-     },
-     "execution_count": 44,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "cdata[...,3].sum()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# CUDA Support Notes\n",
-    "\n",
-    "Try this code to check if CUDA is set up correctly.\n",
-    "\n",
-    "If there is an error related to `nvvm`, you probably need to set up some environment variables:\n",
-    "```\n",
-    "%env LD_LIBRARY_PATH=/usr/local/cuda/lib64\n",
-    "%env CUDA_HOME=/usr/local/cuda\n",
-    "```\n",
-    "If problems persist, refer to documentations for numba and cuda. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Found 2 CUDA devices\n",
-      "id 0    b'NVIDIA GeForce RTX 3090'                              [SUPPORTED]\n",
-      "                      compute capability: 8.6\n",
-      "                           pci device id: 0\n",
-      "                              pci bus id: 3\n",
-      "id 1       b'NVIDIA TITAN V'                              [SUPPORTED]\n",
-      "                      compute capability: 7.0\n",
-      "                           pci device id: 0\n",
-      "                              pci bus id: 2\n",
-      "Summary:\n",
-      "\t2/2 devices are supported\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "True"
-      ]
-     },
-     "execution_count": 45,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from numba import cuda\n",
-    "\n",
-    "cuda.detect()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "worker",
-   "language": "python",
-   "name": "worker"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.13"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/LICENSE.txt b/LICENSE.txt
index c0da9ca..8e73798 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2020-2022 Stefan Holst
+Copyright (c) 2020-2023 Stefan Holst
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/MANIFEST.in b/MANIFEST.in
index 11cee4f..cb81f10 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,5 @@
-include *.ipynb
 include *.txt
+recursive-include examples *.ipynb
 recursive-include tests *.bench
 recursive-include tests *.gz
 recursive-include tests *.py
diff --git a/README.rst b/README.rst
index 739e7b8..a00df1b 100644
--- a/README.rst
+++ b/README.rst
@@ -6,7 +6,7 @@ It contains fundamental building blocks for research software in the fields of V
 
 * Efficient data structures for gate-level circuits and related design data.
 * Partial `lark <https://github.com/lark-parser/lark>`_ parsers for common design files like
-  bench, gate-level verilog, standard delay format (SDF), standard test interface language (STIL).
+  bench, gate-level Verilog, standard delay format (SDF), standard test interface language (STIL), design exchange format (DEF).
 * Bit-parallel gate-level 2-, 4-, and 8-valued logic simulation.
 * GPU-accelerated high-throughput gate-level timing simulation.
 * High-performance through the use of `numpy <https://numpy.org>`_ and `numba <https://numba.pydata.org>`_.
@@ -16,13 +16,17 @@ Getting Started
 ---------------
 
 KyuPy is available in `PyPI <https://pypi.org/project/kyupy>`_.
-It requires Python 3.6 or newer, `lark-parser <https://pypi.org/project/lark-parser>`_, and `numpy`_.
+It requires Python 3.8 or newer, `lark-parser <https://pypi.org/project/lark-parser>`_, and `numpy`_.
 Although optional, `numba`_ should be installed for best performance.
-GPU/CUDA support in numba may `require some additional setup <https://numba.pydata.org/numba-doc/latest/cuda/index.html>`_.
+GPU/CUDA support in numba may `require some additional setup <https://numba.readthedocs.io/en/stable/cuda/index.html>`_.
 If numba is not available, KyuPy will automatically fall back to slow, pure Python execution.
 
-The Jupyter Notebook `Demo.ipynb <https://github.com/s-holst/kyupy/blob/main/Demo.ipynb>`_ contains some useful examples to get familiar with the API.
+The Jupyter Notebook `Introduction.ipynb <https://github.com/s-holst/kyupy/blob/main/examples/Introduction.ipynb>`_ contains some useful examples to get familiar with the API.
+
+
+Development
+-----------
 
 To work with the latest pre-release source code, clone the `KyuPy GitHub repository <https://github.com/s-holst/kyupy>`_.
-Run ``pip3 install --user -e .`` within your local checkout to make the package available in your Python environment.
+Run ``pip install -e .`` within your local checkout to make the package available in your Python environment.
 The source code comes with tests that can be run with ``pytest``.
diff --git a/docs/Makefile b/docs/Makefile
index d4bb2cb..0f632bf 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -1,3 +1,5 @@
+# pip install sphinx sphinx-rtd-theme
+#
 # Minimal makefile for Sphinx documentation
 #
 
diff --git a/docs/circuit.rst b/docs/circuit.rst
new file mode 100644
index 0000000..1d1ff31
--- /dev/null
+++ b/docs/circuit.rst
@@ -0,0 +1,13 @@
+Circuit Graph - :mod:`kyupy.circuit`
+====================================
+
+.. automodule:: kyupy.circuit
+
+.. autoclass:: kyupy.circuit.Node
+   :members:
+
+.. autoclass:: kyupy.circuit.Line
+   :members:
+
+.. autoclass:: kyupy.circuit.Circuit
+   :members:
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index 540783b..e50d1bf 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -20,11 +20,11 @@ sys.path.insert(0, os.path.abspath('../src'))
 # -- Project information -----------------------------------------------------
 
 project = 'KyuPy'
-copyright = '2020-2021, Stefan Holst'
+copyright = '2020-2023, Stefan Holst'
 author = 'Stefan Holst'
 
 # The full version, including alpha/beta/rc tags
-release = '0.0.3'
+release = '0.0.4'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/docs/datastructures.rst b/docs/datastructures.rst
deleted file mode 100644
index 026ded9..0000000
--- a/docs/datastructures.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-Data Structures
-===============
-
-KyuPy provides two types of core data structures, one for gate-level circuits, and a few others for representing and storing logic data and signal values.
-The data structures are designed to work together nicely with numpy arrays.
-For example, all the nodes and connections in the circuit graph have consecutive integer indices that can be used to access ndarrays with associated data.
-Circuit graphs also define an ordering of inputs, outputs and other nodes to easily process test vector data and alike.
-
-Circuit Graph - :mod:`kyupy.circuit`
-------------------------------------
-
-.. automodule:: kyupy.circuit
-
-.. autoclass:: kyupy.circuit.Node
-   :members:
-
-.. autoclass:: kyupy.circuit.Line
-   :members:
-
-.. autoclass:: kyupy.circuit.Circuit
-   :members:
-
-Multi-Valued Logic - :mod:`kyupy.logic`
----------------------------------------
-
-.. automodule:: kyupy.logic
-   :members:
-
-
diff --git a/docs/index.rst b/docs/index.rst
index 3fc74e8..06f085d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -4,9 +4,11 @@ API Reference
 -------------
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
 
-   datastructures
+   circuit
+   logic
+   techlib
    parsers
    simulators
    miscellaneous
diff --git a/docs/logic.rst b/docs/logic.rst
new file mode 100644
index 0000000..4f9fac3
--- /dev/null
+++ b/docs/logic.rst
@@ -0,0 +1,7 @@
+Multi-Valued Logic - :mod:`kyupy.logic`
+=======================================
+
+.. automodule:: kyupy.logic
+   :members:
+
+
diff --git a/docs/miscellaneous.rst b/docs/miscellaneous.rst
index fff469f..f49363b 100644
--- a/docs/miscellaneous.rst
+++ b/docs/miscellaneous.rst
@@ -4,7 +4,3 @@ Miscellaneous
 .. automodule:: kyupy
    :members:
 
-.. automodule:: kyupy.techlib
-   :members:
-
-
diff --git a/docs/parsers.rst b/docs/parsers.rst
index 5dac7f8..920206e 100644
--- a/docs/parsers.rst
+++ b/docs/parsers.rst
@@ -40,3 +40,12 @@ Standard Delay Format - :mod:`kyupy.sdf`
 
 .. autoclass:: kyupy.sdf.DelayFile
    :members:
+
+Design Exchange Format - :mod:`kyupy.def_file`
+----------------------------------------------
+
+.. automodule:: kyupy.def_file
+   :members: parse, load
+
+.. autoclass:: kyupy.def_file.DefFile
+   :members:
diff --git a/docs/simulators.rst b/docs/simulators.rst
index bcc0ea4..44360f7 100644
--- a/docs/simulators.rst
+++ b/docs/simulators.rst
@@ -1,6 +1,11 @@
 Simulators
 ==========
 
+KyuPy's simulators are optimized for cells with at most 4 inputs and 1 output.
+
+More complex cells must be mapped to simulation primitives first.
+
+
 Logic Simulation - :mod:`kyupy.logic_sim`
 -----------------------------------------
 
diff --git a/docs/techlib.rst b/docs/techlib.rst
new file mode 100644
index 0000000..8e23b2d
--- /dev/null
+++ b/docs/techlib.rst
@@ -0,0 +1,7 @@
+Technology Libraries
+====================
+
+.. automodule:: kyupy.techlib
+   :members:
+
+
diff --git a/examples/Introduction.ipynb b/examples/Introduction.ipynb
new file mode 100644
index 0000000..2f0a352
--- /dev/null
+++ b/examples/Introduction.ipynb
@@ -0,0 +1,3502 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# KyuPy Introduction\n",
+    "\n",
+    "This notebook introduces KyuPy's basic data structures and built-in functions step-by-step.\n",
+    "\n",
+    "## Working With Gate-Level Circuit Structures\n",
+    "\n",
+    "KyuPy has two parser modules:\n",
+    "\n",
+    "* `kyupy.bench`: The [ISCAS'89 Benchmark Format](https://www.researchgate.net/profile/Franc-Brglez/publication/224723140_Combination_profiles_of_sequential_benchmark_circuits) \".bench\"\n",
+    "* `kyupy.verilog`: Non-hierarchical gate-level verilog\n",
+    "\n",
+    "Files can be loaded using `.load(file)`, strings can be parsed using `.parse(text)`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from kyupy import bench, verilog\n",
+    "from kyupy.techlib import SAED32, GSC180\n",
+    "\n",
+    "# load a file\n",
+    "b15 = verilog.load('../tests/b15_2ig.v.gz', tlib=SAED32)\n",
+    "\n",
+    "# ... or specify the circuit as string \n",
+    "adder = verilog.parse('''\n",
+    "module adder(clk, a, b, s);\n",
+    "    input clk, a, b;\n",
+    "    output s;\n",
+    "    wire cin, cout;\n",
+    "    DFFX1 carry (.D(cout), .CK(clk), .Q(cin));\n",
+    "    ADDFX1 adder (.A(a), .B(b), .CI(cin), .CO(cout), .S(s));\n",
+    "endmodule\n",
+    "''', tlib=GSC180)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "They return KyuPy's intermediate prepresentation of the circuit graph (objects of class `kyupy.circuit.Circuit`):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "kyupy.circuit.Circuit"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(b15)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{name: \"b15\", cells: 10789, forks: 10749, lines: 32032, io_nodes: 111}"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "b15"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{name: \"adder\", cells: 6, forks: 6, lines: 12, io_nodes: 4}"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `.stats` property returns a dictionary with more detailed statistics on the elements in the circuit."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'__node__': 12,\n",
+       " '__cell__': 6,\n",
+       " '__fork__': 6,\n",
+       " '__io__': 4,\n",
+       " '__line__': 12,\n",
+       " 'DFFX1': 1,\n",
+       " '__dff__': 1,\n",
+       " 'ADDFX1': 1,\n",
+       " '__comb__': 1,\n",
+       " 'input': 3,\n",
+       " 'output': 1,\n",
+       " '__latch__': 0,\n",
+       " '__seq__': 1}"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.stats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'__node__': 21538,\n",
+       " '__cell__': 10789,\n",
+       " '__fork__': 10749,\n",
+       " '__io__': 111,\n",
+       " '__line__': 32032,\n",
+       " 'TIEH_RVT': 1,\n",
+       " '__comb__': 10261,\n",
+       " 'NBUFFX4_RVT': 114,\n",
+       " 'NBUFFX2_RVT': 371,\n",
+       " 'INVX2_RVT': 27,\n",
+       " 'NBUFFX8_RVT': 40,\n",
+       " 'INVX0_RVT': 769,\n",
+       " 'AND2X1_RVT': 996,\n",
+       " 'OR2X1_RVT': 1087,\n",
+       " 'OR2X2_RVT': 8,\n",
+       " 'INVX8_RVT': 30,\n",
+       " 'NOR2X2_RVT': 20,\n",
+       " 'INVX4_RVT': 36,\n",
+       " 'AND2X2_RVT': 50,\n",
+       " 'SDFFARX1_RVT': 412,\n",
+       " '__dff__': 417,\n",
+       " 'NAND2X0_RVT': 6596,\n",
+       " 'NOR2X0_RVT': 74,\n",
+       " 'NOR2X1_RVT': 15,\n",
+       " 'NAND2X1_RVT': 3,\n",
+       " 'NOR2X4_RVT': 5,\n",
+       " 'NAND2X2_RVT': 9,\n",
+       " 'SDFFARX2_RVT': 5,\n",
+       " 'NAND2X4_RVT': 3,\n",
+       " 'AND2X4_RVT': 1,\n",
+       " 'INVX32_RVT': 4,\n",
+       " 'INVX16_RVT': 1,\n",
+       " 'NBUFFX32_RVT': 1,\n",
+       " 'output': 71,\n",
+       " 'input': 40,\n",
+       " '__latch__': 0,\n",
+       " '__seq__': 417}"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "b15.stats"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `.dot()` function shows the graph structure for small circuits. This requires the `graphviz` package."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/svg+xml": [
+       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
+       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
+       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
+       "<!-- Generated by graphviz version 2.30.1 (20201013.1554)\n",
+       " -->\n",
+       "<!-- Title: %3 Pages: 1 -->\n",
+       "<svg width=\"734pt\" height=\"365pt\"\n",
+       " viewBox=\"0.00 0.00 734.00 365.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
+       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 361)\">\n",
+       "<title>%3</title>\n",
+       "<polygon fill=\"white\" stroke=\"white\" points=\"-4,5 -4,-361 731,-361 731,5 -4,5\"/>\n",
+       "<!-- 0 -->\n",
+       "<g id=\"node1\" class=\"node\"><title>0</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-72.5 0,-125.5 104,-125.5 104,-72.5 0,-72.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"11.5\" y=\"-108.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"0,-99.5 23,-99.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"11.5\" y=\"-82.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"23,-72.5 23,-125.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"52\" y=\"-110.3\" font-family=\"Times,serif\" font-size=\"14.00\">0 [4]</text>\n",
+       "<text text-anchor=\"middle\" x=\"52\" y=\"-95.3\" font-family=\"Times,serif\" font-size=\"14.00\">DFFX1</text>\n",
+       "<text text-anchor=\"middle\" x=\"52\" y=\"-80.3\" font-family=\"Times,serif\" font-size=\"14.00\">carry</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"81,-72.5 81,-125.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"92.5\" y=\"-95.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 1 -->\n",
+       "<g id=\"node5\" class=\"node\"><title>1</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"148.5,-159.5 148.5,-212.5 259.5,-212.5 259.5,-159.5 148.5,-159.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"160\" y=\"-182.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"171.5,-159.5 171.5,-212.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-197.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-182.3\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-167.3\" font-family=\"Times,serif\" font-size=\"14.00\">cin</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"236.5,-159.5 236.5,-212.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"248\" y=\"-182.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 0&#45;&gt;1 -->\n",
+       "<g id=\"edge1\" class=\"edge\"><title>0:o0&#45;&gt;1:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M104,-99C133.937,-99 126.647,-129.018 148,-150 149.143,-151.123 150.465,-151.784 151.804,-152.244\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"149.805,-155.117 160,-158 153.828,-149.389 149.805,-155.117\"/>\n",
+       "<text text-anchor=\"middle\" x=\"126\" y=\"-120.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 5 -->\n",
+       "<g id=\"node2\" class=\"node\"><title>5</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"8.5,-0.5 8.5,-53.5 95.5,-53.5 95.5,-0.5 8.5,-0.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"18.5\" y=\"-23.3\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"28.5,-0.5 28.5,-53.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"50.5\" y=\"-38.3\" font-family=\"Times,serif\" font-size=\"14.00\">5 [0]</text>\n",
+       "<text text-anchor=\"middle\" x=\"50.5\" y=\"-23.3\" font-family=\"Times,serif\" font-size=\"14.00\">input</text>\n",
+       "<text text-anchor=\"middle\" x=\"50.5\" y=\"-8.3\" font-family=\"Times,serif\" font-size=\"14.00\">clk</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"72.5,-0.5 72.5,-53.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"84\" y=\"-23.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 6 -->\n",
+       "<g id=\"node6\" class=\"node\"><title>6</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"148.5,-87.5 148.5,-140.5 259.5,-140.5 259.5,-87.5 148.5,-87.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"160\" y=\"-110.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"171.5,-87.5 171.5,-140.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-125.3\" font-family=\"Times,serif\" font-size=\"14.00\">6</text>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-110.3\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-95.3\" font-family=\"Times,serif\" font-size=\"14.00\">clk</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"236.5,-87.5 236.5,-140.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"248\" y=\"-110.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 5&#45;&gt;6 -->\n",
+       "<g id=\"edge4\" class=\"edge\"><title>5:o0&#45;&gt;6:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M97,-27C131.765,-27 155.26,-43.9502 159.359,-75.6805\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"155.887,-76.2361 160,-86 162.874,-75.8023 155.887,-76.2361\"/>\n",
+       "<text text-anchor=\"middle\" x=\"126\" y=\"-37.8\" font-family=\"Times,serif\" font-size=\"14.00\">3</text>\n",
+       "</g>\n",
+       "<!-- 7 -->\n",
+       "<g id=\"node3\" class=\"node\"><title>7</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"8.5,-303.5 8.5,-356.5 95.5,-356.5 95.5,-303.5 8.5,-303.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"18.5\" y=\"-326.3\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"28.5,-303.5 28.5,-356.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"50.5\" y=\"-341.3\" font-family=\"Times,serif\" font-size=\"14.00\">7 [1]</text>\n",
+       "<text text-anchor=\"middle\" x=\"50.5\" y=\"-326.3\" font-family=\"Times,serif\" font-size=\"14.00\">input</text>\n",
+       "<text text-anchor=\"middle\" x=\"50.5\" y=\"-311.3\" font-family=\"Times,serif\" font-size=\"14.00\">a</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"72.5,-303.5 72.5,-356.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"84\" y=\"-326.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 8 -->\n",
+       "<g id=\"node7\" class=\"node\"><title>8</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"148.5,-303.5 148.5,-356.5 259.5,-356.5 259.5,-303.5 148.5,-303.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"160\" y=\"-326.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"171.5,-303.5 171.5,-356.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-341.3\" font-family=\"Times,serif\" font-size=\"14.00\">8</text>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-326.3\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-311.3\" font-family=\"Times,serif\" font-size=\"14.00\">a</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"236.5,-303.5 236.5,-356.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"248\" y=\"-326.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 7&#45;&gt;8 -->\n",
+       "<g id=\"edge5\" class=\"edge\"><title>7:o0&#45;&gt;8:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M97,-330C115.771,-330 122.883,-330 137.646,-330\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"138,-333.5 148,-330 138,-326.5 138,-333.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"126\" y=\"-333.8\" font-family=\"Times,serif\" font-size=\"14.00\">4</text>\n",
+       "</g>\n",
+       "<!-- 9 -->\n",
+       "<g id=\"node4\" class=\"node\"><title>9</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"8.5,-231.5 8.5,-284.5 95.5,-284.5 95.5,-231.5 8.5,-231.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"18.5\" y=\"-254.3\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"28.5,-231.5 28.5,-284.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"50.5\" y=\"-269.3\" font-family=\"Times,serif\" font-size=\"14.00\">9 [2]</text>\n",
+       "<text text-anchor=\"middle\" x=\"50.5\" y=\"-254.3\" font-family=\"Times,serif\" font-size=\"14.00\">input</text>\n",
+       "<text text-anchor=\"middle\" x=\"50.5\" y=\"-239.3\" font-family=\"Times,serif\" font-size=\"14.00\">b</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"72.5,-231.5 72.5,-284.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"84\" y=\"-254.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 10 -->\n",
+       "<g id=\"node8\" class=\"node\"><title>10</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"148.5,-231.5 148.5,-284.5 259.5,-284.5 259.5,-231.5 148.5,-231.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"160\" y=\"-254.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"171.5,-231.5 171.5,-284.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-269.3\" font-family=\"Times,serif\" font-size=\"14.00\">10</text>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-254.3\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"204\" y=\"-239.3\" font-family=\"Times,serif\" font-size=\"14.00\">b</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"236.5,-231.5 236.5,-284.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"248\" y=\"-254.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 9&#45;&gt;10 -->\n",
+       "<g id=\"edge6\" class=\"edge\"><title>9:o0&#45;&gt;10:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M97,-258C115.771,-258 122.883,-258 137.646,-258\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"138,-261.5 148,-258 138,-254.5 138,-261.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"126\" y=\"-261.8\" font-family=\"Times,serif\" font-size=\"14.00\">5</text>\n",
+       "</g>\n",
+       "<!-- 2 -->\n",
+       "<g id=\"node9\" class=\"node\"><title>2</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"310,-189.5 310,-258.5 426,-258.5 426,-189.5 310,-189.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"321.5\" y=\"-243.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"310,-235.5 333,-235.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"321.5\" y=\"-220.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"310,-212.5 333,-212.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"321.5\" y=\"-197.3\" font-family=\"Times,serif\" font-size=\"14.00\">2</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"333,-189.5 333,-258.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"368\" y=\"-235.3\" font-family=\"Times,serif\" font-size=\"14.00\">2</text>\n",
+       "<text text-anchor=\"middle\" x=\"368\" y=\"-220.3\" font-family=\"Times,serif\" font-size=\"14.00\">ADDFX1</text>\n",
+       "<text text-anchor=\"middle\" x=\"368\" y=\"-205.3\" font-family=\"Times,serif\" font-size=\"14.00\">adder</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"403,-189.5 403,-258.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"414.5\" y=\"-237.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"403,-224.5 426,-224.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"414.5\" y=\"-203.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 1&#45;&gt;2 -->\n",
+       "<g id=\"edge11\" class=\"edge\"><title>1:o0&#45;&gt;2:i2</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M260,-186C279.394,-186 285.091,-196.482 299.991,-199.921\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"299.683,-203.408 310,-201 300.433,-196.449 299.683,-203.408\"/>\n",
+       "<text text-anchor=\"middle\" x=\"285\" y=\"-199.8\" font-family=\"Times,serif\" font-size=\"14.00\">10</text>\n",
+       "</g>\n",
+       "<!-- 6&#45;&gt;0 -->\n",
+       "<g id=\"edge8\" class=\"edge\"><title>6:o0&#45;&gt;0:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" stroke-dasharray=\"1,5\" d=\"M248,-142C248,-153.647 63.2922,-143.321 20.0013,-131.664\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"21.4891,-128.493 11,-127 18.2685,-134.708 21.4891,-128.493\"/>\n",
+       "<text text-anchor=\"middle\" x=\"126\" y=\"-147.8\" font-family=\"Times,serif\" font-size=\"14.00\">7</text>\n",
+       "</g>\n",
+       "<!-- 8&#45;&gt;2 -->\n",
+       "<g id=\"edge9\" class=\"edge\"><title>8:o0&#45;&gt;2:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M260,-330C297.72,-330 317.449,-305.993 320.562,-270.351\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"324.074,-270.139 321,-260 317.08,-269.843 324.074,-270.139\"/>\n",
+       "<text text-anchor=\"middle\" x=\"285\" y=\"-330.8\" font-family=\"Times,serif\" font-size=\"14.00\">8</text>\n",
+       "</g>\n",
+       "<!-- 10&#45;&gt;2 -->\n",
+       "<g id=\"edge10\" class=\"edge\"><title>10:o0&#45;&gt;2:i1</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M260,-258C282.884,-258 283.564,-233.345 299.857,-226.025\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"300.879,-229.39 310,-224 299.508,-222.526 300.879,-229.39\"/>\n",
+       "<text text-anchor=\"middle\" x=\"285\" y=\"-252.8\" font-family=\"Times,serif\" font-size=\"14.00\">9</text>\n",
+       "</g>\n",
+       "<!-- 3 -->\n",
+       "<g id=\"node10\" class=\"node\"><title>3</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"470.5,-215.5 470.5,-268.5 581.5,-268.5 581.5,-215.5 470.5,-215.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"482\" y=\"-238.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"493.5,-215.5 493.5,-268.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"526\" y=\"-253.3\" font-family=\"Times,serif\" font-size=\"14.00\">3</text>\n",
+       "<text text-anchor=\"middle\" x=\"526\" y=\"-238.3\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"526\" y=\"-223.3\" font-family=\"Times,serif\" font-size=\"14.00\">cout</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"558.5,-215.5 558.5,-268.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"570\" y=\"-238.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 2&#45;&gt;3 -->\n",
+       "<g id=\"edge2\" class=\"edge\"><title>2:o0&#45;&gt;3:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M426,-242C441.583,-242 447.853,-242 459.653,-242\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"460,-245.5 470,-242 460,-238.5 460,-245.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"448\" y=\"-245.8\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 4 -->\n",
+       "<g id=\"node11\" class=\"node\"><title>4</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"470.5,-143.5 470.5,-196.5 581.5,-196.5 581.5,-143.5 470.5,-143.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"482\" y=\"-166.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"493.5,-143.5 493.5,-196.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"526\" y=\"-181.3\" font-family=\"Times,serif\" font-size=\"14.00\">4</text>\n",
+       "<text text-anchor=\"middle\" x=\"526\" y=\"-166.3\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"526\" y=\"-151.3\" font-family=\"Times,serif\" font-size=\"14.00\">s</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"558.5,-143.5 558.5,-196.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"570\" y=\"-166.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 2&#45;&gt;4 -->\n",
+       "<g id=\"edge3\" class=\"edge\"><title>2:o1&#45;&gt;4:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M426,-207C447.558,-207 445.871,-180.659 460.165,-172.428\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"461.13,-175.795 470,-170 459.453,-168.999 461.13,-175.795\"/>\n",
+       "<text text-anchor=\"middle\" x=\"448\" y=\"-198.8\" font-family=\"Times,serif\" font-size=\"14.00\">2</text>\n",
+       "</g>\n",
+       "<!-- 3&#45;&gt;0 -->\n",
+       "<g id=\"edge7\" class=\"edge\"><title>3:o0&#45;&gt;0:i1</title>\n",
+       "<path fill=\"none\" stroke=\"black\" stroke-dasharray=\"1,5\" d=\"M570,-214C570,-202.853 479.517,-211.803 470,-206 457.048,-198.102 463.144,-187.292 452,-177 381.469,-111.86 352.85,-102.427 260,-78 192.899,-60.3466 173.364,-73.7065 104,-72 95.808,-71.7985 42.1297,-64.7898 20.2546,-67.0491\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"18.8227,-63.8547 11,-71 21.5712,-70.2926 18.8227,-63.8547\"/>\n",
+       "<text text-anchor=\"middle\" x=\"285\" y=\"-89.8\" font-family=\"Times,serif\" font-size=\"14.00\">6</text>\n",
+       "</g>\n",
+       "<!-- 11 -->\n",
+       "<g id=\"node12\" class=\"node\"><title>11</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"632,-143.5 632,-196.5 726,-196.5 726,-143.5 632,-143.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"643.5\" y=\"-166.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"655,-143.5 655,-196.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"680.5\" y=\"-181.3\" font-family=\"Times,serif\" font-size=\"14.00\">11 [3]</text>\n",
+       "<text text-anchor=\"middle\" x=\"680.5\" y=\"-166.3\" font-family=\"Times,serif\" font-size=\"14.00\">output</text>\n",
+       "<text text-anchor=\"middle\" x=\"680.5\" y=\"-151.3\" font-family=\"Times,serif\" font-size=\"14.00\">s</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"706,-143.5 706,-196.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"716\" y=\"-166.3\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "</g>\n",
+       "<!-- 4&#45;&gt;11 -->\n",
+       "<g id=\"edge12\" class=\"edge\"><title>4:o0&#45;&gt;11:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M582,-170C600.403,-170 607.376,-170 621.849,-170\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"622,-173.5 632,-170 622,-166.5 622,-173.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"607\" y=\"-173.8\" font-family=\"Times,serif\" font-size=\"14.00\">11</text>\n",
+       "</g>\n",
+       "</g>\n",
+       "</svg>\n"
+      ],
+      "text/plain": [
+       "<graphviz.graphs.Digraph at 0x7f8e493abc40>"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.dot()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The rectangles are called _nodes_.\n",
+    "Every node has an _index_ (top-middle number), a _kind_ or type (string in the middle), and a _name_ (bottom-most string).\n",
+    "Nodes have numeric input pins on the left and numeric output pins on the right.\n",
+    "The pins are connected by directional _lines_.\n",
+    "Lines are 1-to-1 connections and also have an _index_.\n",
+    "\n",
+    "Some nodes have an additional number in brackets. These are primary inputs, primary outputs (_io_nodes_) or sequential nodes (flip-flops, _s_nodes_).\n",
+    "The number is the position of their corresponding data in test vectors.\n",
+    "\n",
+    "The graph above is topologically sorted with primary inputs and flip-flops on the left.\n",
+    "Lines that follow the sorting are shown as solid, lines back to the flip-flops are shown as dotted.\n",
+    "\n",
+    "Let's explore the components in more detail:\n",
+    "\n",
+    "### Cells and Forks\n",
+    "\n",
+    "Circuits contain `cells` and `forks` dictionaries that map names to `Node`-objects."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'carry': 0:DFFX1\"carry\" <7 <6 >0,\n",
+       " 'adder': 2:ADDFX1\"adder\" <8 <9 <10 >1 >2,\n",
+       " 'clk': 5:input\"clk\" >3,\n",
+       " 'a': 7:input\"a\" >4,\n",
+       " 'b': 9:input\"b\" >5,\n",
+       " 's': 11:output\"s\" <11}"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.cells"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'cin': 1:__fork__\"cin\" <0 >10,\n",
+       " 'cout': 3:__fork__\"cout\" <1 >6,\n",
+       " 's': 4:__fork__\"s\" <2 >11,\n",
+       " 'clk': 6:__fork__\"clk\" <3 >7,\n",
+       " 'a': 8:__fork__\"a\" <4 >8,\n",
+       " 'b': 10:__fork__\"b\" <5 >9}"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.forks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Access any cell or fork by name using a simple dictionary lookup:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2:ADDFX1\"adder\" <8 <9 <10 >1 >2"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.cells['adder']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3:__fork__\"cout\" <1 >6"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.forks['cout']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Cells and forks are instances of class `Node`, which represent *things* that are connected to one or more other *things* in the circuit.\n",
+    "\n",
+    "* A **cell** represents a gate or a standard cell.\n",
+    "* A **fork** represents a named signal or a fan-out point (connecting the output of one cell to multiple other cells or forks).\n",
+    "\n",
+    "`Node`-objects have an `index`, a `kind`, and a `name`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(11, 'output', 's')"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.cells['s'].index, adder.cells['s'].kind, adder.cells['s'].name"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "*Forks* are `Node`-objects of the special kind `__fork__`.\n",
+    "\n",
+    "*Cells* are `Node`-objects of any other kind. A *kind* is just a string and can be anything.\n",
+    "\n",
+    "The namespaces of *forks* and *cells* are separate:\n",
+    "* A *cell* and a *fork* **can** have the same name.\n",
+    "* Two *cells* or two *forks* **cannot** have the same name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(4, '__fork__', 's')"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.forks['s'].index, adder.forks['s'].kind, adder.forks['s'].name"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `index` of a *node* in a circuit is a unique and consecutive integer.\n",
+    "\n",
+    "Although *Forks* and *cells* can have the same name, they all have separate indices.\n",
+    "\n",
+    "Nodes can be accessed by their index using the `nodes` list:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[0:DFFX1\"carry\" <7 <6 >0,\n",
+       " 1:__fork__\"cin\" <0 >10,\n",
+       " 2:ADDFX1\"adder\" <8 <9 <10 >1 >2,\n",
+       " 3:__fork__\"cout\" <1 >6,\n",
+       " 4:__fork__\"s\" <2 >11,\n",
+       " 5:input\"clk\" >3,\n",
+       " 6:__fork__\"clk\" <3 >7,\n",
+       " 7:input\"a\" >4,\n",
+       " 8:__fork__\"a\" <4 >8,\n",
+       " 9:input\"b\" >5,\n",
+       " 10:__fork__\"b\" <5 >9,\n",
+       " 11:output\"s\" <11]"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.nodes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(4:__fork__\"s\" <2 >11, 11:output\"s\" <11)"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.nodes[4], adder.nodes[11]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A common use for the index is to store additional data for nodes. Since the index is positive, unique, and consecutive, it can be easily used with external arrays or lists.\n",
+    "\n",
+    "This is how you store an additional \"weight\" for each node in the circuit:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "weights = [0] * len(adder.nodes)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Use the node instance to index into the external list. This also works with numpy arrays, of course."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "weights[adder.cells['s']] = 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5]"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "weights"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Lines\n",
+    "\n",
+    "A `Line` is a directional 1:1 connection between two Nodes.\n",
+    "\n",
+    "A line has a circuit-unique and consecutive `index` just like nodes.\n",
+    "\n",
+    "Line and node indices are different!\n",
+    "\n",
+    "There is a `lines` list. If a line is printed, it just outputs its index:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.lines"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A line one `driver`-node and one `reader`-node:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(2:ADDFX1\"adder\" <8 <9 <10 >1 >2, 4:__fork__\"s\" <2 >11)"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.lines[2].driver, adder.lines[2].reader"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Nodes show their connections to the lines with direction (\"<\" for input, \">\" for output) and the line index.\n",
+    "\n",
+    "In the example above, line 2 connects the output of cell \"axb\" to the input of fork \"axb\".\n",
+    "\n",
+    "The input connections and output connections of a node are ordered lists of lines called `ins` and `outs`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "([8, 9, 10], [1, 2])"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.cells['adder'].ins, adder.cells['adder'].outs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A line also stores its positions in the connection lists in `driver_pin` and `reader_pin`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0, 1)"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.lines[9].driver_pin, adder.lines[9].reader_pin"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### IO_Nodes\n",
+    "\n",
+    "Any node in the circuit can be designated as a primary input or primary output by adding it to the `io_nodes` list:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[5:input\"clk\" >3, 7:input\"a\" >4, 9:input\"b\" >5, 11:output\"s\" <11]"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.io_nodes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "It is common that io_nodes either have only output connections (in a role as primary-input) or only input connections (in a role as primary-output).\n",
+    "\n",
+    "Inputs and outputs appear in the order they were defined in the loaded file. Inputs and outputs are often interspersed.\n",
+    "\n",
+    "A related list is `s_nodes`. It contains the io_nodes at the beginning and adds all sequential elements (flip-flops, latches)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[5:input\"clk\" >3,\n",
+       " 7:input\"a\" >4,\n",
+       " 9:input\"b\" >5,\n",
+       " 11:output\"s\" <11,\n",
+       " 0:DFFX1\"carry\" <7 <6 >0]"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.s_nodes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Basic Circuit Navigation\n",
+    "\n",
+    "A circuit can be traversed easily using the properties of `Circuit`, `Node`, and `Line`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "8:__fork__\"a\" <4 >8"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.io_nodes[1].outs[0].reader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "3:__fork__\"cout\" <1 >6\n",
+      "4:__fork__\"s\" <2 >11\n"
+     ]
+    }
+   ],
+   "source": [
+    "for line in adder.cells['adder'].outs:\n",
+    "    print(line.reader)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'adder'"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.forks['cout'].ins[0].driver.name"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's continue with `b15` loaded before. It has 111 io_nodes:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "({name: \"b15\", cells: 10789, forks: 10749, lines: 32032, io_nodes: 111},\n",
+       " [21386:output\"BE_n[3]\" <31961,\n",
+       "  21387:output\"BE_n[2]\" <31962,\n",
+       "  21388:output\"BE_n[1]\" <31963,\n",
+       "  21389:output\"BE_n[0]\" <31964,\n",
+       "  21390:output\"Address[29]\" <31965,\n",
+       "  21391:output\"Address[28]\" <31966,\n",
+       "  21392:output\"Address[27]\" <31967,\n",
+       "  21393:output\"Address[26]\" <31968,\n",
+       "  21394:output\"Address[25]\" <31969,\n",
+       "  21395:output\"Address[24]\" <31970,\n",
+       "  21396:output\"Address[23]\" <31971,\n",
+       "  21397:output\"Address[22]\" <31972,\n",
+       "  21398:output\"Address[21]\" <31973,\n",
+       "  21399:output\"Address[20]\" <31974,\n",
+       "  21400:output\"Address[19]\" <31975,\n",
+       "  21401:output\"Address[18]\" <31976,\n",
+       "  21402:output\"Address[17]\" <31977,\n",
+       "  21403:output\"Address[16]\" <31978,\n",
+       "  21404:output\"Address[15]\" <31979,\n",
+       "  21405:output\"Address[14]\" <31980])"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "b15, b15.io_nodes[:20]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "and even more sequential nodes:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "528"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(b15.s_nodes)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `.io_locs(prefix)` and `.s_locs(prefix)` methods return the locations of signals, busses and registers in `io_nodes` and `s_nodes`. :"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "107"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "b15.io_locs('RESET')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[33,\n",
+       " 32,\n",
+       " 31,\n",
+       " 30,\n",
+       " 29,\n",
+       " 28,\n",
+       " 27,\n",
+       " 26,\n",
+       " 25,\n",
+       " 24,\n",
+       " 23,\n",
+       " 22,\n",
+       " 21,\n",
+       " 20,\n",
+       " 19,\n",
+       " 18,\n",
+       " 17,\n",
+       " 16,\n",
+       " 15,\n",
+       " 14,\n",
+       " 13,\n",
+       " 12,\n",
+       " 11,\n",
+       " 10,\n",
+       " 9,\n",
+       " 8,\n",
+       " 7,\n",
+       " 6,\n",
+       " 5,\n",
+       " 4]"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "b15.io_locs('Address')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Example of a two-dimensional register file (16 8-bit registers):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[[349, 348, 350, 347, 351, 346, 352, 345],\n",
+       " [357, 356, 358, 355, 359, 354, 360, 353],\n",
+       " [365, 364, 366, 363, 367, 362, 368, 361],\n",
+       " [373, 372, 374, 371, 375, 370, 376, 369],\n",
+       " [381, 380, 382, 379, 383, 378, 384, 377],\n",
+       " [389, 388, 390, 387, 391, 386, 392, 385],\n",
+       " [397, 396, 398, 395, 399, 394, 400, 393],\n",
+       " [405, 404, 406, 403, 407, 402, 408, 401],\n",
+       " [413, 412, 414, 411, 415, 410, 416, 409],\n",
+       " [421, 420, 422, 419, 423, 418, 424, 417],\n",
+       " [429, 428, 430, 427, 431, 426, 432, 425],\n",
+       " [437, 436, 438, 435, 439, 434, 440, 433],\n",
+       " [445, 444, 446, 443, 447, 442, 448, 441],\n",
+       " [453, 452, 454, 451, 455, 450, 456, 449],\n",
+       " [461, 460, 462, 459, 463, 458, 464, 457],\n",
+       " [469, 468, 470, 467, 471, 466, 472, 465]]"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "b15.s_locs('InstQueue_reg')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "---------------\n",
+      "1385:SDFFARX1_RVT\"InstQueue_reg_0__0_\" <12704 <12707 <12708 <12706 <12705 >702\n",
+      "1383:SDFFARX1_RVT\"InstQueue_reg_0__1_\" <12699 <12702 <12703 <12701 <12700 >701\n",
+      "1387:SDFFARX1_RVT\"InstQueue_reg_0__2_\" <12709 <12712 <12713 <12711 <12710 >703\n",
+      "1381:SDFFARX1_RVT\"InstQueue_reg_0__3_\" <12694 <12697 <12698 <12696 <12695 >700\n",
+      "1389:SDFFARX1_RVT\"InstQueue_reg_0__4_\" <12714 <12717 <12718 <12716 <12715 >704\n",
+      "1379:SDFFARX1_RVT\"InstQueue_reg_0__5_\" <12689 <12692 <12693 <12691 <12690 >699\n",
+      "1391:SDFFARX1_RVT\"InstQueue_reg_0__6_\" <12719 <12722 <12723 <12721 <12720 >705\n",
+      "1377:SDFFARX1_RVT\"InstQueue_reg_0__7_\" <12684 <12687 <12688 <12686 <12685 >698\n",
+      "---------------\n",
+      "1401:SDFFARX1_RVT\"InstQueue_reg_1__0_\" <12744 <12747 <12748 <12746 <12745 >710\n",
+      "1399:SDFFARX1_RVT\"InstQueue_reg_1__1_\" <12739 <12742 <12743 <12741 <12740 >709\n",
+      "1403:SDFFARX1_RVT\"InstQueue_reg_1__2_\" <12749 <12752 <12753 <12751 <12750 >711\n",
+      "1397:SDFFARX1_RVT\"InstQueue_reg_1__3_\" <12734 <12737 <12738 <12736 <12735 >708\n",
+      "1405:SDFFARX1_RVT\"InstQueue_reg_1__4_\" <12754 <12757 <12758 <12756 <12755 >712\n",
+      "1395:SDFFARX1_RVT\"InstQueue_reg_1__5_\" <12729 <12732 <12733 <12731 <12730 >707\n",
+      "1407:SDFFARX1_RVT\"InstQueue_reg_1__6_\" <12759 <12762 <12763 <12761 <12760 >713\n",
+      "1393:SDFFARX1_RVT\"InstQueue_reg_1__7_\" <12724 <12727 <12728 <12726 <12725 >706\n",
+      "---------------\n",
+      "1417:SDFFARX1_RVT\"InstQueue_reg_2__0_\" <12784 <12787 <12788 <12786 <12785 >718\n",
+      "1415:SDFFARX1_RVT\"InstQueue_reg_2__1_\" <12779 <12782 <12783 <12781 <12780 >717\n",
+      "1419:SDFFARX1_RVT\"InstQueue_reg_2__2_\" <12789 <12792 <12793 <12791 <12790 >719\n",
+      "1413:SDFFARX1_RVT\"InstQueue_reg_2__3_\" <12774 <12777 <12778 <12776 <12775 >716\n",
+      "1421:SDFFARX1_RVT\"InstQueue_reg_2__4_\" <12794 <12797 <12798 <12796 <12795 >720\n",
+      "1411:SDFFARX1_RVT\"InstQueue_reg_2__5_\" <12769 <12772 <12773 <12771 <12770 >715\n",
+      "1423:SDFFARX1_RVT\"InstQueue_reg_2__6_\" <12799 <12802 <12803 <12801 <12800 >721\n",
+      "1409:SDFFARX1_RVT\"InstQueue_reg_2__7_\" <12764 <12767 <12768 <12766 <12765 >714\n",
+      "---------------\n",
+      "1433:SDFFARX1_RVT\"InstQueue_reg_3__0_\" <12824 <12827 <12828 <12826 <12825 >726\n",
+      "1431:SDFFARX1_RVT\"InstQueue_reg_3__1_\" <12819 <12822 <12823 <12821 <12820 >725\n",
+      "1435:SDFFARX1_RVT\"InstQueue_reg_3__2_\" <12829 <12832 <12833 <12831 <12830 >727\n",
+      "1429:SDFFARX1_RVT\"InstQueue_reg_3__3_\" <12814 <12817 <12818 <12816 <12815 >724\n",
+      "1437:SDFFARX1_RVT\"InstQueue_reg_3__4_\" <12834 <12837 <12838 <12836 <12835 >728\n",
+      "1427:SDFFARX1_RVT\"InstQueue_reg_3__5_\" <12809 <12812 <12813 <12811 <12810 >723\n",
+      "1439:SDFFARX1_RVT\"InstQueue_reg_3__6_\" <12839 <12842 <12843 <12841 <12840 >729\n",
+      "1425:SDFFARX1_RVT\"InstQueue_reg_3__7_\" <12804 <12807 <12808 <12806 <12805 >722\n",
+      "---------------\n",
+      "1449:SDFFARX1_RVT\"InstQueue_reg_4__0_\" <12864 <12867 <12868 <12866 <12865 >734\n",
+      "1447:SDFFARX1_RVT\"InstQueue_reg_4__1_\" <12859 <12862 <12863 <12861 <12860 >733\n",
+      "1451:SDFFARX1_RVT\"InstQueue_reg_4__2_\" <12869 <12872 <12873 <12871 <12870 >735\n",
+      "1445:SDFFARX1_RVT\"InstQueue_reg_4__3_\" <12854 <12857 <12858 <12856 <12855 >732\n",
+      "1453:SDFFARX1_RVT\"InstQueue_reg_4__4_\" <12874 <12877 <12878 <12876 <12875 >736\n",
+      "1443:SDFFARX1_RVT\"InstQueue_reg_4__5_\" <12849 <12852 <12853 <12851 <12850 >731\n",
+      "1455:SDFFARX1_RVT\"InstQueue_reg_4__6_\" <12879 <12882 <12883 <12881 <12880 >737\n",
+      "1441:SDFFARX1_RVT\"InstQueue_reg_4__7_\" <12844 <12847 <12848 <12846 <12845 >730\n",
+      "---------------\n",
+      "1465:SDFFARX1_RVT\"InstQueue_reg_5__0_\" <12904 <12907 <12908 <12906 <12905 >742\n",
+      "1463:SDFFARX1_RVT\"InstQueue_reg_5__1_\" <12899 <12902 <12903 <12901 <12900 >741\n",
+      "1467:SDFFARX1_RVT\"InstQueue_reg_5__2_\" <12909 <12912 <12913 <12911 <12910 >743\n",
+      "1461:SDFFARX1_RVT\"InstQueue_reg_5__3_\" <12894 <12897 <12898 <12896 <12895 >740\n",
+      "1469:SDFFARX1_RVT\"InstQueue_reg_5__4_\" <12914 <12917 <12918 <12916 <12915 >744\n",
+      "1459:SDFFARX1_RVT\"InstQueue_reg_5__5_\" <12889 <12892 <12893 <12891 <12890 >739\n",
+      "1471:SDFFARX1_RVT\"InstQueue_reg_5__6_\" <12919 <12922 <12923 <12921 <12920 >745\n",
+      "1457:SDFFARX1_RVT\"InstQueue_reg_5__7_\" <12884 <12887 <12888 <12886 <12885 >738\n",
+      "---------------\n",
+      "1481:SDFFARX1_RVT\"InstQueue_reg_6__0_\" <12944 <12947 <12948 <12946 <12945 >750\n",
+      "1479:SDFFARX1_RVT\"InstQueue_reg_6__1_\" <12939 <12942 <12943 <12941 <12940 >749\n",
+      "1483:SDFFARX1_RVT\"InstQueue_reg_6__2_\" <12949 <12952 <12953 <12951 <12950 >751\n",
+      "1477:SDFFARX1_RVT\"InstQueue_reg_6__3_\" <12934 <12937 <12938 <12936 <12935 >748\n",
+      "1485:SDFFARX1_RVT\"InstQueue_reg_6__4_\" <12954 <12957 <12958 <12956 <12955 >752\n",
+      "1475:SDFFARX1_RVT\"InstQueue_reg_6__5_\" <12929 <12932 <12933 <12931 <12930 >747\n",
+      "1487:SDFFARX1_RVT\"InstQueue_reg_6__6_\" <12959 <12962 <12963 <12961 <12960 >753\n",
+      "1473:SDFFARX1_RVT\"InstQueue_reg_6__7_\" <12924 <12927 <12928 <12926 <12925 >746\n",
+      "---------------\n",
+      "1497:SDFFARX1_RVT\"InstQueue_reg_7__0_\" <12984 <12987 <12988 <12986 <12985 >758\n",
+      "1495:SDFFARX1_RVT\"InstQueue_reg_7__1_\" <12979 <12982 <12983 <12981 <12980 >757\n",
+      "1499:SDFFARX1_RVT\"InstQueue_reg_7__2_\" <12989 <12992 <12993 <12991 <12990 >759\n",
+      "1493:SDFFARX1_RVT\"InstQueue_reg_7__3_\" <12974 <12977 <12978 <12976 <12975 >756\n",
+      "1501:SDFFARX1_RVT\"InstQueue_reg_7__4_\" <12994 <12997 <12998 <12996 <12995 >760\n",
+      "1491:SDFFARX1_RVT\"InstQueue_reg_7__5_\" <12969 <12972 <12973 <12971 <12970 >755\n",
+      "1503:SDFFARX1_RVT\"InstQueue_reg_7__6_\" <12999 <13002 <13003 <13001 <13000 >761\n",
+      "1489:SDFFARX1_RVT\"InstQueue_reg_7__7_\" <12964 <12967 <12968 <12966 <12965 >754\n",
+      "---------------\n",
+      "1513:SDFFARX1_RVT\"InstQueue_reg_8__0_\" <13024 <13027 <13028 <13026 <13025 >766\n",
+      "1511:SDFFARX1_RVT\"InstQueue_reg_8__1_\" <13019 <13022 <13023 <13021 <13020 >765\n",
+      "1515:SDFFARX1_RVT\"InstQueue_reg_8__2_\" <13029 <13032 <13033 <13031 <13030 >767 >768\n",
+      "1509:SDFFARX1_RVT\"InstQueue_reg_8__3_\" <13014 <13017 <13018 <13016 <13015 >764\n",
+      "1518:SDFFARX1_RVT\"InstQueue_reg_8__4_\" <13034 <13037 <13038 <13036 <13035 >769\n",
+      "1507:SDFFARX1_RVT\"InstQueue_reg_8__5_\" <13009 <13012 <13013 <13011 <13010 >763\n",
+      "1520:SDFFARX1_RVT\"InstQueue_reg_8__6_\" <13039 <13042 <13043 <13041 <13040 >770\n",
+      "1505:SDFFARX1_RVT\"InstQueue_reg_8__7_\" <13004 <13007 <13008 <13006 <13005 >762\n",
+      "---------------\n",
+      "1530:SDFFARX1_RVT\"InstQueue_reg_9__0_\" <13064 <13067 <13068 <13066 <13065 >775\n",
+      "1528:SDFFARX1_RVT\"InstQueue_reg_9__1_\" <13059 <13062 <13063 <13061 <13060 >774\n",
+      "1532:SDFFARX1_RVT\"InstQueue_reg_9__2_\" <13069 <13072 <13073 <13071 <13070 >776\n",
+      "1526:SDFFARX1_RVT\"InstQueue_reg_9__3_\" <13054 <13057 <13058 <13056 <13055 >773\n",
+      "1534:SDFFARX1_RVT\"InstQueue_reg_9__4_\" <13074 <13077 <13078 <13076 <13075 >777\n",
+      "1524:SDFFARX1_RVT\"InstQueue_reg_9__5_\" <13049 <13052 <13053 <13051 <13050 >772\n",
+      "1536:SDFFARX1_RVT\"InstQueue_reg_9__6_\" <13079 <13082 <13083 <13081 <13080 >778\n",
+      "1522:SDFFARX1_RVT\"InstQueue_reg_9__7_\" <13044 <13047 <13048 <13046 <13045 >771\n",
+      "---------------\n",
+      "1546:SDFFARX1_RVT\"InstQueue_reg_10__0_\" <13104 <13107 <13108 <13106 <13105 >783\n",
+      "1544:SDFFARX1_RVT\"InstQueue_reg_10__1_\" <13099 <13102 <13103 <13101 <13100 >782\n",
+      "1548:SDFFARX1_RVT\"InstQueue_reg_10__2_\" <13109 <13112 <13113 <13111 <13110 >784\n",
+      "1542:SDFFARX1_RVT\"InstQueue_reg_10__3_\" <13094 <13097 <13098 <13096 <13095 >781\n",
+      "1550:SDFFARX1_RVT\"InstQueue_reg_10__4_\" <13114 <13117 <13118 <13116 <13115 >785 >786\n",
+      "1540:SDFFARX1_RVT\"InstQueue_reg_10__5_\" <13089 <13092 <13093 <13091 <13090 >780\n",
+      "1553:SDFFARX1_RVT\"InstQueue_reg_10__6_\" <13119 <13122 <13123 <13121 <13120 >787\n",
+      "1538:SDFFARX1_RVT\"InstQueue_reg_10__7_\" <13084 <13087 <13088 <13086 <13085 >779\n",
+      "---------------\n",
+      "1563:SDFFARX1_RVT\"InstQueue_reg_11__0_\" <13144 <13147 <13148 <13146 <13145 >792\n",
+      "1561:SDFFARX1_RVT\"InstQueue_reg_11__1_\" <13139 <13142 <13143 <13141 <13140 >791\n",
+      "1565:SDFFARX1_RVT\"InstQueue_reg_11__2_\" <13149 <13152 <13153 <13151 <13150 >793\n",
+      "1559:SDFFARX1_RVT\"InstQueue_reg_11__3_\" <13134 <13137 <13138 <13136 <13135 >790\n",
+      "1567:SDFFARX1_RVT\"InstQueue_reg_11__4_\" <13154 <13157 <13158 <13156 <13155 >794\n",
+      "1557:SDFFARX1_RVT\"InstQueue_reg_11__5_\" <13129 <13132 <13133 <13131 <13130 >789\n",
+      "1569:SDFFARX1_RVT\"InstQueue_reg_11__6_\" <13159 <13162 <13163 <13161 <13160 >795\n",
+      "1555:SDFFARX1_RVT\"InstQueue_reg_11__7_\" <13124 <13127 <13128 <13126 <13125 >788\n",
+      "---------------\n",
+      "1579:SDFFARX1_RVT\"InstQueue_reg_12__0_\" <13184 <13187 <13188 <13186 <13185 >800\n",
+      "1577:SDFFARX1_RVT\"InstQueue_reg_12__1_\" <13179 <13182 <13183 <13181 <13180 >799\n",
+      "1581:SDFFARX1_RVT\"InstQueue_reg_12__2_\" <13189 <13192 <13193 <13191 <13190 >801\n",
+      "1575:SDFFARX1_RVT\"InstQueue_reg_12__3_\" <13174 <13177 <13178 <13176 <13175 >798\n",
+      "1583:SDFFARX1_RVT\"InstQueue_reg_12__4_\" <13194 <13197 <13198 <13196 <13195 >802\n",
+      "1573:SDFFARX1_RVT\"InstQueue_reg_12__5_\" <13169 <13172 <13173 <13171 <13170 >797\n",
+      "1585:SDFFARX1_RVT\"InstQueue_reg_12__6_\" <13199 <13202 <13203 <13201 <13200 >803\n",
+      "1571:SDFFARX1_RVT\"InstQueue_reg_12__7_\" <13164 <13167 <13168 <13166 <13165 >796\n",
+      "---------------\n",
+      "1595:SDFFARX1_RVT\"InstQueue_reg_13__0_\" <13224 <13227 <13228 <13226 <13225 >808\n",
+      "1593:SDFFARX1_RVT\"InstQueue_reg_13__1_\" <13219 <13222 <13223 <13221 <13220 >807\n",
+      "1597:SDFFARX1_RVT\"InstQueue_reg_13__2_\" <13229 <13232 <13233 <13231 <13230 >809\n",
+      "1591:SDFFARX1_RVT\"InstQueue_reg_13__3_\" <13214 <13217 <13218 <13216 <13215 >806\n",
+      "1599:SDFFARX1_RVT\"InstQueue_reg_13__4_\" <13234 <13237 <13238 <13236 <13235 >810\n",
+      "1589:SDFFARX1_RVT\"InstQueue_reg_13__5_\" <13209 <13212 <13213 <13211 <13210 >805\n",
+      "1601:SDFFARX1_RVT\"InstQueue_reg_13__6_\" <13239 <13242 <13243 <13241 <13240 >811\n",
+      "1587:SDFFARX1_RVT\"InstQueue_reg_13__7_\" <13204 <13207 <13208 <13206 <13205 >804\n",
+      "---------------\n",
+      "1611:SDFFARX1_RVT\"InstQueue_reg_14__0_\" <13264 <13267 <13268 <13266 <13265 >816\n",
+      "1609:SDFFARX1_RVT\"InstQueue_reg_14__1_\" <13259 <13262 <13263 <13261 <13260 >815\n",
+      "1613:SDFFARX1_RVT\"InstQueue_reg_14__2_\" <13269 <13272 <13273 <13271 <13270 >817\n",
+      "1607:SDFFARX1_RVT\"InstQueue_reg_14__3_\" <13254 <13257 <13258 <13256 <13255 >814\n",
+      "1615:SDFFARX1_RVT\"InstQueue_reg_14__4_\" <13274 <13277 <13278 <13276 <13275 >818\n",
+      "1605:SDFFARX1_RVT\"InstQueue_reg_14__5_\" <13249 <13252 <13253 <13251 <13250 >813\n",
+      "1617:SDFFARX1_RVT\"InstQueue_reg_14__6_\" <13279 <13282 <13283 <13281 <13280 >819\n",
+      "1603:SDFFARX1_RVT\"InstQueue_reg_14__7_\" <13244 <13247 <13248 <13246 <13245 >812\n",
+      "---------------\n",
+      "1627:SDFFARX1_RVT\"InstQueue_reg_15__0_\" <13304 <13307 <13308 <13306 <13305 >824\n",
+      "1625:SDFFARX1_RVT\"InstQueue_reg_15__1_\" <13299 <13302 <13303 <13301 <13300 >823\n",
+      "1629:SDFFARX1_RVT\"InstQueue_reg_15__2_\" <13309 <13312 <13313 <13311 <13310 >825\n",
+      "1623:SDFFARX1_RVT\"InstQueue_reg_15__3_\" <13294 <13297 <13298 <13296 <13295 >822\n",
+      "1631:SDFFARX1_RVT\"InstQueue_reg_15__4_\" <13314 <13317 <13318 <13316 <13315 >826\n",
+      "1621:SDFFARX1_RVT\"InstQueue_reg_15__5_\" <13289 <13292 <13293 <13291 <13290 >821\n",
+      "1633:SDFFARX1_RVT\"InstQueue_reg_15__6_\" <13319 <13322 <13323 <13321 <13320 >827\n",
+      "1619:SDFFARX1_RVT\"InstQueue_reg_15__7_\" <13284 <13287 <13288 <13286 <13285 >820\n"
+     ]
+    }
+   ],
+   "source": [
+    "for l in b15.s_locs('InstQueue_reg'):\n",
+    "    print('---------------')\n",
+    "    for i in l:\n",
+    "        print(b15.s_nodes[i])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Example: Tracing A Scan Chain\n",
+    "\n",
+    "We start at the output of the scan chain \"test_so000\", then go backwards through the circuit.\n",
+    "\n",
+    "When we encounter a scan-cell (\"SDFF...\"), we continue with the \"SI\" pin.\n",
+    "\n",
+    "We do this on the original circuit `b15` that still contains the scan-cells themselves."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "length (with forks): 1123\n",
+      "length (without forks): 562\n",
+      "length only SDFF: 417\n",
+      "output\"test_so000\" __fork__\"test_so000\" NBUFFX8_RVT\"ZBUF_15_inst_543\" __fork__\"aps_rename_15_\" SDFFARX1_RVT\"W_R_n_reg\" __fork__\"ZBUF_17_48\" NBUFFX2_RVT\"ZBUF_17_inst_981\" __fork__\"N3897\" SDFFARX1_RVT\"uWord_reg_14_\" __fork__\"N3896\" ... __fork__\"Address[0]\" NBUFFX2_RVT\"ZBUF_19_inst_438\" __fork__\"aps_rename_14_\" SDFFARX1_RVT\"Address_reg_0_\" __fork__\"ADS_n\" NBUFFX2_RVT\"ZBUF_34_inst_547\" __fork__\"aps_rename_18_\" SDFFARX1_RVT\"ADS_n_reg\" __fork__\"test_si000\" input\"test_si000\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "chain = [cell := b15.cells['test_so000']]\n",
+    "while len(cell.ins) > 0:\n",
+    "    chain.append(cell := cell.ins[SAED32.pin_index(cell.kind,'SI') if cell.kind.startswith('SDFF') else 0].driver)\n",
+    "        \n",
+    "print(f'length (with forks): {len(chain)}')\n",
+    "print(f'length (without forks): {len(list(filter(lambda n: n.kind != \"__fork__\", chain)))}')\n",
+    "print(f'length only SDFF: {len(list(filter(lambda n: n.kind.startswith(\"SDFF\"), chain)))}')\n",
+    "\n",
+    "names = [f'{c.kind}\"{c.name}\"' for c in chain]\n",
+    "print(' '.join(names[:10]) + ' ... ' + ' '.join(names[-10:]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Traversing a Circuit in Topological Order\n",
+    "\n",
+    "There are several generators to traverse the circuit in various topological orderings.\n",
+    "\n",
+    "The following loop prints all nodes:\n",
+    "* starting with primary inputs (nodes that don't have any input connections) and sequential elements,\n",
+    "* and continuing with nodes who's inputs are connected only to already printed nodes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0:DFFX1\"carry\" <7 <6 >0\n",
+      "5:input\"clk\" >3\n",
+      "7:input\"a\" >4\n",
+      "9:input\"b\" >5\n",
+      "1:__fork__\"cin\" <0 >10\n",
+      "6:__fork__\"clk\" <3 >7\n",
+      "8:__fork__\"a\" <4 >8\n",
+      "10:__fork__\"b\" <5 >9\n",
+      "2:ADDFX1\"adder\" <8 <9 <10 >1 >2\n",
+      "3:__fork__\"cout\" <1 >6\n",
+      "4:__fork__\"s\" <2 >11\n",
+      "11:output\"s\" <11\n"
+     ]
+    }
+   ],
+   "source": [
+    "for n in adder.topological_order():\n",
+    "    print(n)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Example: Determine Topological Level\n",
+    "\n",
+    "The topological (or logic level) of a node is its distance from inputs or sequential elements.\n",
+    "\n",
+    "Inputs and flip-flops themselves are level 0, *cells* driven by just inputs and flip-flops are level 1, and so on.\n",
+    "*Fork* nodes have the same level as their driver, because they do not increase the logic depth."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Maximum logic depth: 44\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "levels = np.zeros(len(b15.nodes), dtype=np.uint32)  # array to store level for each node.\n",
+    "\n",
+    "for n in b15.topological_order():\n",
+    "    if 'DFF' in n.kind or len(n.ins) == 0:\n",
+    "        levels[n] = 0           # use the node n directly to index into the array.\n",
+    "    elif n.kind == '__fork__':\n",
+    "        levels[n] = levels[n.ins[0].driver]  # forks only have exactly one driver\n",
+    "    else:\n",
+    "        levels[n] = max([levels[line.driver] for line in n.ins]) + 1\n",
+    "        \n",
+    "print(f'Maximum logic depth: {np.max(levels)}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "List nodes with the highest depth and which nodes they are driving."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "depth: 44 node: __fork__     n4587  driving: SDFFARX1_RVT EAX_reg_31_   \n",
+      "depth: 44 node: NAND2X0_RVT  U737   driving: __fork__     n4587         \n",
+      "depth: 43 node: __fork__     n4478  driving: SDFFARX1_RVT Address_reg_29_\n",
+      "depth: 43 node: NAND2X0_RVT  U738   driving: __fork__     n684          \n",
+      "depth: 43 node: __fork__     n4416  driving: SDFFARX1_RVT PhyAddrPointer_reg_29_\n",
+      "depth: 43 node: NAND2X0_RVT  U220   driving: __fork__     n4416         \n",
+      "depth: 43 node: NAND2X0_RVT  U214   driving: __fork__     n4414         \n",
+      "depth: 43 node: __fork__     n4414  driving: SDFFARX1_RVT PhyAddrPointer_reg_31_\n",
+      "depth: 43 node: __fork__     n684   driving: NAND2X0_RVT  U737          \n",
+      "depth: 43 node: NAND2X0_RVT  U408   driving: __fork__     n4478         \n",
+      "depth: 42 node: NAND2X0_RVT  U216   driving: __fork__     n332          \n",
+      "depth: 42 node: __fork__     n4510  driving: SDFFARX1_RVT rEIP_reg_29_  \n",
+      "depth: 42 node: NAND2X0_RVT  U595   driving: __fork__     n4540         \n",
+      "depth: 42 node: __fork__     n4540  driving: SDFFARX1_RVT EBX_reg_31_   \n",
+      "depth: 42 node: __fork__     n4588  driving: SDFFARX1_RVT EAX_reg_30_   \n",
+      "depth: 42 node: __fork__     n332   driving: NAND2X0_RVT  U214          \n",
+      "depth: 42 node: NAND2X0_RVT  U222   driving: __fork__     n337          \n",
+      "depth: 42 node: __fork__     n463   driving: NAND2X0_RVT  U408          \n",
+      "depth: 42 node: __fork__     n4446  driving: SDFFARX1_RVT InstAddrPointer_reg_31_\n",
+      "depth: 42 node: NAND2X0_RVT  U311   driving: __fork__     n4446         \n"
+     ]
+    }
+   ],
+   "source": [
+    "nodes_by_depth = np.argsort(levels)[::-1]\n",
+    "\n",
+    "for n_idx in nodes_by_depth[:20]:\n",
+    "    n = b15.nodes[n_idx]  # get the node itself by its index\n",
+    "    readers = ', '.join([f'{l.reader.kind:12s} {l.reader.name:14s}' for l in n.outs])\n",
+    "    print(f'depth: {levels[n_idx]} node: {n.kind:12s} {n.name:6s} driving: {readers}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Working With Technology Libraries\n",
+    "\n",
+    "Kyupy's simulators only supports cells with at most 4 inputs and exactly 1 output.\n",
+    "To map a circuit to the supported simulation primitives, we use `.resolve_tlib_cells()` with the corresponding library."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adder.resolve_tlib_cells(GSC180)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/svg+xml": [
+       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
+       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
+       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
+       "<!-- Generated by graphviz version 2.30.1 (20201013.1554)\n",
+       " -->\n",
+       "<!-- Title: %3 Pages: 1 -->\n",
+       "<svg width=\"1214pt\" height=\"413pt\"\n",
+       " viewBox=\"0.00 0.00 1214.00 413.27\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
+       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 409.267)\">\n",
+       "<title>%3</title>\n",
+       "<polygon fill=\"white\" stroke=\"white\" points=\"-4,5 -4,-409.267 1211,-409.267 1211,5 -4,5\"/>\n",
+       "<!-- 0 -->\n",
+       "<g id=\"node1\" class=\"node\"><title>0</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-276.143 0,-329.143 90,-329.143 90,-276.143 0,-276.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"11.5\" y=\"-312.443\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"0,-303.143 23,-303.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"11.5\" y=\"-285.943\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"23,-276.143 23,-329.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"45\" y=\"-313.943\" font-family=\"Times,serif\" font-size=\"14.00\">0 [4]</text>\n",
+       "<text text-anchor=\"middle\" x=\"45\" y=\"-298.943\" font-family=\"Times,serif\" font-size=\"14.00\">DFF</text>\n",
+       "<text text-anchor=\"middle\" x=\"45\" y=\"-283.943\" font-family=\"Times,serif\" font-size=\"14.00\">carry</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"67,-276.143 67,-329.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"78.5\" y=\"-298.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 12 -->\n",
+       "<g id=\"node5\" class=\"node\"><title>12</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"140.5,-274.143 140.5,-327.143 251.5,-327.143 251.5,-274.143 140.5,-274.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"152\" y=\"-296.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"163.5,-274.143 163.5,-327.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-311.943\" font-family=\"Times,serif\" font-size=\"14.00\">12</text>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-296.943\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-281.943\" font-family=\"Times,serif\" font-size=\"14.00\">carry~Q</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"228.5,-274.143 228.5,-327.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"240\" y=\"-296.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 0&#45;&gt;12 -->\n",
+       "<g id=\"edge13\" class=\"edge\"><title>0:o0&#45;&gt;12:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M90,-302.643C108.417,-302.643 115.369,-301.271 129.844,-300.8\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"130.055,-304.297 140,-300.643 129.947,-297.298 130.055,-304.297\"/>\n",
+       "<text text-anchor=\"middle\" x=\"115\" y=\"-306.443\" font-family=\"Times,serif\" font-size=\"14.00\">12</text>\n",
+       "</g>\n",
+       "<!-- 5 -->\n",
+       "<g id=\"node2\" class=\"node\"><title>5</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"1.5,-200.143 1.5,-253.143 88.5,-253.143 88.5,-200.143 1.5,-200.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"11.5\" y=\"-222.943\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"21.5,-200.143 21.5,-253.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"43.5\" y=\"-237.943\" font-family=\"Times,serif\" font-size=\"14.00\">5 [0]</text>\n",
+       "<text text-anchor=\"middle\" x=\"43.5\" y=\"-222.943\" font-family=\"Times,serif\" font-size=\"14.00\">input</text>\n",
+       "<text text-anchor=\"middle\" x=\"43.5\" y=\"-207.943\" font-family=\"Times,serif\" font-size=\"14.00\">clk</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"65.5,-200.143 65.5,-253.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"77\" y=\"-222.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 6 -->\n",
+       "<g id=\"node6\" class=\"node\"><title>6</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"140.5,-201.143 140.5,-254.143 251.5,-254.143 251.5,-201.143 140.5,-201.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"152\" y=\"-223.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"163.5,-201.143 163.5,-254.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-238.943\" font-family=\"Times,serif\" font-size=\"14.00\">6</text>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-223.943\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-208.943\" font-family=\"Times,serif\" font-size=\"14.00\">clk</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"228.5,-201.143 228.5,-254.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"240\" y=\"-223.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 5&#45;&gt;6 -->\n",
+       "<g id=\"edge4\" class=\"edge\"><title>5:o0&#45;&gt;6:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M90,-226.643C108.406,-226.643 115.374,-227.329 129.848,-227.565\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"129.973,-231.066 140,-227.643 130.027,-224.066 129.973,-231.066\"/>\n",
+       "<text text-anchor=\"middle\" x=\"115\" y=\"-231.443\" font-family=\"Times,serif\" font-size=\"14.00\">3</text>\n",
+       "</g>\n",
+       "<!-- 7 -->\n",
+       "<g id=\"node3\" class=\"node\"><title>7</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"1.5,-127.143 1.5,-180.143 88.5,-180.143 88.5,-127.143 1.5,-127.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"11.5\" y=\"-149.943\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"21.5,-127.143 21.5,-180.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"43.5\" y=\"-164.943\" font-family=\"Times,serif\" font-size=\"14.00\">7 [1]</text>\n",
+       "<text text-anchor=\"middle\" x=\"43.5\" y=\"-149.943\" font-family=\"Times,serif\" font-size=\"14.00\">input</text>\n",
+       "<text text-anchor=\"middle\" x=\"43.5\" y=\"-134.943\" font-family=\"Times,serif\" font-size=\"14.00\">a</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"65.5,-127.143 65.5,-180.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"77\" y=\"-149.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 8 -->\n",
+       "<g id=\"node7\" class=\"node\"><title>8</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"140.5,-127.143 140.5,-180.143 251.5,-180.143 251.5,-127.143 140.5,-127.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"152\" y=\"-149.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"163.5,-127.143 163.5,-180.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-164.943\" font-family=\"Times,serif\" font-size=\"14.00\">8</text>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-149.943\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-134.943\" font-family=\"Times,serif\" font-size=\"14.00\">a</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"228.5,-127.143 228.5,-180.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"240\" y=\"-149.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 7&#45;&gt;8 -->\n",
+       "<g id=\"edge5\" class=\"edge\"><title>7:o0&#45;&gt;8:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M90,-153.643C108.403,-153.643 115.376,-153.643 129.849,-153.643\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"130,-157.143 140,-153.643 130,-150.143 130,-157.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"115\" y=\"-157.443\" font-family=\"Times,serif\" font-size=\"14.00\">4</text>\n",
+       "</g>\n",
+       "<!-- 9 -->\n",
+       "<g id=\"node4\" class=\"node\"><title>9</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"1.5,-50.1431 1.5,-103.143 88.5,-103.143 88.5,-50.1431 1.5,-50.1431\"/>\n",
+       "<text text-anchor=\"middle\" x=\"11.5\" y=\"-72.9431\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"21.5,-50.1431 21.5,-103.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"43.5\" y=\"-87.9431\" font-family=\"Times,serif\" font-size=\"14.00\">9 [2]</text>\n",
+       "<text text-anchor=\"middle\" x=\"43.5\" y=\"-72.9431\" font-family=\"Times,serif\" font-size=\"14.00\">input</text>\n",
+       "<text text-anchor=\"middle\" x=\"43.5\" y=\"-57.9431\" font-family=\"Times,serif\" font-size=\"14.00\">b</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"65.5,-50.1431 65.5,-103.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"77\" y=\"-72.9431\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 10 -->\n",
+       "<g id=\"node8\" class=\"node\"><title>10</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"140.5,-50.1431 140.5,-103.143 251.5,-103.143 251.5,-50.1431 140.5,-50.1431\"/>\n",
+       "<text text-anchor=\"middle\" x=\"152\" y=\"-72.9431\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"163.5,-50.1431 163.5,-103.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-87.9431\" font-family=\"Times,serif\" font-size=\"14.00\">10</text>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-72.9431\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"196\" y=\"-57.9431\" font-family=\"Times,serif\" font-size=\"14.00\">b</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"228.5,-50.1431 228.5,-103.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"240\" y=\"-72.9431\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 9&#45;&gt;10 -->\n",
+       "<g id=\"edge6\" class=\"edge\"><title>9:o0&#45;&gt;10:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M90,-76.6431C108.403,-76.6431 115.376,-76.6431 129.849,-76.6431\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"130,-80.1432 140,-76.6431 130,-73.1432 130,-80.1432\"/>\n",
+       "<text text-anchor=\"middle\" x=\"115\" y=\"-80.4431\" font-family=\"Times,serif\" font-size=\"14.00\">5</text>\n",
+       "</g>\n",
+       "<!-- 1 -->\n",
+       "<g id=\"node9\" class=\"node\"><title>1</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"296.5,-257.143 296.5,-310.143 407.5,-310.143 407.5,-257.143 296.5,-257.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"308\" y=\"-279.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"319.5,-257.143 319.5,-310.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"352\" y=\"-294.943\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<text text-anchor=\"middle\" x=\"352\" y=\"-279.943\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"352\" y=\"-264.943\" font-family=\"Times,serif\" font-size=\"14.00\">cin</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"384.5,-257.143 384.5,-310.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"396\" y=\"-279.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 12&#45;&gt;1 -->\n",
+       "<g id=\"edge1\" class=\"edge\"><title>12:o0&#45;&gt;1:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M252,-300.643C269.034,-300.643 273.595,-289.42 285.951,-285.212\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"286.66,-288.644 296,-283.643 285.58,-281.728 286.66,-288.644\"/>\n",
+       "<text text-anchor=\"middle\" x=\"274\" y=\"-299.443\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 6&#45;&gt;0 -->\n",
+       "<g id=\"edge8\" class=\"edge\"><title>6:o0&#45;&gt;0:i1</title>\n",
+       "<path fill=\"none\" stroke=\"black\" stroke-dasharray=\"1,5\" d=\"M240,-255.643C240,-261.228 64.6719,-266.699 20.8398,-272.256\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"19.8929,-268.884 11,-274.643 21.5433,-275.687 19.8929,-268.884\"/>\n",
+       "<text text-anchor=\"middle\" x=\"115\" y=\"-270.443\" font-family=\"Times,serif\" font-size=\"14.00\">7</text>\n",
+       "</g>\n",
+       "<!-- 13 -->\n",
+       "<g id=\"node10\" class=\"node\"><title>13</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"296.5,-127.143 296.5,-180.143 407.5,-180.143 407.5,-127.143 296.5,-127.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"308\" y=\"-149.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"319.5,-127.143 319.5,-180.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"352\" y=\"-164.943\" font-family=\"Times,serif\" font-size=\"14.00\">13</text>\n",
+       "<text text-anchor=\"middle\" x=\"352\" y=\"-149.943\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"352\" y=\"-134.943\" font-family=\"Times,serif\" font-size=\"14.00\">adder~A</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"384.5,-127.143 384.5,-180.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"396\" y=\"-163.443\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"384.5,-154.143 407.5,-154.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"396\" y=\"-136.943\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 8&#45;&gt;13 -->\n",
+       "<g id=\"edge9\" class=\"edge\"><title>8:o0&#45;&gt;13:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M252,-153.643C267.583,-153.643 273.853,-153.643 285.653,-153.643\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"286,-157.143 296,-153.643 286,-150.143 286,-157.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"274\" y=\"-157.443\" font-family=\"Times,serif\" font-size=\"14.00\">8</text>\n",
+       "</g>\n",
+       "<!-- 14 -->\n",
+       "<g id=\"node11\" class=\"node\"><title>14</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"296.5,-50.1431 296.5,-103.143 407.5,-103.143 407.5,-50.1431 296.5,-50.1431\"/>\n",
+       "<text text-anchor=\"middle\" x=\"308\" y=\"-72.9431\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"319.5,-50.1431 319.5,-103.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"352\" y=\"-87.9431\" font-family=\"Times,serif\" font-size=\"14.00\">14</text>\n",
+       "<text text-anchor=\"middle\" x=\"352\" y=\"-72.9431\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"352\" y=\"-57.9431\" font-family=\"Times,serif\" font-size=\"14.00\">adder~B</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"384.5,-50.1431 384.5,-103.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"396\" y=\"-86.4431\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"384.5,-77.1431 407.5,-77.1431 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"396\" y=\"-59.9431\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 10&#45;&gt;14 -->\n",
+       "<g id=\"edge10\" class=\"edge\"><title>10:o0&#45;&gt;14:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M252,-76.6431C267.583,-76.6431 273.853,-76.6431 285.653,-76.6431\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"286,-80.1432 296,-76.6431 286,-73.1432 286,-80.1432\"/>\n",
+       "<text text-anchor=\"middle\" x=\"274\" y=\"-80.4431\" font-family=\"Times,serif\" font-size=\"14.00\">9</text>\n",
+       "</g>\n",
+       "<!-- 15 -->\n",
+       "<g id=\"node12\" class=\"node\"><title>15</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"461,-248.143 461,-301.143 575,-301.143 575,-248.143 461,-248.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"472.5\" y=\"-270.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"484,-248.143 484,-301.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"518\" y=\"-285.943\" font-family=\"Times,serif\" font-size=\"14.00\">15</text>\n",
+       "<text text-anchor=\"middle\" x=\"518\" y=\"-270.943\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"518\" y=\"-255.943\" font-family=\"Times,serif\" font-size=\"14.00\">adder~CI</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"552,-248.143 552,-301.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"563.5\" y=\"-284.443\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"552,-275.143 575,-275.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"563.5\" y=\"-257.943\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 1&#45;&gt;15 -->\n",
+       "<g id=\"edge11\" class=\"edge\"><title>1:o0&#45;&gt;15:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M408,-283.643C427.607,-283.643 434.381,-277.354 449.777,-275.29\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"450.241,-278.768 460,-274.643 449.799,-271.782 450.241,-278.768\"/>\n",
+       "<text text-anchor=\"middle\" x=\"433\" y=\"-285.443\" font-family=\"Times,serif\" font-size=\"14.00\">10</text>\n",
+       "</g>\n",
+       "<!-- 16 -->\n",
+       "<g id=\"node13\" class=\"node\"><title>16</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"458.5,-127.143 458.5,-180.143 577.5,-180.143 577.5,-127.143 458.5,-127.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"470\" y=\"-163.443\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"458.5,-154.143 481.5,-154.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"470\" y=\"-136.943\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"481.5,-127.143 481.5,-180.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"518\" y=\"-164.943\" font-family=\"Times,serif\" font-size=\"14.00\">16</text>\n",
+       "<text text-anchor=\"middle\" x=\"518\" y=\"-149.943\" font-family=\"Times,serif\" font-size=\"14.00\">XOR2</text>\n",
+       "<text text-anchor=\"middle\" x=\"518\" y=\"-134.943\" font-family=\"Times,serif\" font-size=\"14.00\">adder~AB</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"554.5,-127.143 554.5,-180.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"566\" y=\"-149.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 13&#45;&gt;16 -->\n",
+       "<g id=\"edge15\" class=\"edge\"><title>13:o0&#45;&gt;16:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M408,-167.643C426.403,-167.643 433.376,-167.643 447.849,-167.643\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"448,-171.143 458,-167.643 448,-164.143 448,-171.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"433\" y=\"-171.443\" font-family=\"Times,serif\" font-size=\"14.00\">14</text>\n",
+       "</g>\n",
+       "<!-- 18 -->\n",
+       "<g id=\"node16\" class=\"node\"><title>18</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"798,-86.6431 798,-178.643 906,-178.643 906,-86.6431 798,-86.6431\"/>\n",
+       "<text text-anchor=\"middle\" x=\"809.5\" y=\"-163.443\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"798,-155.643 821,-155.643 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"809.5\" y=\"-140.443\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"798,-132.643 821,-132.643 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"809.5\" y=\"-117.443\" font-family=\"Times,serif\" font-size=\"14.00\">2</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"798,-109.643 821,-109.643 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"809.5\" y=\"-94.4431\" font-family=\"Times,serif\" font-size=\"14.00\">3</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"821,-86.6431 821,-178.643 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"852\" y=\"-143.943\" font-family=\"Times,serif\" font-size=\"14.00\">18</text>\n",
+       "<text text-anchor=\"middle\" x=\"852\" y=\"-128.943\" font-family=\"Times,serif\" font-size=\"14.00\">AO22</text>\n",
+       "<text text-anchor=\"middle\" x=\"852\" y=\"-113.943\" font-family=\"Times,serif\" font-size=\"14.00\">adder~S</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"883,-86.6431 883,-178.643 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"894.5\" y=\"-128.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 13&#45;&gt;18 -->\n",
+       "<g id=\"edge21\" class=\"edge\"><title>13:o1&#45;&gt;18:i2</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M396,-125.643C396,-102.922 707.19,-118.971 787.838,-120.525\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"787.96,-124.027 798,-120.643 788.041,-117.027 787.96,-124.027\"/>\n",
+       "<text text-anchor=\"middle\" x=\"603\" y=\"-119.443\" font-family=\"Times,serif\" font-size=\"14.00\">20</text>\n",
+       "</g>\n",
+       "<!-- 14&#45;&gt;16 -->\n",
+       "<g id=\"edge16\" class=\"edge\"><title>14:o0&#45;&gt;16:i1</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M396,-104.643C396,-110.991 430.312,-130.082 448.282,-137.576\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"447.41,-140.971 458,-140.643 449.517,-134.296 447.41,-140.971\"/>\n",
+       "<text text-anchor=\"middle\" x=\"433\" y=\"-137.443\" font-family=\"Times,serif\" font-size=\"14.00\">15</text>\n",
+       "</g>\n",
+       "<!-- 14&#45;&gt;18 -->\n",
+       "<g id=\"edge22\" class=\"edge\"><title>14:o1&#45;&gt;18:i3</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M408,-63.6431C583.004,-63.6431 800.325,86.794 808.747,-75.5643\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"805.25,-75.734 809,-85.6431 812.248,-75.5585 805.25,-75.734\"/>\n",
+       "<text text-anchor=\"middle\" x=\"603\" y=\"-32.4431\" font-family=\"Times,serif\" font-size=\"14.00\">21</text>\n",
+       "</g>\n",
+       "<!-- 2 -->\n",
+       "<g id=\"node15\" class=\"node\"><title>2</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"803.5,-275.143 803.5,-328.143 900.5,-328.143 900.5,-275.143 803.5,-275.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"815\" y=\"-311.443\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"803.5,-302.143 826.5,-302.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"815\" y=\"-284.943\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"826.5,-275.143 826.5,-328.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"852\" y=\"-312.943\" font-family=\"Times,serif\" font-size=\"14.00\">2</text>\n",
+       "<text text-anchor=\"middle\" x=\"852\" y=\"-297.943\" font-family=\"Times,serif\" font-size=\"14.00\">XOR2</text>\n",
+       "<text text-anchor=\"middle\" x=\"852\" y=\"-282.943\" font-family=\"Times,serif\" font-size=\"14.00\">adder</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"877.5,-275.143 877.5,-328.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"889\" y=\"-297.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 15&#45;&gt;2 -->\n",
+       "<g id=\"edge18\" class=\"edge\"><title>15:o0&#45;&gt;2:i1</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M576,-288.643C673.011,-288.643 699.751,-288.643 791.959,-288.643\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"792,-292.143 802,-288.643 792,-285.143 792,-292.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"688\" y=\"-292.443\" font-family=\"Times,serif\" font-size=\"14.00\">17</text>\n",
+       "</g>\n",
+       "<!-- 15&#45;&gt;18 -->\n",
+       "<g id=\"edge20\" class=\"edge\"><title>15:o1&#45;&gt;18:i1</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M564,-246.643C564,-136.627 677.361,-144.064 787.68,-144.615\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"787.991,-148.116 798,-144.643 788.009,-141.116 787.991,-148.116\"/>\n",
+       "<text text-anchor=\"middle\" x=\"688\" y=\"-160.443\" font-family=\"Times,serif\" font-size=\"14.00\">19</text>\n",
+       "</g>\n",
+       "<!-- 17 -->\n",
+       "<g id=\"node14\" class=\"node\"><title>17</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"628.5,-199.143 628.5,-252.143 747.5,-252.143 747.5,-199.143 628.5,-199.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"640\" y=\"-221.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"651.5,-199.143 651.5,-252.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"688\" y=\"-236.943\" font-family=\"Times,serif\" font-size=\"14.00\">17</text>\n",
+       "<text text-anchor=\"middle\" x=\"688\" y=\"-221.943\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"688\" y=\"-206.943\" font-family=\"Times,serif\" font-size=\"14.00\">adder~AB</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"724.5,-199.143 724.5,-252.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"736\" y=\"-235.443\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"724.5,-226.143 747.5,-226.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"736\" y=\"-208.943\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 16&#45;&gt;17 -->\n",
+       "<g id=\"edge14\" class=\"edge\"><title>16:o0&#45;&gt;17:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M578,-153.643C581.3,-153.643 617.285,-177.921 632.871,-190.556\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"630.441,-193.075 640,-197.643 635.376,-188.111 630.441,-193.075\"/>\n",
+       "<text text-anchor=\"middle\" x=\"603\" y=\"-177.443\" font-family=\"Times,serif\" font-size=\"14.00\">13</text>\n",
+       "</g>\n",
+       "<!-- 17&#45;&gt;2 -->\n",
+       "<g id=\"edge17\" class=\"edge\"><title>17:o0&#45;&gt;2:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M736,-253.643C736,-255.648 776.313,-293.828 793.963,-309.256\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"791.994,-312.161 802,-315.643 796.349,-306.681 791.994,-312.161\"/>\n",
+       "<text text-anchor=\"middle\" x=\"773\" y=\"-300.443\" font-family=\"Times,serif\" font-size=\"14.00\">16</text>\n",
+       "</g>\n",
+       "<!-- 17&#45;&gt;18 -->\n",
+       "<g id=\"edge19\" class=\"edge\"><title>17:o1&#45;&gt;18:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M748,-212.643C775.091,-212.643 801.682,-210.962 807.725,-189.628\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"811.205,-190.006 809,-179.643 804.261,-189.119 811.205,-190.006\"/>\n",
+       "<text text-anchor=\"middle\" x=\"773\" y=\"-216.443\" font-family=\"Times,serif\" font-size=\"14.00\">18</text>\n",
+       "</g>\n",
+       "<!-- 3 -->\n",
+       "<g id=\"node17\" class=\"node\"><title>3</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"950.5,-314.143 950.5,-367.143 1061.5,-367.143 1061.5,-314.143 950.5,-314.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"962\" y=\"-336.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"973.5,-314.143 973.5,-367.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"1006\" y=\"-351.943\" font-family=\"Times,serif\" font-size=\"14.00\">3</text>\n",
+       "<text text-anchor=\"middle\" x=\"1006\" y=\"-336.943\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"1006\" y=\"-321.943\" font-family=\"Times,serif\" font-size=\"14.00\">cout</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"1038.5,-314.143 1038.5,-367.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"1050\" y=\"-336.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 2&#45;&gt;3 -->\n",
+       "<g id=\"edge2\" class=\"edge\"><title>2:o0&#45;&gt;3:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M902,-301.643C924.875,-301.643 951.863,-290.173 959.739,-302.874\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"956.335,-303.69 962,-312.643 963.155,-302.111 956.335,-303.69\"/>\n",
+       "<text text-anchor=\"middle\" x=\"928\" y=\"-303.443\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 4 -->\n",
+       "<g id=\"node18\" class=\"node\"><title>4</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"950.5,-106.143 950.5,-159.143 1061.5,-159.143 1061.5,-106.143 950.5,-106.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"962\" y=\"-128.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"973.5,-106.143 973.5,-159.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"1006\" y=\"-143.943\" font-family=\"Times,serif\" font-size=\"14.00\">4</text>\n",
+       "<text text-anchor=\"middle\" x=\"1006\" y=\"-128.943\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"1006\" y=\"-113.943\" font-family=\"Times,serif\" font-size=\"14.00\">s</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"1038.5,-106.143 1038.5,-159.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"1050\" y=\"-128.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 18&#45;&gt;4 -->\n",
+       "<g id=\"edge3\" class=\"edge\"><title>18:o0&#45;&gt;4:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M906,-132.643C921.583,-132.643 927.853,-132.643 939.653,-132.643\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"940,-136.143 950,-132.643 940,-129.143 940,-136.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"928\" y=\"-136.443\" font-family=\"Times,serif\" font-size=\"14.00\">2</text>\n",
+       "</g>\n",
+       "<!-- 3&#45;&gt;0 -->\n",
+       "<g id=\"edge7\" class=\"edge\"><title>3:o0&#45;&gt;0:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" stroke-dasharray=\"1,5\" d=\"M1050,-368.643C1050,-456.361 940.718,-356.643 853,-356.643 195,-356.643 195,-356.643 195,-356.643 116.12,-356.643 20.0731,-408.263 11.603,-340.949\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"15.0783,-340.422 11,-330.643 8.09022,-340.83 15.0783,-340.422\"/>\n",
+       "<text text-anchor=\"middle\" x=\"518\" y=\"-360.443\" font-family=\"Times,serif\" font-size=\"14.00\">6</text>\n",
+       "</g>\n",
+       "<!-- 11 -->\n",
+       "<g id=\"node19\" class=\"node\"><title>11</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"1112,-106.143 1112,-159.143 1206,-159.143 1206,-106.143 1112,-106.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"1123.5\" y=\"-128.943\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"1135,-106.143 1135,-159.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"1160.5\" y=\"-143.943\" font-family=\"Times,serif\" font-size=\"14.00\">11 [3]</text>\n",
+       "<text text-anchor=\"middle\" x=\"1160.5\" y=\"-128.943\" font-family=\"Times,serif\" font-size=\"14.00\">output</text>\n",
+       "<text text-anchor=\"middle\" x=\"1160.5\" y=\"-113.943\" font-family=\"Times,serif\" font-size=\"14.00\">s</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"1186,-106.143 1186,-159.143 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"1196\" y=\"-128.943\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "</g>\n",
+       "<!-- 4&#45;&gt;11 -->\n",
+       "<g id=\"edge12\" class=\"edge\"><title>4:o0&#45;&gt;11:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M1062,-132.643C1080.4,-132.643 1087.38,-132.643 1101.85,-132.643\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"1102,-136.143 1112,-132.643 1102,-129.143 1102,-136.143\"/>\n",
+       "<text text-anchor=\"middle\" x=\"1087\" y=\"-136.443\" font-family=\"Times,serif\" font-size=\"14.00\">11</text>\n",
+       "</g>\n",
+       "</g>\n",
+       "</svg>\n"
+      ],
+      "text/plain": [
+       "<graphviz.graphs.Digraph at 0x7f8e491c2d40>"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.dot()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The graph is getting quite big due to all the forks. If we don't need the signal names anymore, we can remove all forks that only connect to one successor node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/svg+xml": [
+       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
+       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
+       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
+       "<!-- Generated by graphviz version 2.30.1 (20201013.1554)\n",
+       " -->\n",
+       "<!-- Title: %3 Pages: 1 -->\n",
+       "<svg width=\"907pt\" height=\"297pt\"\n",
+       " viewBox=\"0.00 0.00 906.77 297.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
+       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 293)\">\n",
+       "<title>%3</title>\n",
+       "<polygon fill=\"white\" stroke=\"white\" points=\"-4,5 -4,-293 903.765,-293 903.765,5 -4,5\"/>\n",
+       "<!-- 0 -->\n",
+       "<g id=\"node1\" class=\"node\"><title>0</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"14.7652,-0.5 14.7652,-53.5 104.765,-53.5 104.765,-0.5 14.7652,-0.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"26.2652\" y=\"-36.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"14.7652,-27.5 37.7652,-27.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"26.2652\" y=\"-10.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"37.7652,-0.5 37.7652,-53.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"59.7652\" y=\"-38.3\" font-family=\"Times,serif\" font-size=\"14.00\">0 [4]</text>\n",
+       "<text text-anchor=\"middle\" x=\"59.7652\" y=\"-23.3\" font-family=\"Times,serif\" font-size=\"14.00\">DFF</text>\n",
+       "<text text-anchor=\"middle\" x=\"59.7652\" y=\"-8.3\" font-family=\"Times,serif\" font-size=\"14.00\">carry</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"81.7652,-0.5 81.7652,-53.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"93.2652\" y=\"-23.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 6 -->\n",
+       "<g id=\"node5\" class=\"node\"><title>6</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"154.765,-10.5 154.765,-63.5 268.765,-63.5 268.765,-10.5 154.765,-10.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"166.265\" y=\"-33.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"177.765,-10.5 177.765,-63.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"211.765\" y=\"-48.3\" font-family=\"Times,serif\" font-size=\"14.00\">6</text>\n",
+       "<text text-anchor=\"middle\" x=\"211.765\" y=\"-33.3\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"211.765\" y=\"-18.3\" font-family=\"Times,serif\" font-size=\"14.00\">adder~CI</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"245.765,-10.5 245.765,-63.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"257.265\" y=\"-46.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"245.765,-37.5 268.765,-37.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"257.265\" y=\"-20.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 0&#45;&gt;6 -->\n",
+       "<g id=\"edge13\" class=\"edge\"><title>0:o0&#45;&gt;6:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M104.765,-27C123.532,-27 129.964,-33.8579 144.491,-36.2153\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"144.528,-39.7282 154.765,-37 145.061,-32.7485 144.528,-39.7282\"/>\n",
+       "<text text-anchor=\"middle\" x=\"129.765\" y=\"-38.8\" font-family=\"Times,serif\" font-size=\"14.00\">12</text>\n",
+       "</g>\n",
+       "<!-- 5 -->\n",
+       "<g id=\"node2\" class=\"node\"><title>5</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"16.2652,-90.5 16.2652,-143.5 103.265,-143.5 103.265,-90.5 16.2652,-90.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"26.2652\" y=\"-113.3\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"36.2652,-90.5 36.2652,-143.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"58.2652\" y=\"-128.3\" font-family=\"Times,serif\" font-size=\"14.00\">5 [0]</text>\n",
+       "<text text-anchor=\"middle\" x=\"58.2652\" y=\"-113.3\" font-family=\"Times,serif\" font-size=\"14.00\">input</text>\n",
+       "<text text-anchor=\"middle\" x=\"58.2652\" y=\"-98.3\" font-family=\"Times,serif\" font-size=\"14.00\">clk</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"80.2652,-90.5 80.2652,-143.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"91.7652\" y=\"-113.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 5&#45;&gt;0 -->\n",
+       "<g id=\"edge4\" class=\"edge\"><title>5:o0&#45;&gt;0:i1</title>\n",
+       "<path fill=\"none\" stroke=\"black\" stroke-dasharray=\"1,5\" d=\"M3.93752,-16.7734C-9.49505,-27.5832 15.186,-66.9364 38.7652,-80 49.2493,-85.8085 91.7652,-78.0144 91.7652,-90\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"5.09172,-20.0845 13.7652,-14 3.19048,-13.3476 5.09172,-20.0845\"/>\n",
+       "<text text-anchor=\"middle\" x=\"42.2652\" y=\"-68.3\" font-family=\"Times,serif\" font-size=\"14.00\">3</text>\n",
+       "</g>\n",
+       "<!-- 7 -->\n",
+       "<g id=\"node3\" class=\"node\"><title>7</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"16.2652,-234.5 16.2652,-287.5 103.265,-287.5 103.265,-234.5 16.2652,-234.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"26.2652\" y=\"-257.3\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"36.2652,-234.5 36.2652,-287.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"58.2652\" y=\"-272.3\" font-family=\"Times,serif\" font-size=\"14.00\">7 [1]</text>\n",
+       "<text text-anchor=\"middle\" x=\"58.2652\" y=\"-257.3\" font-family=\"Times,serif\" font-size=\"14.00\">input</text>\n",
+       "<text text-anchor=\"middle\" x=\"58.2652\" y=\"-242.3\" font-family=\"Times,serif\" font-size=\"14.00\">a</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"80.2652,-234.5 80.2652,-287.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"91.7652\" y=\"-257.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 10 -->\n",
+       "<g id=\"node6\" class=\"node\"><title>10</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"156.265,-233.5 156.265,-286.5 267.265,-286.5 267.265,-233.5 156.265,-233.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"167.765\" y=\"-256.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"179.265,-233.5 179.265,-286.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"211.765\" y=\"-271.3\" font-family=\"Times,serif\" font-size=\"14.00\">10</text>\n",
+       "<text text-anchor=\"middle\" x=\"211.765\" y=\"-256.3\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"211.765\" y=\"-241.3\" font-family=\"Times,serif\" font-size=\"14.00\">adder~A</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"244.265,-233.5 244.265,-286.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"255.765\" y=\"-269.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"244.265,-260.5 267.265,-260.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"255.765\" y=\"-243.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 7&#45;&gt;10 -->\n",
+       "<g id=\"edge5\" class=\"edge\"><title>7:o0&#45;&gt;10:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M104.765,-261C123.172,-261 130.139,-260.314 144.613,-260.078\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"144.793,-263.577 154.765,-260 144.738,-256.577 144.793,-263.577\"/>\n",
+       "<text text-anchor=\"middle\" x=\"129.765\" y=\"-263.8\" font-family=\"Times,serif\" font-size=\"14.00\">4</text>\n",
+       "</g>\n",
+       "<!-- 9 -->\n",
+       "<g id=\"node4\" class=\"node\"><title>9</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"16.2652,-162.5 16.2652,-215.5 103.265,-215.5 103.265,-162.5 16.2652,-162.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"26.2652\" y=\"-185.3\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"36.2652,-162.5 36.2652,-215.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"58.2652\" y=\"-200.3\" font-family=\"Times,serif\" font-size=\"14.00\">9 [2]</text>\n",
+       "<text text-anchor=\"middle\" x=\"58.2652\" y=\"-185.3\" font-family=\"Times,serif\" font-size=\"14.00\">input</text>\n",
+       "<text text-anchor=\"middle\" x=\"58.2652\" y=\"-170.3\" font-family=\"Times,serif\" font-size=\"14.00\">b</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"80.2652,-162.5 80.2652,-215.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"91.7652\" y=\"-185.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 8 -->\n",
+       "<g id=\"node7\" class=\"node\"><title>8</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"156.265,-153.5 156.265,-206.5 267.265,-206.5 267.265,-153.5 156.265,-153.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"167.765\" y=\"-176.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"179.265,-153.5 179.265,-206.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"211.765\" y=\"-191.3\" font-family=\"Times,serif\" font-size=\"14.00\">8</text>\n",
+       "<text text-anchor=\"middle\" x=\"211.765\" y=\"-176.3\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"211.765\" y=\"-161.3\" font-family=\"Times,serif\" font-size=\"14.00\">adder~B</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"244.265,-153.5 244.265,-206.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"255.765\" y=\"-189.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"244.265,-180.5 267.265,-180.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"255.765\" y=\"-163.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 9&#45;&gt;8 -->\n",
+       "<g id=\"edge6\" class=\"edge\"><title>9:o0&#45;&gt;8:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M104.765,-189C123.464,-189 129.998,-182.828 144.515,-180.706\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"145.029,-184.179 154.765,-180 144.548,-177.196 145.029,-184.179\"/>\n",
+       "<text text-anchor=\"middle\" x=\"129.765\" y=\"-190.8\" font-family=\"Times,serif\" font-size=\"14.00\">5</text>\n",
+       "</g>\n",
+       "<!-- 2 -->\n",
+       "<g id=\"node10\" class=\"node\"><title>2</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"658.265,-81.5 658.265,-134.5 755.265,-134.5 755.265,-81.5 658.265,-81.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"669.765\" y=\"-117.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"658.265,-108.5 681.265,-108.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"669.765\" y=\"-91.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"681.265,-81.5 681.265,-134.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"706.765\" y=\"-119.3\" font-family=\"Times,serif\" font-size=\"14.00\">2</text>\n",
+       "<text text-anchor=\"middle\" x=\"706.765\" y=\"-104.3\" font-family=\"Times,serif\" font-size=\"14.00\">XOR2</text>\n",
+       "<text text-anchor=\"middle\" x=\"706.765\" y=\"-89.3\" font-family=\"Times,serif\" font-size=\"14.00\">adder</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"732.265,-81.5 732.265,-134.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"743.765\" y=\"-104.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 6&#45;&gt;2 -->\n",
+       "<g id=\"edge9\" class=\"edge\"><title>6:o0&#45;&gt;2:i1</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M268.765,-51C309.61,-51 578.963,-89.9948 646.587,-94.5645\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"646.625,-98.0693 656.765,-95 646.924,-91.0757 646.625,-98.0693\"/>\n",
+       "<text text-anchor=\"middle\" x=\"463.765\" y=\"-78.8\" font-family=\"Times,serif\" font-size=\"14.00\">8</text>\n",
+       "</g>\n",
+       "<!-- 1 -->\n",
+       "<g id=\"node11\" class=\"node\"><title>1</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"652.765,-156 652.765,-248 760.765,-248 760.765,-156 652.765,-156\"/>\n",
+       "<text text-anchor=\"middle\" x=\"664.265\" y=\"-232.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"652.765,-225 675.765,-225 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"664.265\" y=\"-209.8\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"652.765,-202 675.765,-202 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"664.265\" y=\"-186.8\" font-family=\"Times,serif\" font-size=\"14.00\">2</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"652.765,-179 675.765,-179 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"664.265\" y=\"-163.8\" font-family=\"Times,serif\" font-size=\"14.00\">3</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"675.765,-156 675.765,-248 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"706.765\" y=\"-213.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<text text-anchor=\"middle\" x=\"706.765\" y=\"-198.3\" font-family=\"Times,serif\" font-size=\"14.00\">AO22</text>\n",
+       "<text text-anchor=\"middle\" x=\"706.765\" y=\"-183.3\" font-family=\"Times,serif\" font-size=\"14.00\">adder~S</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"737.765,-156 737.765,-248 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"749.265\" y=\"-198.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 6&#45;&gt;1 -->\n",
+       "<g id=\"edge12\" class=\"edge\"><title>6:o1&#45;&gt;1:i1</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M268.765,-24C359.528,-24 386.047,-21.4316 470.765,-54 552.456,-85.4047 580.985,-93.9542 634.765,-163 646.651,-178.26 635.499,-201.984 643.214,-210.67\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"642.17,-214.013 652.765,-214 644.475,-207.403 642.17,-214.013\"/>\n",
+       "<text text-anchor=\"middle\" x=\"463.765\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">11</text>\n",
+       "</g>\n",
+       "<!-- 10&#45;&gt;1 -->\n",
+       "<g id=\"edge7\" class=\"edge\"><title>10:o1&#45;&gt;1:i2</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M268.765,-247C293.226,-247 295.167,-230.437 318.765,-224 459.344,-185.654 501.583,-189.778 642.45,-189.992\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"642.762,-193.492 652.765,-190 642.768,-186.492 642.762,-193.492\"/>\n",
+       "<text text-anchor=\"middle\" x=\"463.765\" y=\"-198.8\" font-family=\"Times,serif\" font-size=\"14.00\">6</text>\n",
+       "</g>\n",
+       "<!-- 4 -->\n",
+       "<g id=\"node8\" class=\"node\"><title>4</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"319.265,-233.5 319.265,-286.5 438.265,-286.5 438.265,-233.5 319.265,-233.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"330.765\" y=\"-269.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"319.265,-260.5 342.265,-260.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"330.765\" y=\"-243.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"342.265,-233.5 342.265,-286.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"378.765\" y=\"-271.3\" font-family=\"Times,serif\" font-size=\"14.00\">4</text>\n",
+       "<text text-anchor=\"middle\" x=\"378.765\" y=\"-256.3\" font-family=\"Times,serif\" font-size=\"14.00\">XOR2</text>\n",
+       "<text text-anchor=\"middle\" x=\"378.765\" y=\"-241.3\" font-family=\"Times,serif\" font-size=\"14.00\">adder~AB</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"415.265,-233.5 415.265,-286.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"426.765\" y=\"-256.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 10&#45;&gt;4 -->\n",
+       "<g id=\"edge15\" class=\"edge\"><title>10:o0&#45;&gt;4:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M268.765,-274C287.168,-274 294.141,-274 308.615,-274\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"308.765,-277.5 318.765,-274 308.765,-270.5 308.765,-277.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"293.765\" y=\"-277.8\" font-family=\"Times,serif\" font-size=\"14.00\">14</text>\n",
+       "</g>\n",
+       "<!-- 8&#45;&gt;4 -->\n",
+       "<g id=\"edge1\" class=\"edge\"><title>8:o0&#45;&gt;4:i1</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M268.765,-194C297.423,-194 324.007,-198.466 329.662,-222.012\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"326.188,-222.445 330.765,-232 333.146,-221.676 326.188,-222.445\"/>\n",
+       "<text text-anchor=\"middle\" x=\"293.765\" y=\"-200.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "</g>\n",
+       "<!-- 8&#45;&gt;1 -->\n",
+       "<g id=\"edge11\" class=\"edge\"><title>8:o1&#45;&gt;1:i3</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M268.765,-167C435.932,-167 480.295,-167 642.425,-167\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"642.765,-170.5 652.765,-167 642.765,-163.5 642.765,-170.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"463.765\" y=\"-170.8\" font-family=\"Times,serif\" font-size=\"14.00\">10</text>\n",
+       "</g>\n",
+       "<!-- 3 -->\n",
+       "<g id=\"node9\" class=\"node\"><title>3</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"489.265,-233.5 489.265,-286.5 608.265,-286.5 608.265,-233.5 489.265,-233.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"500.765\" y=\"-256.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"512.265,-233.5 512.265,-286.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"548.765\" y=\"-271.3\" font-family=\"Times,serif\" font-size=\"14.00\">3</text>\n",
+       "<text text-anchor=\"middle\" x=\"548.765\" y=\"-256.3\" font-family=\"Times,serif\" font-size=\"14.00\">__fork__</text>\n",
+       "<text text-anchor=\"middle\" x=\"548.765\" y=\"-241.3\" font-family=\"Times,serif\" font-size=\"14.00\">adder~AB</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"585.265,-233.5 585.265,-286.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"596.765\" y=\"-269.8\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"585.265,-260.5 608.265,-260.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"596.765\" y=\"-243.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 4&#45;&gt;3 -->\n",
+       "<g id=\"edge14\" class=\"edge\"><title>4:o0&#45;&gt;3:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M438.765,-260C457.168,-260 464.141,-260 478.615,-260\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"478.765,-263.5 488.765,-260 478.765,-256.5 478.765,-263.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"463.765\" y=\"-263.8\" font-family=\"Times,serif\" font-size=\"14.00\">13</text>\n",
+       "</g>\n",
+       "<!-- 3&#45;&gt;1 -->\n",
+       "<g id=\"edge8\" class=\"edge\"><title>3:o1&#45;&gt;1:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M608.765,-247C617.52,-247 618.277,-241.145 626.765,-239 633.788,-237.225 637.661,-236.888 642.408,-236.884\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"642.727,-240.388 652.765,-237 642.805,-233.388 642.727,-240.388\"/>\n",
+       "<text text-anchor=\"middle\" x=\"630.765\" y=\"-242.8\" font-family=\"Times,serif\" font-size=\"14.00\">7</text>\n",
+       "</g>\n",
+       "<!-- 3&#45;&gt;2 -->\n",
+       "<g id=\"edge10\" class=\"edge\"><title>3:o0&#45;&gt;2:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M608.765,-274C667.987,-274 632.38,-175.662 652.765,-146 654.86,-142.952 658.213,-142.256 661.389,-141.812\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"663.545,-144.577 669.765,-136 659.554,-138.826 663.545,-144.577\"/>\n",
+       "<text text-anchor=\"middle\" x=\"630.765\" y=\"-272.8\" font-family=\"Times,serif\" font-size=\"14.00\">9</text>\n",
+       "</g>\n",
+       "<!-- 2&#45;&gt;0 -->\n",
+       "<g id=\"edge2\" class=\"edge\"><title>2:o0&#45;&gt;0:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" stroke-dasharray=\"1,5\" d=\"M743.765,-80C743.765,-41.7752 694.675,-71.0609 657.765,-81 646.427,-84.0531 645.84,-90.0969 634.765,-94 598.655,-106.726 588.052,-108 549.765,-108 210.765,-108 210.765,-108 210.765,-108 206.15,-108 71.6313,-72.4854 34.935,-59.3351\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"36.3018,-56.1099 25.7652,-55 33.3099,-62.4383 36.3018,-56.1099\"/>\n",
+       "<text text-anchor=\"middle\" x=\"378.765\" y=\"-111.8\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n",
+       "</g>\n",
+       "<!-- 11 -->\n",
+       "<g id=\"node12\" class=\"node\"><title>11</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"804.765,-175.5 804.765,-228.5 898.765,-228.5 898.765,-175.5 804.765,-175.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"816.265\" y=\"-198.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"827.765,-175.5 827.765,-228.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"853.265\" y=\"-213.3\" font-family=\"Times,serif\" font-size=\"14.00\">11 [3]</text>\n",
+       "<text text-anchor=\"middle\" x=\"853.265\" y=\"-198.3\" font-family=\"Times,serif\" font-size=\"14.00\">output</text>\n",
+       "<text text-anchor=\"middle\" x=\"853.265\" y=\"-183.3\" font-family=\"Times,serif\" font-size=\"14.00\">s</text>\n",
+       "<polyline fill=\"none\" stroke=\"black\" points=\"878.765,-175.5 878.765,-228.5 \"/>\n",
+       "<text text-anchor=\"middle\" x=\"888.765\" y=\"-198.3\" font-family=\"Times,serif\" font-size=\"14.00\"> </text>\n",
+       "</g>\n",
+       "<!-- 1&#45;&gt;11 -->\n",
+       "<g id=\"edge3\" class=\"edge\"><title>1:o0&#45;&gt;11:i0</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M760.765,-202C776.349,-202 782.618,-202 794.418,-202\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"794.765,-205.5 804.765,-202 794.765,-198.5 794.765,-205.5\"/>\n",
+       "<text text-anchor=\"middle\" x=\"782.765\" y=\"-205.8\" font-family=\"Times,serif\" font-size=\"14.00\">2</text>\n",
+       "</g>\n",
+       "</g>\n",
+       "</svg>\n"
+      ],
+      "text/plain": [
+       "<graphviz.graphs.Digraph at 0x7f8e491c1120>"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.eliminate_1to1_forks()\n",
+    "adder.dot()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's resolve the b15 circuit as well."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'__node__': 22819,\n",
+       " '__cell__': 11653,\n",
+       " '__fork__': 11166,\n",
+       " '__io__': 111,\n",
+       " '__line__': 33313,\n",
+       " '__const1__': 1,\n",
+       " '__comb__': 11125,\n",
+       " 'BUF1': 526,\n",
+       " 'INV1': 897,\n",
+       " 'AND2': 1464,\n",
+       " 'OR2': 1095,\n",
+       " 'NOR2': 114,\n",
+       " 'DFF': 417,\n",
+       " '__dff__': 417,\n",
+       " 'NAND2': 6611,\n",
+       " 'output': 71,\n",
+       " 'input': 40,\n",
+       " 'MUX21': 417,\n",
+       " '__latch__': 0,\n",
+       " '__seq__': 417}"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "b15_prim = b15.copy()\n",
+    "b15_prim.resolve_tlib_cells(SAED32)\n",
+    "b15_prim.stats"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The resolved circuit `b15_prim` contains primitive DFF and no scan-cells.\n",
+    "The scan-chain is still present but it now contains ordinary multiplexers and flip-flops."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "---------------\n",
+      "1385:DFF\"InstQueue_reg_0__0_\" <32766 <12707 >32767\n",
+      "1383:DFF\"InstQueue_reg_0__1_\" <32763 <12702 >32764\n",
+      "1387:DFF\"InstQueue_reg_0__2_\" <32769 <12712 >32770\n",
+      "1381:DFF\"InstQueue_reg_0__3_\" <32760 <12697 >32761\n",
+      "1389:DFF\"InstQueue_reg_0__4_\" <32772 <12717 >32773\n",
+      "1379:DFF\"InstQueue_reg_0__5_\" <32757 <12692 >32758\n",
+      "1391:DFF\"InstQueue_reg_0__6_\" <32775 <12722 >32776\n",
+      "1377:DFF\"InstQueue_reg_0__7_\" <32754 <12687 >32755\n",
+      "---------------\n",
+      "1401:DFF\"InstQueue_reg_1__0_\" <32790 <12747 >32791\n",
+      "1399:DFF\"InstQueue_reg_1__1_\" <32787 <12742 >32788\n",
+      "1403:DFF\"InstQueue_reg_1__2_\" <32793 <12752 >32794\n",
+      "1397:DFF\"InstQueue_reg_1__3_\" <32784 <12737 >32785\n",
+      "1405:DFF\"InstQueue_reg_1__4_\" <32796 <12757 >32797\n",
+      "1395:DFF\"InstQueue_reg_1__5_\" <32781 <12732 >32782\n",
+      "1407:DFF\"InstQueue_reg_1__6_\" <32799 <12762 >32800\n",
+      "1393:DFF\"InstQueue_reg_1__7_\" <32778 <12727 >32779\n",
+      "---------------\n",
+      "1417:DFF\"InstQueue_reg_2__0_\" <32814 <12787 >32815\n",
+      "1415:DFF\"InstQueue_reg_2__1_\" <32811 <12782 >32812\n",
+      "1419:DFF\"InstQueue_reg_2__2_\" <32817 <12792 >32818\n",
+      "1413:DFF\"InstQueue_reg_2__3_\" <32808 <12777 >32809\n",
+      "1421:DFF\"InstQueue_reg_2__4_\" <32820 <12797 >32821\n",
+      "1411:DFF\"InstQueue_reg_2__5_\" <32805 <12772 >32806\n",
+      "1423:DFF\"InstQueue_reg_2__6_\" <32823 <12802 >32824\n",
+      "1409:DFF\"InstQueue_reg_2__7_\" <32802 <12767 >32803\n",
+      "---------------\n",
+      "1433:DFF\"InstQueue_reg_3__0_\" <32838 <12827 >32839\n",
+      "1431:DFF\"InstQueue_reg_3__1_\" <32835 <12822 >32836\n",
+      "1435:DFF\"InstQueue_reg_3__2_\" <32841 <12832 >32842\n",
+      "1429:DFF\"InstQueue_reg_3__3_\" <32832 <12817 >32833\n",
+      "1437:DFF\"InstQueue_reg_3__4_\" <32844 <12837 >32845\n",
+      "1427:DFF\"InstQueue_reg_3__5_\" <32829 <12812 >32830\n",
+      "1439:DFF\"InstQueue_reg_3__6_\" <32847 <12842 >32848\n",
+      "1425:DFF\"InstQueue_reg_3__7_\" <32826 <12807 >32827\n",
+      "---------------\n",
+      "1449:DFF\"InstQueue_reg_4__0_\" <32862 <12867 >32863\n",
+      "1447:DFF\"InstQueue_reg_4__1_\" <32859 <12862 >32860\n",
+      "1451:DFF\"InstQueue_reg_4__2_\" <32865 <12872 >32866\n",
+      "1445:DFF\"InstQueue_reg_4__3_\" <32856 <12857 >32857\n",
+      "1453:DFF\"InstQueue_reg_4__4_\" <32868 <12877 >32869\n",
+      "1443:DFF\"InstQueue_reg_4__5_\" <32853 <12852 >32854\n",
+      "1455:DFF\"InstQueue_reg_4__6_\" <32871 <12882 >32872\n",
+      "1441:DFF\"InstQueue_reg_4__7_\" <32850 <12847 >32851\n",
+      "---------------\n",
+      "1465:DFF\"InstQueue_reg_5__0_\" <32886 <12907 >32887\n",
+      "1463:DFF\"InstQueue_reg_5__1_\" <32883 <12902 >32884\n",
+      "1467:DFF\"InstQueue_reg_5__2_\" <32889 <12912 >32890\n",
+      "1461:DFF\"InstQueue_reg_5__3_\" <32880 <12897 >32881\n",
+      "1469:DFF\"InstQueue_reg_5__4_\" <32892 <12917 >32893\n",
+      "1459:DFF\"InstQueue_reg_5__5_\" <32877 <12892 >32878\n",
+      "1471:DFF\"InstQueue_reg_5__6_\" <32895 <12922 >32896\n",
+      "1457:DFF\"InstQueue_reg_5__7_\" <32874 <12887 >32875\n",
+      "---------------\n",
+      "1481:DFF\"InstQueue_reg_6__0_\" <32910 <12947 >32911\n",
+      "1479:DFF\"InstQueue_reg_6__1_\" <32907 <12942 >32908\n",
+      "1483:DFF\"InstQueue_reg_6__2_\" <32913 <12952 >32914\n",
+      "1477:DFF\"InstQueue_reg_6__3_\" <32904 <12937 >32905\n",
+      "1485:DFF\"InstQueue_reg_6__4_\" <32916 <12957 >32917\n",
+      "1475:DFF\"InstQueue_reg_6__5_\" <32901 <12932 >32902\n",
+      "1487:DFF\"InstQueue_reg_6__6_\" <32919 <12962 >32920\n",
+      "1473:DFF\"InstQueue_reg_6__7_\" <32898 <12927 >32899\n",
+      "---------------\n",
+      "1497:DFF\"InstQueue_reg_7__0_\" <32934 <12987 >32935\n",
+      "1495:DFF\"InstQueue_reg_7__1_\" <32931 <12982 >32932\n",
+      "1499:DFF\"InstQueue_reg_7__2_\" <32937 <12992 >32938\n",
+      "1493:DFF\"InstQueue_reg_7__3_\" <32928 <12977 >32929\n",
+      "1501:DFF\"InstQueue_reg_7__4_\" <32940 <12997 >32941\n",
+      "1491:DFF\"InstQueue_reg_7__5_\" <32925 <12972 >32926\n",
+      "1503:DFF\"InstQueue_reg_7__6_\" <32943 <13002 >32944\n",
+      "1489:DFF\"InstQueue_reg_7__7_\" <32922 <12967 >32923\n",
+      "---------------\n",
+      "1513:DFF\"InstQueue_reg_8__0_\" <32958 <13027 >32959\n",
+      "1511:DFF\"InstQueue_reg_8__1_\" <32955 <13022 >32956\n",
+      "1515:DFF\"InstQueue_reg_8__2_\" <32961 <13032 >32963\n",
+      "1509:DFF\"InstQueue_reg_8__3_\" <32952 <13017 >32953\n",
+      "1518:DFF\"InstQueue_reg_8__4_\" <32965 <13037 >32966\n",
+      "1507:DFF\"InstQueue_reg_8__5_\" <32949 <13012 >32950\n",
+      "1520:DFF\"InstQueue_reg_8__6_\" <32968 <13042 >32969\n",
+      "1505:DFF\"InstQueue_reg_8__7_\" <32946 <13007 >32947\n",
+      "---------------\n",
+      "1530:DFF\"InstQueue_reg_9__0_\" <32983 <13067 >32984\n",
+      "1528:DFF\"InstQueue_reg_9__1_\" <32980 <13062 >32981\n",
+      "1532:DFF\"InstQueue_reg_9__2_\" <32986 <13072 >32987\n",
+      "1526:DFF\"InstQueue_reg_9__3_\" <32977 <13057 >32978\n",
+      "1534:DFF\"InstQueue_reg_9__4_\" <32989 <13077 >32990\n",
+      "1524:DFF\"InstQueue_reg_9__5_\" <32974 <13052 >32975\n",
+      "1536:DFF\"InstQueue_reg_9__6_\" <32992 <13082 >32993\n",
+      "1522:DFF\"InstQueue_reg_9__7_\" <32971 <13047 >32972\n",
+      "---------------\n",
+      "1546:DFF\"InstQueue_reg_10__0_\" <33007 <13107 >33008\n",
+      "1544:DFF\"InstQueue_reg_10__1_\" <33004 <13102 >33005\n",
+      "1548:DFF\"InstQueue_reg_10__2_\" <33010 <13112 >33011\n",
+      "1542:DFF\"InstQueue_reg_10__3_\" <33001 <13097 >33002\n",
+      "1550:DFF\"InstQueue_reg_10__4_\" <33013 <13117 >33015\n",
+      "1540:DFF\"InstQueue_reg_10__5_\" <32998 <13092 >32999\n",
+      "1553:DFF\"InstQueue_reg_10__6_\" <33017 <13122 >33018\n",
+      "1538:DFF\"InstQueue_reg_10__7_\" <32995 <13087 >32996\n",
+      "---------------\n",
+      "1563:DFF\"InstQueue_reg_11__0_\" <33032 <13147 >33033\n",
+      "1561:DFF\"InstQueue_reg_11__1_\" <33029 <13142 >33030\n",
+      "1565:DFF\"InstQueue_reg_11__2_\" <33035 <13152 >33036\n",
+      "1559:DFF\"InstQueue_reg_11__3_\" <33026 <13137 >33027\n",
+      "1567:DFF\"InstQueue_reg_11__4_\" <33038 <13157 >33039\n",
+      "1557:DFF\"InstQueue_reg_11__5_\" <33023 <13132 >33024\n",
+      "1569:DFF\"InstQueue_reg_11__6_\" <33041 <13162 >33042\n",
+      "1555:DFF\"InstQueue_reg_11__7_\" <33020 <13127 >33021\n",
+      "---------------\n",
+      "1579:DFF\"InstQueue_reg_12__0_\" <33056 <13187 >33057\n",
+      "1577:DFF\"InstQueue_reg_12__1_\" <33053 <13182 >33054\n",
+      "1581:DFF\"InstQueue_reg_12__2_\" <33059 <13192 >33060\n",
+      "1575:DFF\"InstQueue_reg_12__3_\" <33050 <13177 >33051\n",
+      "1583:DFF\"InstQueue_reg_12__4_\" <33062 <13197 >33063\n",
+      "1573:DFF\"InstQueue_reg_12__5_\" <33047 <13172 >33048\n",
+      "1585:DFF\"InstQueue_reg_12__6_\" <33065 <13202 >33066\n",
+      "1571:DFF\"InstQueue_reg_12__7_\" <33044 <13167 >33045\n",
+      "---------------\n",
+      "1595:DFF\"InstQueue_reg_13__0_\" <33080 <13227 >33081\n",
+      "1593:DFF\"InstQueue_reg_13__1_\" <33077 <13222 >33078\n",
+      "1597:DFF\"InstQueue_reg_13__2_\" <33083 <13232 >33084\n",
+      "1591:DFF\"InstQueue_reg_13__3_\" <33074 <13217 >33075\n",
+      "1599:DFF\"InstQueue_reg_13__4_\" <33086 <13237 >33087\n",
+      "1589:DFF\"InstQueue_reg_13__5_\" <33071 <13212 >33072\n",
+      "1601:DFF\"InstQueue_reg_13__6_\" <33089 <13242 >33090\n",
+      "1587:DFF\"InstQueue_reg_13__7_\" <33068 <13207 >33069\n",
+      "---------------\n",
+      "1611:DFF\"InstQueue_reg_14__0_\" <33104 <13267 >33105\n",
+      "1609:DFF\"InstQueue_reg_14__1_\" <33101 <13262 >33102\n",
+      "1613:DFF\"InstQueue_reg_14__2_\" <33107 <13272 >33108\n",
+      "1607:DFF\"InstQueue_reg_14__3_\" <33098 <13257 >33099\n",
+      "1615:DFF\"InstQueue_reg_14__4_\" <33110 <13277 >33111\n",
+      "1605:DFF\"InstQueue_reg_14__5_\" <33095 <13252 >33096\n",
+      "1617:DFF\"InstQueue_reg_14__6_\" <33113 <13282 >33114\n",
+      "1603:DFF\"InstQueue_reg_14__7_\" <33092 <13247 >33093\n",
+      "---------------\n",
+      "1627:DFF\"InstQueue_reg_15__0_\" <33128 <13307 >33129\n",
+      "1625:DFF\"InstQueue_reg_15__1_\" <33125 <13302 >33126\n",
+      "1629:DFF\"InstQueue_reg_15__2_\" <33131 <13312 >33132\n",
+      "1623:DFF\"InstQueue_reg_15__3_\" <33122 <13297 >33123\n",
+      "1631:DFF\"InstQueue_reg_15__4_\" <33134 <13317 >33135\n",
+      "1621:DFF\"InstQueue_reg_15__5_\" <33119 <13292 >33120\n",
+      "1633:DFF\"InstQueue_reg_15__6_\" <33137 <13322 >33138\n",
+      "1619:DFF\"InstQueue_reg_15__7_\" <33116 <13287 >33117\n"
+     ]
+    }
+   ],
+   "source": [
+    "for l in b15_prim.s_locs('InstQueue_reg'):\n",
+    "    print('---------------')\n",
+    "    for i in l:\n",
+    "        print(b15_prim.s_nodes[i])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Working With Logic Values\n",
+    "\n",
+    "Sequential states of circuits, signals, and test patterns contain logic values.\n",
+    "\n",
+    "KyuPy provides some useful tools to deal with 2-valued, 4-valued, and 8-valued logic data.\n",
+    "\n",
+    "All logic values are stored in numpy arrays of dtype `np.uint8`.\n",
+    "\n",
+    "There are two storage formats:\n",
+    "* `mv` (for \"multi-valued\"): Each logic value is stored as uint8\n",
+    "* `bp` (for \"bit-parallel\"): Groups of 8 logic values are stored as three uint8"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### `mv` Arrays\n",
+    "\n",
+    "Suppose we want to simulate the adder circuit with 2 inputs, 1 output and 1 flip-flop."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[5:input\"clk\" >3,\n",
+       " 7:input\"a\" >4,\n",
+       " 9:input\"b\" >5,\n",
+       " 11:output\"s\" <2,\n",
+       " 0:DFF\"carry\" <1 <3 >12]"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adder.s_nodes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can construct a set of vectors using the `mvarray` helper function.\n",
+    "\n",
+    "Each vector has 4 elements, one for each io_node and sequential element.\n",
+    "\n",
+    "This would be an exhaustive vector set (the output in `s_nodes` remains unassigned (\"-\")):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0, 0, 0, 0, 0, 0, 0, 0],\n",
+       "       [0, 3, 0, 3, 0, 3, 0, 3],\n",
+       "       [0, 0, 3, 3, 0, 0, 3, 3],\n",
+       "       [2, 2, 2, 2, 2, 2, 2, 2],\n",
+       "       [0, 0, 0, 0, 3, 3, 3, 3]], dtype=uint8)"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from kyupy import logic\n",
+    "\n",
+    "inputs = logic.mvarray('000-0', '010-0', '001-0', '011-0', '000-1', '010-1', '001-1', '011-1')\n",
+    "inputs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The numeric values in this array are defined in `kyupy.logic`.\n",
+    "A logic-0 is stored as `0`, a logic-1 is stored as `3`, and 'unassigned' is stored as `2`.\n",
+    "\n",
+    "The **last** axis is always the number of vectors. It may be unintuitive at first, but it is more convenient for data-parallel simulations.\n",
+    "\n",
+    "The **second-to-last** axis corresponds to `s_nodes`. I.e., the first row is for input 'a', the second row for input 'b', and so on."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(5, 8)"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "inputs.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Get a string representation of a vector set. Possible values are '0', '1', '-', 'X', 'R', 'F', 'P', and 'N'."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "000-0\n",
+      "010-0\n",
+      "001-0\n",
+      "011-0\n",
+      "000-1\n",
+      "010-1\n",
+      "001-1\n",
+      "011-1\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(logic.mv_str(inputs))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Load a stuck-at fault test pattern set and expected fault-free responses from a STIL file. It contains 678 test vectors. Use the resolved circuit for arranging the patterns because the DFF positions may have changed by replacing the original technology cells."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from kyupy import stil\n",
+    "\n",
+    "s = stil.load('../tests/b15_2ig.sa_nf.stil.gz')\n",
+    "stuck_tests = s.tests(b15_prim)\n",
+    "stuck_responses = s.responses(b15_prim)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "528"
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(b15_prim.s_nodes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(528, 678)"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "stuck_tests.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(528, 678)"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "stuck_responses.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "------------------------------------------------------------------------------------------------------0----00--000110110011010011101101001011010010110100101101001001100110011001100110011001100011001100110011001100110011001110011001100110011001100110011001011101001011010010110100101101001011010010110100101101001011101100110011001101100110000010010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101011100110011001100110011001100110011111100001001011000100\n",
+      "--------------------------------------1100011011011111--------10111110--------------------------------P-00000--0101111000011010-01----110---110-110---110-11001100110-11101010011010-10-1010-1010101010-10-10-1010-10-10-10-101-10101010101010-10-10100-01010101011-01-11101110111011101-101-111011101-111011101-111010101000-10-1010-101010010-10011--0100-100110000111100001-111111-000011100000-101-01000011110-11-001000011111111-0001010100010001000----1----1-----1---11-----------00-101010101010-1010-1010101010101--10-1101010111111111\n",
+      "---------------------------------------1-1--1--1---1--1000100000000011--------------------------------P-10-00--11000001101111011---0--01-1111111111111111--1111110-111011111-111---11--11-1--1--------------------------------000000001---1----1--010-1-1---10010110011111110101001-0-0-0-1-0-1-0-011-0-1-000-1-001-1000101111111111111111111101111001101----1----------1-----------------------------------------------------1-11011100011111111-----111-----1-111-0-1-1----------------01------1------1-1-------------1--011-01011110011101111\n",
+      "------------------------------------------------------------------------------------------------------P111100--11011-0110000-10--0-----0--0-0-1-0-1-0-0-1---1-0-1-------0-01-11---0-0--00-0-00--------------------------------001100110000011000111110011000100--0011-------------1---------------------0-1-----1---1-1-1-1111111111111111101000100--001000011001-----1--111111--1--11--111--011---------------------------------0-------0-0---------------------------------------------00--------------------------------10-110-10010010-01010\n",
+      "--------------------------------------1011011111010000--------00101101--------------------------------P001100--10001010001111100-1-10101-1-1-1-1-1-1-1-1-101-1-1-100-000----0--00--1-0---------------------------------------------1--------------------------110-0--00001-0011001-001-0011001-0-0-010-001-1-0110101-1-1-0-100000000000011--1-11-1110-1----1--------------0--00---00-01---00-0----00-00---11-00--00000001011-0001-00-01---001001--11100---00-00---00-00---------1100-----01-------------0---------------0--11------0101000-10---\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(logic.mv_str(stuck_tests[:,:5]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "11001100110011001100110011001100110000--------------------------------01001100110011001100110011001100--------0000110110011010011101101001011010010110100101101001001100110011001100110011001100011001100110011001100110011001110011001100110011001100110011001011101001011010010110100101101001011010010110100101101001011101100110011001101100110000010010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101011100110011001100110011001100110011111100001001011000100\n",
+      "X0101010101010101X0101X01010101010XX11--------------------------------001X01X01X01X0101X01X01X01010101--------X0001010110000010X01XXXX110XXX110X110XXX110X11001100110X11101010011010X10X1010X1010101010X10X10X1010X10X10X10X101X10101010101010X10X10100X01010101110X00X01000100010001000X000X010001000X010001000X010000000000X10X1010X101010010X10001XX0100X100110000111100001X111111X000011100000X101X01000011110X11X001000011111111X00010101000100010001111011011110110110000011001110101X101010101010X1010X10101010101011X1001101010111111111\n",
+      "1X11XX1XXXXXXXXXXXXX1X1XXXXXX1XXXX1X1X--------------------------------0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX--------101010001101110010011111100000000000000000011000000110000000001000110000000001000XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX000000001XXX1XXXX1XX010X1X1XXX10110110011111110101001X0X0X0X1X0X1X0X011X0X1X000X1X001X1000101000000000000000000011111001010XXXX1XXXXXXXXXX1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1X11011100011111111XXXXX111XXXXX1X111X0X1X1XXXXXXXXXXXXXXXX0110000100100010100101001011011001100100101100001110101\n",
+      "0101XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX0X0X--------------------------------0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX--------011011X11100000000X0XXXXX0XX0X0X1X0X1X0X0X1XXX1X0X1XXXXXXX0X01X11XXX0X0XX00X0X00XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX011100110000011000111110011000100XX0011XXXXXXXXXXXXX1XXXXXXXXXXXXXXXXXXXXX0X1XXXXX1XXX1X1X1X1111111111111111101000100XX001000011001XXXXX1XX111111XX1XX11XX111XX011XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX0XXXXXXX0X0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX10X1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX101110010110110X01010\n",
+      "1XXXXX0XXXXXXXXXXXXXXX0XXXXXXXXXXX1XXX--------------------------------0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX--------100011001101001100X1X10101X1X1X1X1X1X1X1X1X101X1X1X100X000XXXX0XX00XX1X0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1XXXXXXXXXXXXXXXXXXXXXXXXXX110X0XX00101X0011001X001X0011001X0X0X010X001X1X0110101X1X1X0X100000000000011XX1X11X1110X1XX1000101111100010X0XX00XXX00X01XXX00X0XXXX00X00XXX11X00XX00000001011X0001X00X01XXX001001XX11100XXX00X00XXX00X00XX01101100011011001011XXXXXXXXXXX0XXXXXXXXXXXXXXX0XX010X1X1X0101000X10XXX\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(logic.mv_str(stuck_responses[:,:5]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The order of values in the vectors correspond to the circuit's `s_nodes`.\n",
+    "The test data can be used directly in the simulators as they use the same ordering convention.\n",
+    "\n",
+    "`stuck_tests` has values for all primary inputs and scan flip-flops, `stuck_responses` contains the expected values for all primary outputs and scan flip-flops.\n",
+    "\n",
+    "Since this is a static test, only '0', '1' and 'X' are used with the exception of the clock input, which has a positive pulse 'P'.\n",
+    "\n",
+    "A transition fault test is a dynamic test that also contains 'R' for rising transition and 'F' for falling transition:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s = stil.load('../tests/b15_2ig.tf_nf.stil.gz')\n",
+    "transition_tests = s.tests_loc(b15_prim)\n",
+    "transition_responses = s.responses(b15_prim)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "STIL files for delay tests usually only contain the initialization patterns.\n",
+    "When loading launch-on-capture transition fault tests, use `.tests_loc()`. This function performs a logic simulation of the launch cycle to calculate the transitions for the delay test itself."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "------------------------------------------------------------------------------------------------------X----XX--000110110011010011101101001011010010110100101101001001100110011001100110011001100011001100110011001100110011001110011001100110011001100110011001011101001011010010110100101101001011010010110100101101001011101100110011001101100110000010010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101011100110011001100110011001100110011111100001001011000100\n",
+      "--------------------------------------RFRRRRFRRFRRRR1R--------RXXRXRRR--------------------------------00F0F00--RR1RRXRF00011010XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX011X00XXX101X1X1X1X1X1X1X1X1X1X1X1X1X1X1X1X1X1X1X1X1X0XXXXXXX1XXXXXXXXXXXXXXXXXXXXXXXXRRXRXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX00000000000000000111111000001001RFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX11XXXXX\n",
+      "--------------------------------------1X111XX11X1XX1X11XX11XX111X11X11--------------------------------0-XXR00--F1FF1X0110011X01XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXFXX1XFFX1XFXFXFXFXFXFXFXFXFXFXFXFXFXFXFXFXFXFXFXFXFX0XFXFXFXF11110XX1XX1XXXXXXX1XXXX1XFR1F1XXXXXXXXXXXXXXXXX011101111000101101110111110001001000100010101011011001110111000X1XX11XXXXXXXX11XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX000X011111111111111111111111111111R0F1R0R001RR100FF1XXX\n",
+      "--------------------------------------------------------------XXXXXXXX--------------------------------0--FX00--F11F1XFRR00R01111XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX0XX1XXXXXRX1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX111111111111111111111110X0XX10XXXXXXXXXRXXXRXRF1FFFRFRXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1XXXXXXXXXXXXX0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX11X1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX011001010010111111111\n",
+      "--------------------------------------------------------------FFXFFFFF--------------------------------0XXRR00--RF01FXRF00001XX1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010X010111111111X111111111111111X11100XX0101XXXXXX01XXXX01XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1X1111X110111111XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX0RXRXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1XXXXXX\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(logic.mv_str(transition_tests[:,:5]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "11001100110011001100110011001100110000--------------------------------01001100110011001100110011001100--------0000110110011010011101101001011010010110100101101001001100110011001100110011001100011001100110011001100110011001110011001100110011001100110011001011101001011010010110100101101001011010010110100101101001011101100110011001101100110000010010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101011100110011001100110011001100110011111100001001011000100\n",
+      "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1--------------------------------0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX--------X01001X0110000010XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX011X00XXX000X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X1XXXXXXX1XXXXXXXXXXXXXXXXXXXXXXXX11X1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1111111111111111100000011111011010X1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX0X0X0X1XXXXXXX11XXXXX\n",
+      "0100111111111111111111111111111110101X--------------------------------0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX--------101011X0110011001000X0X0X1X0X0X1X0X0X0X1X1X0X0X1X1X010110100110001XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX0XX1X00X1X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X0X000001XX1XX1XXXXXXX1XXXX1X01000XXXXXXXXXXXXXXXXX011101111000101101110111110001001000100010101011011001110111000X1XX11XXXXXXXX11XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX0010100000000000000000000000000000111011011000000010XXX\n",
+      "1011XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01--------------------------------0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX--------X00101X01100100010XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX0XX1XXXXX1X1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX000000000000000000000001X0XX11XXXXXXXX0100010101000101XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1XXXXXXXXXXXXX0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX11101XXXXXXXXXXXXXXXXXXXXXXXXXXXXX001100000100001110100\n",
+      "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1--------------------------------0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX--------X01000X1100101XX1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX000X0011000000000000000000000000000011XX1111XXXXXX11XXXX11XXXXXXXXXXXXXXXXXXXXXXXXXXXX11X1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX0100000001000000XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01X1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1X0X0X1XXXXXXXX0XXXXX\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(logic.mv_str(transition_responses[:,:5]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Initialization patterns and launch patterns can be filtered by providing a call-back function. This can be used to fill in unassigned values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "def zero_fill(mva):\n",
+    "    return np.choose(mva, logic.mvarray('0X01PRFN'))  # maps '0X-1PRFN' -> '0X01PRFN'\n",
+    "\n",
+    "transition_tests_zf = s.tests_loc(b15_prim, init_filter=zero_fill, launch_filter=zero_fill)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110110011010011101101001011010010110100101101001001100110011001100110011001100011001100110011001100110011001110011001100110011001100110011001011101001011010010110100101101001011010010110100101101001011101100110011001101100110000010010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101011100110011001100110011001100110011111100001001011000100\n",
+      "00000000000000000000000000000000000000RFRRRRFRRFRRRR1R00000000RRRRRRRR0000000000000000000000000000000000F0F0000RR1RR0RF00011010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100000010101010101010101010101010101010101010101010000000001000000000000000000000000RR0R000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111000001001RF0R000000000000000000000000000000R00000R00000001100000\n",
+      "00000000000000000000000000000000000000101110011010010110011001110110110000000000000000000000000000000000R0R0000F1FF1001100110010RR0R0R000R0R000R0R0R00000R0R00000R0R00R0RR00RRR0000000000000000000000000000000000000000000000000000000000000000000000000000000F0010FF010F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F000F0F0F0F1111000100100000001000010FR1F1000000000000000000111011110001011011101111100010010001000101010110110011101110000100110000000011000000000000000000000000000000000000011111111111111111111111111111R0F1R0R001RR100FF1000\n",
+      "000000000000000000000000000000000000000000000000000000000000000FF00F0000000000000000000000000000000000000F00000F11F10FRR00R01111000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000R010000000000000000000000000000000000000000000000000111111111111111111111110000010000000000R000R0RF1FFFRFR000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000001101000000000000000000000000000000011001010010111111111\n",
+      "00000000000000000000000000000000000000000000000000000000000000FFRFFFFF00000000000000000000000000000000000RR0000RF01F0RF00001001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000101111111110111111111111111011100000101000000010000010000000000000000000000000000RR0R0000000000000000000000000000000000000000000000000000000000000000000000000101111011011111100000000000000000000000000000000000000000R0R000000000000000000000000000000R0R000R00000001000000\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(logic.mv_str(transition_tests_zf[:,:5]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### `bp` Arrays\n",
+    "\n",
+    "The logic simulator uses bit-parallel storage of logic values, but our loaded test data uses one `uint8` per logic value.\n",
+    "\n",
+    "Use `mv_to_bp` to convert mv data to the bit-parallel storage layout.\n",
+    "Bit-parallel storage is more compact, but individual values cannot be easily accessed anymore."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stuck_tests_bp = logic.mv_to_bp(stuck_tests)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(528, 3, 85)"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "stuck_tests_bp.data.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Instead of 678 bytes per s_node, bit-parallel storage only uses 3*85=255 bytes.\n",
+    "\n",
+    "The reverse operation is `bp_to_mv`. Note that the number of vectors may be rounded up to the next multiple of 8:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(528, 680)"
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "logic.bp_to_mv(stuck_tests_bp).shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Logic Simulation\n",
+    "\n",
+    "The following code performs a 8-valued logic simulation on all 678 vectors for one clock cycle."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from kyupy.logic_sim import LogicSim\n",
+    "\n",
+    "sim = LogicSim(b15_prim, sims=stuck_tests.shape[-1])  # 678 simulations in parallel\n",
+    "sim.s[0] = stuck_tests_bp\n",
+    "sim.s_to_c()\n",
+    "sim.c_prop()\n",
+    "sim.c_to_s()\n",
+    "sim_responses = logic.bp_to_mv(sim.s[1])[...,:stuck_tests.shape[-1]]  # trim from 680 -> 678"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "11001100110011001100110011001100110000--------------------------------01001100110011001100110011001100--------010X110XXX011010011101101001011010010110100101101001001100110011001100110011001100011001100110011001100110011001110011001100110011001100110011001011101001011010010110100101101001011010010110100101101001011101100110011001101100110000010010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101011100110011001100110011001100110011111100001001011000100\n",
+      "-0101010101010101-0101-01010101010--11--------------------------------001-01-01X01-0101-01-01-01010101---------0001010110000010X01XXXX110XXX110X110XXX110X11001100110X11101010011010X10X1010X1010101010X10X10X1010X10X10X10X101X10101010101010X10X10100X01010101110X00X01000100010001000X000X010001000X010001000X010000000000X10X1010X101010010X10001XX0100X100110000111100001X111111X000011100000X101X01000011110X11X001000011111111X00010101000100010001111011011110110110000011001110101X101010101010X1010X10101010101011X1001101010111111111\n",
+      "1-11--1-------------1-1------1----1-1---------------------------------0--------X------------------------------101010001101110010011111100000000000000000011000000110000000001000110000000001000XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX000000001XXX1XXXX1XX010X1X1XXX10110110011111110101001X0X0X0X1X0X1X0X011X0X1X000X1X001X1000101000000000000000000011111001010XXXX1XXXXXXXXXX1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1X11011100011111111XXXXX111XXXXX1X111X0X1X1XXXXXXXXXXXXXXXX0110000100100010100101001011011001100100101100001110101\n",
+      "0101------------------------------0-0---------------------------------0--------X------------------------------011011X11100000000X0XXXXX0XX0X0X1X0X1X0X0X1XXX1X0X1XXXXXXX0X01X11XXX0X0XX00X0X00XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX011100110000011000111110011000100XX0011XXXXXXXXXXXXX1XXXXXXXXXXXXXXXXXXXXX0X1XXXXX1XXX1X1X1X1111111111111111101000100XX001000011001XXXXX1XX111111XX1XX11XX111XX011XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX0XXXXXXX0X0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX10X1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX101110010110110X01010\n",
+      "1-----0---------------0-----------1-----------------------------------0--------X------------------------------100011001101001100X1X10101X1X1X1X1X1X1X1X1X101X1X1X100X000XXXX0XX00XX1X0XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX1XXXXXXXXXXXXXXXXXXXXXXXXXX110X0XX00101X0011001X001X0011001X0X0X010X001X1X0110101X1X1X0X100000000000011XX1X11X1110X1XX1000101111100010X0XX00XXX00X01XXX00X0XXXX00X00XXX11X00XX00000001011X0001X00X01XXX001001XX11100XXX00X00XXX00X00XX01101100011011001011XXXXXXXXXXX0XXXXXXXXXXXXXXX0XX010X1X1X0101000X10XXX\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(logic.mv_str(sim_responses[:,:5]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Compare simulation results to expected fault-free responses loaded from STIL.\n",
+    "\n",
+    "The first test fails, because it is a flush test while simulation implicitly assumes a standard test with a capture clock.\n",
+    "\n",
+    "The remaining 677 responses should be compatible.\n",
+    "\n",
+    "The following checks for compatibility (unknown/unassigned values in STIL always match)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "677"
+      ]
+     },
+     "execution_count": 63,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.sum(np.min((sim_responses == stuck_responses) | \n",
+    "              (stuck_responses == logic.UNASSIGNED) | \n",
+    "              (stuck_responses == logic.UNKNOWN), axis=0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Same simulation for the transition-fault test set:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sim = LogicSim(b15_prim, sims=transition_tests_zf.shape[-1])  # 1147 simulations in parallel\n",
+    "sim.s[0] = logic.mv_to_bp(transition_tests_zf)\n",
+    "sim.s_to_c()\n",
+    "sim.c_prop()\n",
+    "sim.c_to_s()\n",
+    "sim_responses = logic.bp_to_mv(sim.s[1])[...,:transition_tests_zf.shape[-1]]  # trim to 1147"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "11001100110011001100110011001100110000--------------------------------01001100110011001100110011001100--------0100110001011010011101101001011010010110100101101001001100110011001100110011001100011001100110011001100110011001110011001100110011001100110011001011101001011010010110100101101001011010010110100101101001011101100110011001101100110000010010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101011100110011001100110011001100110011111100001001011000100\n",
+      "0000000000000000000000000000000000000R--------------------------------00000000000000000000000000000000--------0FNFFNPFRR0PFF01000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000NNP0PP00F0FPFPFPFPFPFPFPFPFPFPFPFPFPFPFPFPF0F0F0F0FPRP0P0P0PN000000000000000000000000NNPNP000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000RRRRRRRRRRRRRRRRRFFFFFFRRRRRFRRFNPPNP00000000000000000000000000000FPP000NP0000P0NN00000\n",
+      "01001111111111111111111111111111101010--------------------------------00000000000000000000000000000000--------1PNPRN0PNN0011001PFF0F0F0R0F0F0R0F0F0F0R0R0F0F0R0R0FRFRRFRFFRRFFFR000000000000000PP0PP0PPP00PP00P00000000000000000000000000000000000000000000000P00N0PP0N0P0P0P0P0P0P0P0P0P0P0P0P0P0P0P0P0P0P0P0P0P000P0P0P0PFFFFR00N00N0000000N0000N0PNFPF00000000000000000011101111000101101110111110001001000100010101011011001110111000010011000000001100000000000000000000000000000000PPRPRFFFFFFFFFFFFFFFFFFFFFFFFFFFFFNRRFNRFRRFFFFP0PRF000\n",
+      "10110000000000000000000000000000000001--------------------------------00000000000000000000000000000000--------0PFNP1PPNNPPNPFFNF000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000PPPNP00PPNPNPPP0P0P0P0P0P0P0P0P0P0P0P0P0P0P000000000000000000FFFFFFFFFFFFFFFFFFFFFFFR0PPRNR000000000N000N0NPNPPPNPN0P000P0P0P000P0P0P000P0P0P000P0P0P000P0P0PN00P0P0000000000000000000000000000000000000000000000000000000011RFR000000000000000000000000000000F1R0F0F0RFPFFNNNFNFF\n",
+      "0000000000000000000000000000000000000R--------------------------------00000000000000000000000000000000--------0FRPFPPNRP0RPN00N00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000F00PFRNFFFFFFFFPFFFFFFFFFFFFFFFPFFFRRP0RNRN000000RN0000RN000PP0PPPPPPP0P0000000000000NN0N0000000000000000000000000000000000000000000000000000000000000000000000000FRFFFFPFFRFFFFFFPPPPPPPPPPPPPPPP0000000000000000000000000N0N000000000000000000000000000000N0F000N00000PPF00PP00\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(logic.mv_str(sim_responses[:,:5]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The simulator responses contain 'R' for rising transition, 'F' for falling transition, 'P' for possible positive pulse(s) (010) and 'N' for possible negative pulse(s) (101).\n",
+    "\n",
+    "We need to map each of these cases to the final logic values before we can compare:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "11001100110011001100110011001100110000--------------------------------01001100110011001100110011001100--------0100110001011010011101101001011010010110100101101001001100110011001100110011001100011001100110011001100110011001110011001100110011001100110011001011101001011010010110100101101001011010010110100101101001011101100110011001101100110000010010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101011100110011001100110011001100110011111100001001011000100\n",
+      "00000000000000000000000000000000000001--------------------------------00000000000000000000000000000000--------0010010011000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000000000000000000000000000000000000000000000010000000100000000000000000000000011010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111111111111110000001111101101001000000000000000000000000000000000000100000001100000\n",
+      "01001111111111111111111111111111101010--------------------------------00000000000000000000000000000000--------1010110011001100100000000100000100000001010000010100101101001100010000000000000000000000000000000000000000000000000000000000000000000000000000000001000010000000000000000000000000000000000000000000000000000000010010010000000100001001000000000000000000000111011110001011011101111100010010001000101010110110011101110000100110000000011000000000000000000000000000000000010100000000000000000000000000000111011011000000010000\n",
+      "10110000000000000000000000000000000001--------------------------------00000000000000000000000000000000--------0001010011001000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000001010000000000000000000000000000000000000000000000000000000000000000000000001000111000000000100010101000101000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000001110100000000000000000000000000000001100000100001110100\n",
+      "00000000000000000000000000000000000001--------------------------------00000000000000000000000000000000--------0010000110010100100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100000000000000000000000000001100111100000011000011000000000000000000000000000011010000000000000000000000000000000000000000000000000000000000000000000000000010000000100000000000000000000000000000000000000000000000101000000000000000000000000000000100000100000000000000\n"
+     ]
+    }
+   ],
+   "source": [
+    "sim_responses_final = np.choose(sim_responses, logic.mvarray('0X-10101'))  # maps '0X-1PRFN' -> '0X-10101'\n",
+    "print(logic.mv_str(sim_responses_final[:,:5]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Again, first test is a flush test, so we expect 1146 matches.\n",
+    "\n",
+    "We simulated zero-filled patterns and therefore have more specified output bits.\n",
+    "\n",
+    "The following checks for compatability (unknown/unassigned values in STIL always match)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1146"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.sum(np.min((sim_responses_final == transition_responses) | \n",
+    "              (transition_responses == logic.UNASSIGNED) | \n",
+    "              (transition_responses == logic.UNKNOWN), axis=0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Working With Delay Information and Timing Simulation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Delay data for gates and interconnect can be loaded from SDF files. In kyupy's timing simulators, delays are associated with the lines between nodes, not with the nodes themselves.\n",
+    "Each line in the circuit has 4 delays for the IOPATH of the reading node;\n",
+    "one for each combination rising/falling edges at the input and output of that node.\n",
+    "\n",
+    "These contents of the SDF file is matched by node names to the original (non-resolved) circuit `b15`. Resolving library cells does not change the line indices, so the resulting array is compatible with `b15_prim`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from kyupy import sdf\n",
+    "\n",
+    "df = sdf.load('../tests/b15_2ig.sdf.gz')\n",
+    "delays = df.iopaths(b15, tlib=SAED32)[0]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The returned delay information is an `ndarray` with a set of delay values for each line in the circuit."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(32032, 2, 2)"
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "delays.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Number of non-0 values loaded:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "79010"
+      ]
+     },
+     "execution_count": 70,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(delays != 0).sum()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The available timing simulators are `WaveSim` and `WaveSimCuda`.\n",
+    "They work similarly to `LogicSim` in that they evaluate all cells in topological order.\n",
+    "Instead of propagating a logic value, however, they propagate waveforms.\n",
+    "\n",
+    "`WaveSim` uses the numba just-in-time compiler for acceleration on CPU.\n",
+    "It falls back to pure python if numba is not available. `WaveSimCuda` uses numba for GPU acceleration.\n",
+    "If no CUDA card is available, it will fall back to pure python (not jit-compiled for CPU!).\n",
+    "Pure python is too slow for most purposes.\n",
+    "\n",
+    "Both simulators operate data-parallel.\n",
+    "The following instanciates a new engine for 32 independent timing simulations and each signal line in the circuit can carry at most 16 transitions. All simulators share the same circuit and the same line delay specification."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from kyupy.wave_sim import WaveSimCuda, TMAX\n",
+    "\n",
+    "wsim = WaveSimCuda(b15_prim, delays, sims=32, c_caps=16)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "These are various memories allocated, with waveforms usually being the largest. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Waveforms              : 66856.0 kiB\n",
+      "State Allocation Table : 134.3 kiB\n",
+      "Circuit Timing         : 1074.1 kiB\n",
+      "Circuit Netlist        : 1171.2 kiB\n",
+      "Sequential State       : 726.0 kiB\n"
+     ]
+    }
+   ],
+   "source": [
+    "def print_mem(name, arr):\n",
+    "    print(f'{name}: {arr.nbytes / 1024:.1f} kiB')\n",
+    "    \n",
+    "print_mem('Waveforms              ', wsim.c)\n",
+    "print_mem('State Allocation Table ', wsim.c_locs)\n",
+    "print_mem('Circuit Timing         ', wsim.delays)\n",
+    "print_mem('Circuit Netlist        ', wsim.ops)\n",
+    "print_mem('Sequential State       ', wsim.s)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This is a typical simulation loop where the number of patterns is larger than the number of simulators available.\n",
+    "We simulate `transition_tests_zf`.\n",
+    "The initial values, transition times and final values are loaded into `wsim.s` and the following three calls will update this array with simulation results. We collect all results in `wsim_results`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from kyupy import batchrange\n",
+    "import numpy as np\n",
+    "\n",
+    "sims = 128  # transition_tests_zf.shape[-1]  # Feel free to simulate all tests if CUDA is set up correctly.\n",
+    "\n",
+    "wsim_results = np.zeros((11, wsim.s_len, sims))  # space to store all simulation results\n",
+    "\n",
+    "for offset, size in batchrange(sims, wsim.sims):\n",
+    "    wsim.s[0] = (transition_tests_zf[:,offset:offset+size] >> 1) & 1  # initial value (bit 1)\n",
+    "    wsim.s[1] = 0.0 # transition time\n",
+    "    wsim.s[2] = transition_tests_zf[:,offset:offset+size] & 1  # final value (bit 0)\n",
+    "    wsim.s_to_c()\n",
+    "    wsim.c_prop(sims=size)\n",
+    "    wsim.c_to_s(time=1.5)  # capture at time 1.5\n",
+    "    wsim_results[:,:,offset:offset+size] = wsim.s[:,:,:size]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The arrays `wsim.s` and `wsim_results` contain various information for each PI, PO, and scan flip-flop (axis 1), and each test (axis 2):\n",
+    "* ``s[0]`` (P)PI initial value\n",
+    "* ``s[1]`` (P)PI transition time\n",
+    "* ``s[2]`` (P)PI final value\n",
+    "* ``s[3]`` (P)PO initial value\n",
+    "* ``s[4]`` (P)PO earliest arrival time (EAT): The time at which the output transitioned from its initial value.\n",
+    "* ``s[5]`` (P)PO latest stabilization time (LST): The time at which the output settled to its final value.\n",
+    "* ``s[6]`` (P)PO final value\n",
+    "* ``s[7]`` (P)PO capture value: probability of capturing a 1 at a given capture time\n",
+    "* ``s[8]`` (P)PO sampled capture value: decided by random sampling according to a given seed.\n",
+    "* ``s[9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed.\n",
+    "* ``s[10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more\n",
+    "          transitions than specified in ``c_caps``. Some transitions have been discarded, the\n",
+    "          final values in the waveforms are still valid."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(11, 528, 128)"
+      ]
+     },
+     "execution_count": 74,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "wsim_results.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For validating against known logic values, convert the samples capture values `wsim_results[8]` into an mvarray like this:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "110011001100110011001100110011001100000000000000000000000000000000000001001100110011001100110011001100000000000100110001011010011101101001011010010110100101101001001100110011001100110011001100011001100110011001100110011001110011001100110011001100110011001011101001011010010110100101101001011010010110100101101001011101100110011001101100110000010010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101011100110011001100110011001100110011111100001001011000100\n",
+      "000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000010010011000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000000000000000000000000000000000000000000000010000000100000000000000000000000011010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111111111111110000001111101101001000000000000000000000000000000000000100000001100000\n",
+      "010011111111111111111111111111111010100000000000000000000000000000000000000000000000000000000000000000000000001010110011001100100000000100000100000001010000010100101101001100010000000000000000000000000000000000000000000000000000000000000000000000000000000001000010000000000000000000000000000000000000000000000000000000010010010000000100001001000000000000000000000111011110001011011101111100010010001000101010110110011101110000100110000000011000000000000000000000000000000000010100000000000000000000000000000111011011000000010000\n",
+      "101100000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000001010011001000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000001010000000000000000000000000000000000000000000000000000000000000000000000001000111000000000100010101000101000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000001110100000000000000000000000000000001100000100001110100\n",
+      "000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000010000110010100100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100000000000000000000000000001100111100000011000011000000000000000000000000000011010000000000000000000000000000000000000000000000000000000000000000000000000010000000100000000000000000000000000000000000000000000000101000000000000000000000000000000100000100000000000000\n"
+     ]
+    }
+   ],
+   "source": [
+    "wsim_responses_final = ((wsim_results[8] > 0.5) * logic.ONE).astype(np.uint8)\n",
+    "print(logic.mv_str(wsim_responses_final[:,:5]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We expect 127 matches here."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "127"
+      ]
+     },
+     "execution_count": 76,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.sum(np.min((wsim_responses_final == transition_responses[:,:sims]) | \n",
+    "              (transition_responses[:,:sims] == logic.UNASSIGNED) | \n",
+    "              (transition_responses[:,:sims] == logic.UNKNOWN), axis=0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The circuit delay is the maximum among all latest stabilization times:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0424000024795532"
+      ]
+     },
+     "execution_count": 77,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "wsim_results[5].max()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Check for overflows. If too many of them occur, increase `c_caps` during engine instanciation:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.0"
+      ]
+     },
+     "execution_count": 78,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "wsim_results[10].sum()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Check for capture failures by comparing the samples PPO capture value with the final PPO value:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0"
+      ]
+     },
+     "execution_count": 79,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(wsim_results[6] != wsim_results[8]).sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/setup.py b/setup.py
index 0f443ff..47ef1f7 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@ setup(
     url='https://github.com/s-holst/kyupy',
     author='Stefan Holst',
     author_email='mail@s-holst.de',
-    python_requires='>=3.6',
+    python_requires='>=3.8',
     install_requires=[
         'numpy>=1.17.0',
         'lark-parser>=0.8.0'
@@ -33,9 +33,8 @@ setup(
         'Operating System :: OS Independent',
         'Programming Language :: Python :: 3',
         'Programming Language :: Python :: 3 :: Only',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
     ],
 )
diff --git a/src/kyupy/__init__.py b/src/kyupy/__init__.py
index 07b15e0..123bd03 100644
--- a/src/kyupy/__init__.py
+++ b/src/kyupy/__init__.py
@@ -1,11 +1,12 @@
-"""A package for processing and analysis of non-hierarchical gate-level VLSI designs.
+"""The kyupy package itself contains a miscellaneous utility functions.
 
-The kyupy package itself contains a logger and other simple utility functions.
 In addition, it defines a ``numba`` and a ``cuda`` objects that point to the actual packages
 if they are available and otherwise point to mocks.
 """
 
 import time
+import sys
+from collections import defaultdict
 import importlib.util
 import gzip
 
@@ -15,15 +16,19 @@ import numpy as np
 _pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)])
 
 
+def cdiv(x, y):
+    return -(x // -y)
+
+
 def popcount(a):
-    """Returns the number of 1-bits in a given packed numpy array."""
+    """Returns the number of 1-bits in a given packed numpy array of type ``uint8``."""
     return np.sum(_pop_count_lut[a])
 
 
 def readtext(file):
     """Reads and returns the text in a given file. Transparently decompresses \\*.gz files."""
     if hasattr(file, 'read'):
-        return file.read()
+        return file.read().decode()
     if str(file).endswith('.gz'):
         with gzip.open(file, 'rt') as f:
             return f.read()
@@ -74,6 +79,39 @@ def hr_time(seconds):
     return s
 
 
+def batchrange(nitems, maxsize):
+    """A simple generator that produces offsets and sizes for batch-loops."""
+    for offset in range(0, nitems, maxsize):
+        yield offset, min(nitems-offset, maxsize)
+
+
+class Timer:
+    def __init__(self, s=0): self.s = s
+    def __enter__(self): self.start_time = time.perf_counter(); return self
+    def __exit__(self, *args): self.s += time.perf_counter() - self.start_time
+    @property
+    def ms(self): return self.s*1e3
+    @property
+    def us(self): return self.s*1e6
+    def __repr__(self): return f'{self.s:.3f}'
+    def __add__(self, t):
+        return Timer(self.s + t.s)
+
+
+class Timers:
+    def __init__(self, t={}): self.timers = defaultdict(Timer) | t
+    def __getitem__(self, name): return self.timers[name]
+    def __repr__(self): return '{' + ', '.join([f'{k}: {v}' for k, v in self.timers.items()]) + '}'
+    def __add__(self, t):
+        tmr = Timers(self.timers)
+        for k, v in t.timers.items(): tmr.timers[k] += v
+        return tmr
+    def sum(self):
+        return sum([v.s for v in self.timers.values()])
+    def dict(self):
+        return dict([(k, v.s) for k, v in self.timers.items()])
+
+
 class Log:
     """A very simple logger that formats the messages with the number of seconds since
     program start.
@@ -81,25 +119,58 @@ class Log:
 
     def __init__(self):
         self.start = time.perf_counter()
-        self.logfile = None
+        self.logfile = sys.stdout
         """When set to a file handle, log messages are written to it instead to standard output.
-        After each write, ``flush()`` is called as well.
         """
+        self.indent = 0
+        self._limit = -1
+        self.filtered = 0
+
+    def limit(self, log_limit):
+        class Limiter:
+            def __init__(self, l): self.l = l
+            def __enter__(self): self.l.start_limit(log_limit); return self
+            def __exit__(self, *args): self.l.stop_limit()
+        return Limiter(self)
+
+    def start_limit(self, limit):
+        self.filtered = 0
+        self._limit = limit
+
+    def stop_limit(self):
+        if self.filtered > 0:
+            log.info(f'{self.filtered} more messages (filtered).')
+            self.filtered = 0
+        self._limit = -1
 
     def __getstate__(self):
         return {'elapsed': time.perf_counter() - self.start}
 
     def __setstate__(self, state):
-        self.logfile = None
+        self.logfile = sys.stdout
+        self.indent = 0
         self.start = time.perf_counter() - state['elapsed']
 
+    def write(self, s, indent=0):
+        self.logfile.write(' '*indent + s + '\n')
+        self.logfile.flush()
+
+    def li(self, item): self.write('- ' + str(item).replace('\n', '\n'+' '*(self.indent+1)), self.indent)
+    def lib(self): self.write('-', self.indent); self.indent += 1
+    def lin(self): self.write('-', self.indent-1)
+    def di(self, key, value): self.write(str(key) + ': ' + str(value).replace('\n', '\n'+' '*(self.indent+1)), self.indent)
+    def dib(self, key): self.write(str(key) + ':', self.indent); self.indent += 1
+    def din(self, key): self.write(str(key) + ':', self.indent-1)
+    def ie(self, n=1): self.indent -= n
+
     def log(self, level, message):
+        if self._limit == 0:
+            self.filtered += 1
+            return
         t = time.perf_counter() - self.start
-        if self.logfile is None:
-            print(f'{t:011.3f} {level} {message}')
-        else:
-            self.logfile.write(f'{t:011.3f} {level} {message}\n')
-            self.logfile.flush()
+        self.logfile.write(f'# {t:011.3f} {level} {message}\n')
+        self.logfile.flush()
+        self._limit -= 1
 
     def info(self, message):
         """Log an informational message."""
@@ -156,7 +227,7 @@ class MockCuda:
         self.x = 0
         self.y = 0
 
-    def jit(self, device=False):
+    def jit(self, func=None, device=False):
         _ = device  # silence "not used" warning
         outer = self
 
@@ -184,7 +255,7 @@ class MockCuda:
                     return inner
             return Launcher(func)
 
-        return make_launcher
+        return make_launcher(func) if func else make_launcher
 
     @staticmethod
     def to_device(array, to=None):
@@ -208,6 +279,8 @@ if importlib.util.find_spec('numba') is not None:
     try:
         list(numba.cuda.gpus)
         from numba import cuda
+        from numba.core import config
+        config.CUDA_LOW_OCCUPANCY_WARNINGS = False
     except CudaSupportError:
         log.warn('Cuda unavailable. Falling back to pure Python.')
         cuda = MockCuda()
diff --git a/src/kyupy/bench.py b/src/kyupy/bench.py
index 21310d5..aeec5a2 100644
--- a/src/kyupy/bench.py
+++ b/src/kyupy/bench.py
@@ -21,9 +21,9 @@ class BenchTransformer(Transformer):
 
     def start(self, _): return self.c
 
-    def parameters(self, args): return [self.c.get_or_add_fork(name) for name in args]
+    def parameters(self, args): return [self.c.get_or_add_fork(str(name)) for name in args]
 
-    def interface(self, args): self.c.interface.extend(args[0])
+    def interface(self, args): self.c.io_nodes.extend(args[0])
 
     def assignment(self, args):
         name, cell_type, drivers = args
@@ -57,8 +57,8 @@ def parse(text, name=None):
 def load(file, name=None):
     """Parses the contents of ``file`` as ISCAS89 bench code.
 
-    :param file: The file to be loaded.
-    :param name: The name of the circuit. If none given, the file name is used as circuit name.
+    :param file: The file to be loaded. Files with `.gz`-suffix are decompressed on-the-fly.
+    :param name: The name of the circuit. If None, the file name is used as circuit name.
     :return: A :class:`Circuit` object.
     """
     return parse(readtext(file), name=name or str(file))
diff --git a/src/kyupy/circuit.py b/src/kyupy/circuit.py
index 53b2e51..46c9f38 100644
--- a/src/kyupy/circuit.py
+++ b/src/kyupy/circuit.py
@@ -1,11 +1,19 @@
-"""Data structures for representing non-hierarchical gate-level circuits.
+"""Core module for handling non-hierarchical gate-level circuits.
 
 The class :class:`Circuit` is a container of nodes connected by lines.
 A node is an instance of class :class:`Node`,
 and a line is an instance of class :class:`Line`.
+
+The data structures are designed to work together nicely with numpy arrays.
+For example, all the nodes and connections in the circuit graph have consecutive integer indices that can be used to access ndarrays with associated data.
+Circuit graphs also define an ordering of inputs, outputs and other nodes to easily process test vector data and alike.
+
 """
 
-from collections import deque
+from collections import deque, defaultdict
+import re
+
+import numpy as np
 
 
 class GrowingList(list):
@@ -64,9 +72,9 @@ class Node:
         self.index = len(circuit.nodes) - 1
         """A unique and consecutive integer index of the node within the circuit.
 
-        It can be used to store additional data about the node :code:`n`
+        It can be used to associate additional data to a node :code:`n`
         by allocating an array or list :code:`my_data` of length :code:`len(n.circuit.nodes)` and
-        accessing it by :code:`my_data[n.index]`.
+        accessing it by :code:`my_data[n.index]` or simply by :code:`my_data[n]`.
         """
         self.ins = GrowingList()
         """A list of input connections (:class:`Line` objects).
@@ -81,7 +89,9 @@ class Node:
     def __repr__(self):
         ins = ' '.join([f'<{line.index}' if line is not None else '<None' for line in self.ins])
         outs = ' '.join([f'>{line.index}' if line is not None else '>None' for line in self.outs])
-        return f'{self.index}:{self.kind}"{self.name}" {ins} {outs}'
+        ins = ' ' + ins if len(ins) else ''
+        outs = ' ' + outs if len(outs) else ''
+        return f'{self.index}:{self.kind}"{self.name}"{ins}{outs}'
 
     def remove(self):
         """Removes the node from its circuit.
@@ -135,7 +145,7 @@ class Line:
 
         It can be used to store additional data about the line :code:`l`
         by allocating an array or list :code:`my_data` of length :code:`len(l.circuit.lines)` and
-        accessing it by :code:`my_data[l.index]`.
+        accessing it by :code:`my_data[l.index]` or simply by :code:`my_data[l]`.
         """
         if not isinstance(driver, tuple): driver = (driver, driver.outs.free_index())
         self.driver = driver[0]
@@ -144,7 +154,7 @@ class Line:
         self.driver_pin = driver[1]
         """The output pin position of the driver node this line is connected to.
 
-        This is the position in the outs-list of the driving node this line referenced from:
+        This is the position in the list :py:attr:`Node.outs` of the driving node this line referenced from:
         :code:`self.driver.outs[self.driver_pin] == self`.
         """
         if not isinstance(reader, tuple): reader = (reader, reader.ins.free_index())
@@ -154,7 +164,7 @@ class Line:
         self.reader_pin = reader[1]
         """The input pin position of the reader node this line is connected to.
 
-        This is the position in the ins-list of the reader node this line referenced from:
+        This is the position in the list :py:attr:`Node.ins` of the reader node this line referenced from:
         :code:`self.reader.ins[self.reader_pin] == self`.
         """
         self.driver.outs[self.driver_pin] = self
@@ -166,7 +176,11 @@ class Line:
         To keep the indices consecutive, the line with the highest index within the circuit
         will be assigned the index of the removed line.
         """
-        if self.driver is not None: self.driver.outs[self.driver_pin] = None
+        if self.driver is not None:
+            self.driver.outs[self.driver_pin] = None
+            if self.driver.kind == '__fork__':  # squeeze outputs
+                del self.driver.outs[self.driver_pin]
+                for i, l in enumerate(self.driver.outs): l.driver_pin = i
         if self.reader is not None: self.reader.ins[self.reader_pin] = None
         if self.circuit is not None: del self.circuit.lines[self.index]
         self.driver = None
@@ -202,41 +216,237 @@ class Circuit:
     to enforce consecutiveness.
 
     A subset of nodes can be designated as primary input- or output-ports of the circuit.
-    This is done by adding them to the :py:attr:`interface` list.
+    This is done by adding them to the :py:attr:`io_nodes` list.
     """
     def __init__(self, name=None):
         self.name = name
         """The name of the circuit.
         """
-        self.nodes = IndexList()
+        self.nodes : list[Node] = IndexList()
         """A list of all :class:`Node` objects contained in the circuit.
 
         The position of a node in this list equals its index :code:`self.nodes[42].index == 42`.
+        This list must not be changed directly.
+        Use the :class:`Node` constructor and :py:attr:`Node.remove()` to add and remove nodes.
         """
-        self.lines = IndexList()
+        self.lines : list[Line] = IndexList()
         """A list of all :class:`Line` objects contained in the circuit.
 
         The position of a line in this list equals its index :code:`self.lines[42].index == 42`.
+        This list must not be changed directly.
+        Use the :class:`Line` constructor and :py:attr:`Line.remove()` to add and remove lines.
         """
-        self.interface = GrowingList()
+        self.io_nodes : list[Node] = GrowingList()
         """A list of nodes that are designated as primary input- or output-ports.
 
-        Port-nodes are contained in :py:attr:`nodes` as well as :py:attr:`interface`.
-        The position of a node in the interface list corresponds to positions of logic values in test vectors.
+        Port-nodes are contained in :py:attr:`nodes` as well as :py:attr:`io_nodes`.
+        The position of a node in the io_nodes list corresponds to positions of logic values in test vectors.
         The port direction is not stored explicitly.
-        Usually, nodes in the interface list without any lines in their :py:attr:`Node.ins` list are primary inputs,
-        and nodes without any lines in their :py:attr:`Node.outs` list are regarded as primary outputs.
+        Usually, nodes in the io_nodes list without any lines in their :py:attr:`Node.ins` list are primary inputs,
+        and all other nodes in the io_nodes list are regarded as primary outputs.
         """
-        self.cells = {}
+        self.cells : dict[str, Node] = {}
         """A dictionary to access cells by name.
+
+        This dictionary must not be changed directly.
+        Use the :class:`Node` constructor and :py:attr:`Node.remove()` to add and remove nodes.
         """
-        self.forks = {}
+        self.forks : dict[str, Node] = {}
         """A dictionary to access forks by name.
+
+        This dictionary must not be changed directly.
+        Use the :class:`Node` constructor and :py:attr:`Node.remove()` to add and remove nodes.
         """
 
+    @property
+    def s_nodes(self):
+        """A list of all primary I/Os as well as all flip-flops and latches in the circuit (in that order).
+
+        The s_nodes list defines the order of all ports and all sequential elements in the circuit.
+        This list is constructed on-the-fly. If used in some inner toop, consider caching the list for better performance.
+        """
+        return list(self.io_nodes) + [n for n in self.nodes if 'dff' in n.kind.lower()] + [n for n in self.nodes if 'latch' in n.kind.lower()]
+
+    def io_locs(self, prefix):
+        """Returns the indices of primary I/Os that start with given name prefix.
+
+        The returned values are used to index into the :py:attr:`io_nodes` array.
+        If only one I/O cell matches the given prefix, a single integer is returned.
+        If a bus matches the given prefix, a sorted list of indices is returned.
+        Busses are identified by integers in the cell names following the given prefix.
+        Lists for bus indices are sorted from LSB (e.g. :code:`data[0]`) to MSB (e.g. :code:`data[31]`).
+        If a prefix matches multiple different signals or busses, alphanumerically sorted
+        lists of lists are returned. Therefore, higher-dimensional busses
+        (e.g. :code:`data0[0], data0[1], ...`, :code:`data1[0], data1[1], ...`) are supported as well.
+        """
+        return self._locs(prefix, list(self.io_nodes))
+
+    def s_locs(self, prefix):
+        """Returns the indices of I/Os and sequential elements that start with given name prefix.
+
+        The returned values are used to index into the :py:attr:`s_nodes` list.
+        It works the same as :py:attr:`io_locs`. See there for more details.
+        """
+        return self._locs(prefix, self.s_nodes)
+
+    def _locs(self, prefix, nodes):
+        d_top = dict()
+        for i, n in enumerate(nodes):
+            if m := re.match(fr'({prefix}.*?)((?:[\d_\[\]])*$)', n.name):
+                path = [m[1]] + [int(v) for v in re.split(r'[_\[\]]+', m[2]) if len(v) > 0]
+                d = d_top
+                for j in path[:-1]:
+                    d[j] = d.get(j, dict())
+                    d = d[j]
+                d[path[-1]] = i
+
+        # sort recursively for multi-dimensional lists.
+        def sorted_values(d): return [sorted_values(v) for k, v in sorted(d.items())] if isinstance(d, dict) else d
+        l = sorted_values(d_top)
+        while isinstance(l, list) and len(l) == 1: l = l[0]
+        return None if isinstance(l, list) and len(l) == 0 else l
+
+    @property
+    def stats(self):
+        """A dictionary with the counts of all different elements in the circuit.
+
+        The dictionary contains the number of all different kinds of nodes, the number
+        of lines, as well various sums like number of combinational gates, number of
+        primary I/Os, number of sequential elements, and so on.
+
+        The count of regular cells use their :py:attr:`Node.kind` as key, other statistics use
+        dunder-keys like: `__comb__`, `__io__`, `__seq__`, and so on.
+        """
+        stats = defaultdict(int)
+        stats['__node__'] = len(self.nodes)
+        stats['__cell__'] = len(self.cells)
+        stats['__fork__'] = len(self.forks)
+        stats['__io__'] = len(self.io_nodes)
+        stats['__line__'] = len(self.lines)
+        for n in self.cells.values():
+            stats[n.kind] += 1
+            if 'dff' in n.kind.lower(): stats['__dff__'] += 1
+            elif 'latch' in n.kind.lower(): stats['__latch__'] += 1
+            elif 'put' not in n.kind.lower(): stats['__comb__'] += 1 # no input or output
+        stats['__seq__'] = stats['__dff__'] + stats['__latch__']
+        return dict(stats)
+
     def get_or_add_fork(self, name):
         return self.forks[name] if name in self.forks else Node(self, name)
 
+    def remove_dangling_nodes(self, root_node:Node):
+        if len([l for l in root_node.outs if l is not None]) > 0: return
+        lines = [l for l in root_node.ins if l is not None]
+        drivers = [l.driver for l in lines]
+        root_node.remove()
+        for l in lines:
+            l.remove()
+        for d in drivers:
+            self.remove_dangling_nodes(d)
+
+    def eliminate_1to1_forks(self):
+        """Removes all forks that drive only one node.
+
+        Such forks are inserted by parsers to annotate signal names. If this
+        information is not needed, such forks can be removed and the two neighbors
+        can be connected directly using one line. Forks that drive more than one node
+        are not removed by this function.
+
+        This function may remove some nodes and some lines from the circuit.
+        Therefore that indices of other nodes and lines may change to keep the indices consecutive.
+        It may therefore invalidate external data for nodes and lines.
+        """
+        ios = set(self.io_nodes)
+        for n in list(self.forks.values()):
+            if n in ios: continue
+            if len(n.outs) != 1: continue
+            in_line = n.ins[0]
+            out_line = n.outs[0]
+            out_reader = out_line.reader
+            out_reader_pin = out_line.reader_pin
+            n.remove()
+            out_line.remove()
+            in_line.reader = out_reader
+            in_line.reader_pin = out_reader_pin
+            in_line.reader.ins[in_line.reader_pin] = in_line
+
+    def substitute(self, node, impl):
+        """Replaces a given node with the given implementation circuit.
+
+        The given node will be removed, the implementation is copied in and
+        the signal lines are connected appropriately. The number and arrangement
+        of the input and output ports must match the pins of the replaced node.
+
+        This function tries to preserve node and line indices as much as possible.
+        Usually, it only adds additional nodes and lines, preserving the order of
+        all existing nodes and lines. If an implementation is empty, however, nodes
+        and lines may get removed, changing indices and invalidating external data.
+        """
+        ios = set(impl.io_nodes)
+        impl_in_nodes = [n for n in impl.io_nodes if len(n.ins) == 0]
+        impl_out_lines = [n.ins[0] for n in impl.io_nodes if len(n.ins) > 0]
+        designated_cell = None
+        if len(impl_out_lines) > 0:
+            n = impl_out_lines[0].driver
+            while n.kind == '__fork__' and n not in ios:
+                n = n.ins[0].driver
+            designated_cell = n
+        node_in_lines = list(node.ins) + [None] * (len(impl_in_nodes)-len(node.ins))
+        node_out_lines = list(node.outs) + [None] * (len(impl_out_lines)-len(node.outs))
+        assert len(node_in_lines) == len(impl_in_nodes)
+        assert len(node_out_lines) == len(impl_out_lines)
+        node_map = dict()
+        if designated_cell is not None:
+            node.kind = designated_cell.kind
+            node_map[designated_cell] = node
+            node.ins = GrowingList()
+            node.outs = GrowingList()
+        else:
+            node.remove()
+        ios = set(impl.io_nodes)
+        for n in impl.nodes:  # add all nodes to main circuit
+            if n not in ios:
+                if n != designated_cell:
+                    node_map[n] = Node(self, f'{node.name}~{n.name}', n.kind)
+            elif len(n.outs) > 0 and len(n.ins) > 0:  # output is also read by impl. circuit, need to add a fork.
+                node_map[n] = Node(self, f'{node.name}~{n.name}')
+            elif len(n.ins) == 0 and len(n.outs) > 1:  # input is read by multiple nodes, need to add fork.
+                node_map[n] = Node(self, f'{node.name}~{n.name}')
+        for l in impl.lines:  # add all internal lines to main circuit
+            if l.reader in node_map and l.driver in node_map:
+                Line(self, (node_map[l.driver], l.driver_pin), (node_map[l.reader], l.reader_pin))
+        for inn, ll in zip(impl_in_nodes, node_in_lines):  # connect inputs
+            if ll is None: continue
+            if len(inn.outs) == 1:
+                l = inn.outs[0]
+                ll.reader = node_map[l.reader]
+                ll.reader_pin = l.reader_pin
+            else:
+                ll.reader = node_map[inn]  # connect to existing fork
+                ll.reader_pin = 0
+            ll.reader.ins[ll.reader_pin] = ll
+        for l, ll in zip(impl_out_lines, node_out_lines):  # connect outputs
+            if ll is None:
+                if l.driver in node_map:
+                    self.remove_dangling_nodes(node_map[l.driver])
+                continue
+            if len(l.reader.outs) > 0:  # output is also read by impl. circuit, connect to fork.
+                ll.driver = node_map[l.reader]
+                ll.driver_pin = len(l.reader.outs)
+            else:
+                ll.driver = node_map[l.driver]
+                ll.driver_pin = l.driver_pin
+            ll.driver.outs[ll.driver_pin] = ll
+
+    def resolve_tlib_cells(self, tlib):
+        """Substitute all technology library cells with kyupy native simulation primitives.
+
+        See :py:attr:`substitute()` for more detail.
+        """
+        for n in list(self.nodes):
+            if n.kind in tlib.cells:
+                self.substitute(n, tlib.cells[n.kind][0])
+
     def copy(self):
         """Returns a deep copy of the circuit.
         """
@@ -247,69 +457,71 @@ class Circuit:
             d = c.forks[line.driver.name] if line.driver.kind == '__fork__' else c.cells[line.driver.name]
             r = c.forks[line.reader.name] if line.reader.kind == '__fork__' else c.cells[line.reader.name]
             Line(c, (d, line.driver_pin), (r, line.reader_pin))
-        for node in self.interface:
+        for node in self.io_nodes:
             if node.kind == '__fork__':
                 n = c.forks[node.name]
             else:
                 n = c.cells[node.name]
-            c.interface.append(n)
+            c.io_nodes.append(n)
         return c
 
     def __getstate__(self):
         nodes = [(node.name, node.kind) for node in self.nodes]
         lines = [(line.driver.index, line.driver_pin, line.reader.index, line.reader_pin) for line in self.lines]
-        interface = [n.index for n in self.interface]
+        io_nodes = [n.index for n in self.io_nodes]
         return {'name': self.name,
                 'nodes': nodes,
                 'lines': lines,
-                'interface': interface }
+                'io_nodes': io_nodes }
 
     def __setstate__(self, state):
         self.name = state['name']
         self.nodes = IndexList()
         self.lines = IndexList()
-        self.interface = GrowingList()
+        self.io_nodes = GrowingList()
         self.cells = {}
         self.forks = {}
         for s in state['nodes']:
             Node(self, *s)
         for driver, driver_pin, reader, reader_pin in state['lines']:
             Line(self, (self.nodes[driver], driver_pin), (self.nodes[reader], reader_pin))
-        for n in state['interface']:
-            self.interface.append(self.nodes[n])
+        for n in state['io_nodes']:
+            self.io_nodes.append(self.nodes[n])
 
     def __eq__(self, other):
-        return self.nodes == other.nodes and self.lines == other.lines and self.interface == other.interface
-
-    def dump(self):
-        """Returns a string representation of the circuit and all its nodes.
-        """
-        header = f'{self.name}({",".join([str(n.index) for n in self.interface])})\n'
-        return header + '\n'.join([str(n) for n in self.nodes])
+        return self.nodes == other.nodes and self.lines == other.lines and self.io_nodes == other.io_nodes
 
     def __repr__(self):
-        name = f' {self.name}' if self.name else ''
-        return f'<Circuit{name} cells={len(self.cells)} forks={len(self.forks)} ' + \
-               f'lines={len(self.lines)} ports={len(self.interface)}>'
+        return f'{{name: "{self.name}", cells: {len(self.cells)}, forks: {len(self.forks)}, lines: {len(self.lines)}, io_nodes: {len(self.io_nodes)}}}'
 
     def topological_order(self):
         """Generator function to iterate over all nodes in topological order.
 
-        Nodes without input lines and nodes whose :py:attr:`Node.kind` contains the substring 'DFF' are
-        yielded first.
+        Nodes without input lines and nodes whose :py:attr:`Node.kind` contains the
+        substrings 'dff' or 'latch' are yielded first.
         """
-        visit_count = [0] * len(self.nodes)
-        queue = deque(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower())
+        visit_count = np.zeros(len(self.nodes), dtype=np.uint32)
+        queue = deque(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower() or 'latch' in n.kind.lower())
         while len(queue) > 0:
             n = queue.popleft()
             for line in n.outs:
                 if line is None: continue
                 succ = line.reader
                 visit_count[succ] += 1
-                if visit_count[succ] == len(succ.ins) and 'dff' not in succ.kind.lower():
+                if visit_count[succ] == len(succ.ins) and 'dff' not in succ.kind.lower() and 'latch' not in succ.kind.lower():
                     queue.append(succ)
             yield n
 
+    def topological_order_with_level(self):
+        level = np.zeros(len(self.nodes), dtype=np.int32) - 1
+        for n in self.topological_order():
+            if len(n.ins) == 0 or 'dff' in n.kind.lower() or 'latch' in n.kind.lower():
+                l = 0
+            else:
+                l = level[[l.driver.index for l in n.ins if l is not None]].max() + 1
+            level[n] = l
+            yield n, l
+
     def topological_line_order(self):
         """Generator function to iterate over all lines in topological order.
         """
@@ -321,17 +533,17 @@ class Circuit:
     def reversed_topological_order(self):
         """Generator function to iterate over all nodes in reversed topological order.
 
-        Nodes without output lines and nodes whose :py:attr:`Node.kind` contains the substring 'DFF' are
-        yielded first.
+        Nodes without output lines and nodes whose :py:attr:`Node.kind` contains the
+        substrings 'dff' or 'latch' are yielded first.
         """
         visit_count = [0] * len(self.nodes)
-        queue = deque(n for n in self.nodes if len(n.outs) == 0 or 'dff' in n.kind.lower())
+        queue = deque(n for n in self.nodes if len(n.outs) == 0 or 'dff' in n.kind.lower() or 'latch' in n.kind.lower())
         while len(queue) > 0:
             n = queue.popleft()
             for line in n.ins:
                 pred = line.driver
                 visit_count[pred] += 1
-                if visit_count[pred] == len(pred.outs) and 'dff' not in pred.kind.lower():
+                if visit_count[pred] == len(pred.outs) and 'dff' not in pred.kind.lower() and 'latch' not in pred.kind.lower():
                     queue.append(pred)
             yield n
 
@@ -371,3 +583,33 @@ class Circuit:
                 queue.extend(preds)
                 region.append(n)
             yield stem, region
+
+    def dot(self, format='svg'):
+        from graphviz import Digraph
+        dot = Digraph(format=format, graph_attr={'rankdir': 'LR', 'splines': 'true'})
+
+        s_dict = dict((n, i) for i, n in enumerate(self.s_nodes))
+        node_level = np.zeros(len(self.nodes), dtype=np.uint32)
+        level_nodes = defaultdict(list)
+        for n, lv in self.topological_order_with_level():
+            level_nodes[lv].append(n)
+            node_level[n] = lv
+
+        for lv in level_nodes:
+            with dot.subgraph() as s:
+                s.attr(rank='same')
+                for n in level_nodes[lv]:
+                    ins = '|'.join([f'<i{i}>{i}' for i in range(len(n.ins))])
+                    outs = '|'.join([f'<o{i}>{i}' for i in range(len(n.outs))])
+                    io = f' [{s_dict[n]}]' if n in s_dict else ''
+                    s.node(name=str(n.index), label = f'{{{{{ins}}}|{n.index}{io}\n{n.kind}\n{n.name}|{{{outs}}}}}', shape='record')
+
+        for l in self.lines:
+            driver, reader = f'{l.driver.index}:o{l.driver_pin}', f'{l.reader.index}:i{l.reader_pin}'
+            if node_level[l.driver] >= node_level[l.reader]:
+                dot.edge(driver, reader, style='dotted', label=str(l.index))
+                pass
+            else:
+                dot.edge(driver, reader, label=str(l.index))
+
+        return dot
diff --git a/src/kyupy/def_file.py b/src/kyupy/def_file.py
new file mode 100644
index 0000000..5275d74
--- /dev/null
+++ b/src/kyupy/def_file.py
@@ -0,0 +1,297 @@
+"""A simple and incomplete parser for the Design Exchange Format (DEF).
+
+This parser extracts information on components and nets from DEF files and make them available
+as an intermediate representation (:class:`DefFile` object).
+"""
+
+from collections import defaultdict
+
+from lark import Lark, Transformer, Tree
+
+from kyupy import readtext
+
+
+class DefNet:
+    def __init__(self, name):
+        self.name = name
+        self.pins = []
+
+    @property
+    def wires(self):
+        ww = defaultdict(list)
+        [ww[dw.layer].append((int(dw.width), dw.wire_points)) for dw in self.routed if len(dw.wire_points) > 0]
+        return ww
+
+    @property
+    def vias(self):
+        vv = defaultdict(list)
+        [vv[vtype].extend(locs) for dw in self.routed for vtype, locs in dw.vias.items()]
+        return vv
+
+
+class DefWire:
+    def __init__(self):
+        self.layer = None
+        self.width = None
+        self.points = []
+
+    @property
+    def wire_points(self):
+        start = [self.points[0]]
+        rest = [p for p in self.points[1:] if not isinstance(p[0], str)]  # skip over vias
+        return start + rest if len(rest) > 0 else []
+
+    @property
+    def vias(self):
+        vv = defaultdict(list)
+        loc = self.points[0]
+        for p in self.points[1:]:
+            if not isinstance(p[0], str):  # new location
+                loc = (loc[0] if p[0] is None else p[0], loc[1] if p[1] is None else p[1])  # if None, keep previous value
+                continue
+            vtype, param = p
+            if isinstance(param, tuple):  # expand "DO x BY y STEP xs ys"
+                x_cnt, y_cnt, x_sp, y_sp = param
+                [vv[vtype].append((loc[0] + x*x_sp, loc[1] + y*y_sp, 'N')) for x in range(x_cnt) for y in range(y_cnt)]
+            else:
+                vv[vtype].append((loc[0], loc[1], param or 'N'))
+        return vv
+
+    def __repr__(self):
+        return f'<DefWire {self.layer} {self.width} {self.points}>'
+
+
+class DefVia:
+    def __init__(self, name):
+        self.name = name
+        self.rowcol = [1, 1]
+        self.cutspacing = [0, 0]
+
+
+class DefPin:
+    def __init__(self, name):
+        self.name = name
+        self.points = []
+
+
+class DefFile:
+    """Intermediate representation of a DEF file."""
+    def __init__(self):
+        self.rows = []
+        self.tracks = []
+        self.units = []
+        self.vias = {}
+        self.components = {}
+        self.pins = {}
+        self.specialnets = {}
+        self.nets = {}
+
+
+class DefTransformer(Transformer):
+    def __init__(self): self.def_file = DefFile()
+    def start(self, args): return self.def_file
+    def design(self, args): self.def_file.design = args[0].value
+    def point(self, args): return tuple(int(arg.value) if arg != '*' else None for arg in args)
+    def do_step(self, args): return tuple(map(int, args))
+    def spnet_wires(self, args): return args[0].lower(), args[1:]
+    def net_wires(self, args): return args[0].lower(), args[1:]
+    def sppoints(self, args): return args
+    def points(self, args): return args
+    def net_pin(self, args): return '__pin__', (args[0].value, args[1].value)
+    def net_opt(self, args): return args[0].lower(), args[1].value
+
+    def file_stmt(self, args):
+        value = args[1].value
+        value = value[1:-1] if value[0] == '"' else value
+        setattr(self.def_file, args[0].lower(), value)
+
+    def design_stmt(self, args):
+        stmt = args[0].lower()
+        if stmt == 'units': self.def_file.units.append((args[1].value, args[2].value, int(args[3])))
+        elif stmt == 'diearea': self.def_file.diearea = args[1:]
+        elif stmt == 'row':
+            self.def_file.rows.append((args[1].value,  # rowName
+                                       args[2].value,  # siteName
+                                       (int(args[3]), int(args[4])),  # origin x/y
+                                       args[5].value,  # orientation
+                                       max(args[6][0], args[6][1]),  # number of sites
+                                       max(args[6][2], args[6][3])  # site width
+                                      ))
+        elif stmt == 'tracks':
+            self.def_file.tracks.append((args[1].value,  # orientation
+                                         int(args[2]),  # start
+                                         int(args[3]),  # number of tracks
+                                         int(args[4]),  # spacing
+                                         args[5].value  # layer
+                                        ))
+
+    def vias_stmt(self, args):
+        via = DefVia(args[0].value)
+        [setattr(via, opt, val) for opt, val in args[1:]]
+        self.def_file.vias[via.name] = via
+
+    def vias_opt(self, args):
+        opt = args[0].lower()
+        if opt in ['viarule', 'pattern']: val = args[1].value
+        elif opt in ['layers']: val = [arg.value for arg in args[1:]]
+        else: val = [int(arg) for arg in args[1:]]
+        return opt, val
+
+    def comp_stmt(self, args):
+        name = args[0].value
+        kind = args[1].value
+        point = args[2]
+        orientation = args[3].value
+        self.def_file.components[name] = (kind, point, orientation)
+
+    def pins_stmt(self, args):
+        pin = DefPin(args[0].value)
+        [pin.points.append(val) if opt == 'placed' else setattr(pin, opt, val) for opt, val in args[1:]]
+        self.def_file.pins[pin.name] = pin
+
+    def pins_opt(self, args):
+        opt = args[0].lower()
+        if opt in ['net', 'direction', 'use']: val = args[1].value
+        elif opt in ['layer']: val = [args[1].value] + args[2:]
+        elif opt in ['placed']: val = (args[1][0], args[1][1], args[2].value)
+        else: val = []
+        return opt, val
+
+    def spnets_stmt(self, args):
+        dnet = DefNet(args[0].value)
+        for arg in args[1:]:
+            if arg[0] == '__pin__': dnet.pins.append(arg[1])
+            else: setattr(dnet, arg[0], arg[1])
+        self.def_file.specialnets[dnet.name] = dnet
+
+    def nets_stmt(self, args):
+        dnet = DefNet(args[0].value)
+        for arg in args[1:]:
+            if arg[0] == '__pin__': dnet.pins.append(arg[1])
+            else: setattr(dnet, arg[0], arg[1])
+        self.def_file.nets[dnet.name] = dnet
+
+    def spwire(self, args):
+        wire = DefWire()
+        wire.layer = args[0].value
+        wire.width = args[1].value
+        wire.points = args[-1]
+        return wire
+
+    def wire(self, args):
+        wire = DefWire()
+        wire.layer = args[0].value
+        wire.points = args[-1]
+        return wire
+
+    def sppoints_via(self, args):
+        if len(args) == 1: return args[0].value, None
+        else: return args[0].value, args[1]
+
+    def points_via(self, args):
+        if len(args) == 1: return args[0].value, 'N'
+        else: return args[0].value, args[1].value.strip()
+
+
+GRAMMAR = r"""
+    start: /#[^\n]*/? file_stmt*
+
+    ?file_stmt: /VERSION/ ID ";"
+              | /DIVIDERCHAR/ STRING ";"
+              | /BUSBITCHARS/ STRING ";"
+              | design
+
+    design: "DESIGN" ID ";" design_stmt* "END" "DESIGN"
+
+    ?design_stmt: /UNITS/ ID ID NUMBER ";"
+                | /DIEAREA/ point+ ";"
+                | /ROW/ ID ID NUMBER NUMBER ID do_step ";"
+                | /TRACKS/ /[XY]/ NUMBER "DO" NUMBER "STEP" NUMBER "LAYER" ID ";"
+                | propdef | vias | nondef | comp | pins | pinprop | spnets | nets
+
+    propdef: "PROPERTYDEFINITIONS" propdef_stmt* "END" "PROPERTYDEFINITIONS"
+    propdef_stmt: /COMPONENTPIN/ ID ID ";"
+
+    vias: "VIAS" NUMBER ";" vias_stmt* "END" "VIAS"
+    vias_stmt: "-" ID vias_opt* ";"
+    vias_opt: "+" /VIARULE/ ID
+            | "+" /CUTSIZE/ NUMBER NUMBER
+            | "+" /LAYERS/ ID ID ID
+            | "+" /CUTSPACING/ NUMBER NUMBER
+            | "+" /ENCLOSURE/ NUMBER NUMBER NUMBER NUMBER
+            | "+" /ROWCOL/ NUMBER NUMBER
+            | "+" /PATTERN/ ID
+
+    nondef: "NONDEFAULTRULES" NUMBER ";" nondef_stmt+ "END" "NONDEFAULTRULES"
+    nondef_stmt: "-" ID ( "+" /HARDSPACING/
+                        | "+" /LAYER/ ID "WIDTH" NUMBER "SPACING" NUMBER
+                        | "+" /VIA/ ID )* ";"
+
+    comp: "COMPONENTS" NUMBER ";" comp_stmt* "END" "COMPONENTS"
+    comp_stmt: "-" ID ID "+" "PLACED" point ID ";"
+
+    pins: "PINS" NUMBER ";" pins_stmt* "END" "PINS"
+    pins_stmt: "-" ID pins_opt* ";"
+    pins_opt: "+" /NET/ ID
+            | "+" /SPECIAL/
+            | "+" /DIRECTION/ ID
+            | "+" /USE/ ID
+            | "+" /PORT/
+            | "+" /LAYER/ ID point point
+            | "+" /PLACED/ point ID
+
+    pinprop: "PINPROPERTIES" NUMBER ";" pinprop_stmt* "END" "PINPROPERTIES"
+    pinprop_stmt: "-" "PIN" ID "+" "PROPERTY" ID STRING ";"
+
+    spnets: "SPECIALNETS" NUMBER ";" spnets_stmt* "END" "SPECIALNETS"
+    spnets_stmt: "-" ID ( net_pin | net_opt | spnet_wires )* ";"
+
+    spnet_wires: "+" ( /COVER/ | /FIXED/ | /ROUTED/ ) spwire ( "NEW" spwire )*
+
+    spwire: ID NUMBER spwire_opt* sppoints
+    spwire_opt: "+" /SHAPE/ ID
+              | "+" /STYLE/ ID
+
+    sppoints: point ( point | sppoints_via )+
+    sppoints_via: ID do_step?
+
+    nets: "NETS" NUMBER ";" nets_stmt* "END" "NETS"
+    nets_stmt: "-" ID ( net_pin | net_opt | net_wires )* ";"
+
+    net_pin: "(" ID ID ")"
+    net_opt: "+" /USE/ ID
+           | "+" /NONDEFAULTRULE/ ID
+    net_wires: "+" ( /COVER/ | /FIXED/ | /ROUTED/ | /NOSHIELD/ ) wire ( "NEW" wire )*
+
+    wire: ID wire_opt points
+    wire_opt: ( "TAPER" | "TAPERRULE" ID )? ("STYLE" ID)?
+
+    points: point ( point | points_via )+
+    points_via: ID ORIENTATION?
+
+    point: "(" (NUMBER|/\*/) (NUMBER|/\*/) NUMBER? ")"
+
+    do_step: "DO" NUMBER "BY" NUMBER "STEP" (NUMBER|SIGNED_NUMBER) (NUMBER|SIGNED_NUMBER)
+
+    ORIENTATION.2: /F?[NWES]/ WS
+    ID: /[^ \t\f\r\n+][^ \t\f\r\n]*/
+    STRING : "\"" /.*?/s /(?<!\\)(\\\\)*?/ "\""
+    WS: /[ \t\f\r\n]/
+
+    %import common.NUMBER
+    %import common.SIGNED_NUMBER
+    %ignore WS (/#[^\n]*/)?
+    """
+
+
+def parse(text):
+    """Parses the given ``text`` and returns a :class:`DefFile` object."""
+    return Lark(GRAMMAR, parser="lalr", transformer=DefTransformer()).parse(text)
+
+
+def load(file):
+    """Parses the contents of ``file`` and returns a :class:`DefFile` object.
+
+    Files with `.gz`-suffix are decompressed on-the-fly.
+    """
+    return parse(readtext(file))
\ No newline at end of file
diff --git a/src/kyupy/logic.py b/src/kyupy/logic.py
index e78357e..ff49344 100644
--- a/src/kyupy/logic.py
+++ b/src/kyupy/logic.py
@@ -1,4 +1,9 @@
-"""This module contains definitions and data structures for 2-, 4-, and 8-valued logic operations.
+"""Core module for handling 2-, 4-, and 8-valued logic data and signal values.
+
+Logic values are stored in numpy arrays with data type ``np.uint8``.
+There are no explicit data structures in KyuPy for holding patterns, pattern sets or vectors.
+However, there are conventions on logic value encoding and on the order of axes.
+Utility functions defined here follow these conventions.
 
 8 logic values are defined as integer constants.
 
@@ -6,21 +11,39 @@
 * 4-valued logic adds: ``UNASSIGNED`` and ``UNKNOWN``
 * 8-valued logic adds: ``RISE``, ``FALL``, ``PPULSE``, and ``NPULSE``.
 
-The bits in these constants have the following meaning:
+In general, the bits in these constants have the following meaning:
+
+* bit0: Final/settled binary value of a signal
+* bit1: Initial binary value of a signal
+* bit2: Activity or transitions are present on a signal
+
+Except when bit0 differs from bit1, but bit2 (activity) is 0:
+
+* bit0 = 1, bit1 = 0, bit2 = 0 means ``UNKNOWN`` in 4-valued and 8-valued logic.
+* bit0 = 0, bit1 = 1, bit2 = 0 means ``UNASSIGNED`` in 4-valued and 8-valued logic.
+
+2-valued logic only considers bit0, but should store logic one as ``ONE=0b011`` for interoperability.
+4-valued logic only considers bit0 and bit1.
+8-valued logic considers all 3 bits.
 
-  * bit 0: Final/settled binary value of a signal
-  * bit 1: Initial binary value of a signal
-  * bit 2: Activity or transitions are present on a signal
+Logic values are stored in numpy arrays of data type ``np.uint8``.
+The axis convention is as follows:
 
-Special meaning is given to values where bits 0 and 1 differ, but bit 2 (activity) is 0.
-These values are interpreted as ``UNKNOWN`` or ``UNASSIGNED`` in 4-valued and 8-valued logic.
+* The **last** axis goes along patterns/vectors. I.e. ``values[...,0]`` is pattern 0, ``values[...,1]`` is pattern 1, etc.
+* The **second-to-last** axis goes along the I/O and flip-flops of circuits. For a circuit ``c``, this axis is usually
+  ``len(c.s_nodes)`` long. The values of all inputs, outputs and flip-flops are stored within the same array and the location
+  along the second-to-last axis is determined by the order in :py:attr:`~kyupy.circuit.Circuit.s_nodes`.
+
+Two storage formats are used in KyuPy:
+
+* ``mv...`` (for "multi-valued"): Each logic value is stored in the least significant 3 bits of ``np.uint8``.
+* ``bp...`` (for "bit-parallel"): Groups of 8 logic values are stored as three ``np.uint8``. This format is used
+  for bit-parallel logic simulations. It is also more memory-efficient.
+
+The functions in this module use the ``mv...`` and ``bp...`` prefixes to signify the storage format they operate on.
 
-In general, 2-valued logic only considers bit 0, 4-valued logic considers bits 0 and 1, and 8-valued logic
-considers all 3 bits.
-The only exception is constant ``ONE=0b11`` which has two bits set for all logics including 2-valued logic.
 """
 
-import math
 from collections.abc import Iterable
 
 import numpy as np
@@ -66,245 +89,152 @@ def interpret(value):
     """
     if isinstance(value, Iterable) and not (isinstance(value, str) and len(value) == 1):
         return list(map(interpret, value))
-    if value in [0, '0', False, 'L', 'l']:
-        return ZERO
-    if value in [1, '1', True, 'H', 'h']:
-        return ONE
-    if value in [None, '-', 'Z', 'z']:
-        return UNASSIGNED
-    if value in ['R', 'r', '/']:
-        return RISE
-    if value in ['F', 'f', '\\']:
-        return FALL
-    if value in ['P', 'p', '^']:
-        return PPULSE
-    if value in ['N', 'n', 'v']:
-        return NPULSE
+    if value in [0, '0', False, 'L', 'l']: return ZERO
+    if value in [1, '1', True, 'H', 'h']: return ONE
+    if value in [None, '-', 'Z', 'z']: return UNASSIGNED
+    if value in ['R', 'r', '/']: return RISE
+    if value in ['F', 'f', '\\']: return FALL
+    if value in ['P', 'p', '^']: return PPULSE
+    if value in ['N', 'n', 'v']: return NPULSE
     return UNKNOWN
 
 
-_bit_in_lut = np.array([2 ** x for x in range(7, -1, -1)], dtype='uint8')
-
+def mvarray(*a):
+    """Converts (lists of) Boolean values or strings into a multi-valued array.
 
-@numba.njit
-def bit_in(a, pos):
-    return a[pos >> 3] & _bit_in_lut[pos & 7]
-
-
-class MVArray:
-    """An n-dimensional array of m-valued logic values.
+    The given values are interpreted and the axes are arranged as per KyuPy's convention.
+    Use this function to convert strings into multi-valued arrays.
+    """
+    mva = np.array(interpret(a), dtype=np.uint8)
+    if mva.ndim < 2: return mva
+    if mva.shape[-2] > 1: return mva.swapaxes(-1, -2)
+    return mva[..., 0, :]
 
-    This class wraps a numpy.ndarray of type uint8 and adds support for encoding and
-    interpreting 2-valued, 4-valued, and 8-valued logic values.
-    Each logic value is stored as an uint8, manipulations of individual values are cheaper than in
-    :py:class:`BPArray`.
 
-    :param a: If a tuple is given, it is interpreted as desired shape. To make an array of ``n`` vectors
-        compatible with a simulator ``sim``, use ``(len(sim.interface), n)``. If a :py:class:`BPArray` or
-        :py:class:`MVArray` is given, a deep copy is made. If a string, a list of strings, a list of characters,
-        or a list of lists of characters are given, the data is interpreted best-effort and the array is
-        initialized accordingly.
-    :param m: The arity of the logic. Can be set to 2, 4, or 8. If None is given, the arity of a given
-        :py:class:`BPArray` or :py:class:`MVArray` is used, or, if the array is initialized differently, 8 is used.
+def mv_str(mva, delim='\n'):
+    """Renders a given multi-valued array into a string.
     """
+    sa = np.choose(mva, np.array([*'0X-1PRFN'], dtype=np.unicode_))
+    if not hasattr(mva, 'ndim') or mva.ndim == 0: return sa
+    if mva.ndim == 1: return ''.join(sa)
+    return delim.join([''.join(c) for c in sa.swapaxes(-1,-2)])
+
 
-    def __init__(self, a, m=None):
-        self.m = m or 8
-        assert self.m in [2, 4, 8]
-
-        # Try our best to interpret given a.
-        if isinstance(a, MVArray):
-            self.data = a.data.copy()
-            """The wrapped 2-dimensional ndarray of logic values.
-
-            * Axis 0 is PI/PO/FF position, the length of this axis is called "width".
-            * Axis 1 is vector/pattern, the length of this axis is called "length".
-            """
-            self.m = m or a.m
-        elif hasattr(a, 'data'):  # assume it is a BPArray. Can't use isinstance() because BPArray isn't declared yet.
-            self.data = np.zeros((a.width, a.length), dtype=np.uint8)
-            self.m = m or a.m
-            for i in range(a.data.shape[-2]):
-                self.data[...] <<= 1
-                self.data[...] |= np.unpackbits(a.data[..., -i-1, :], axis=1)[:, :a.length]
-            if a.data.shape[-2] == 1:
-                self.data *= 3
-        elif isinstance(a, int):
-            self.data = np.full((a, 1), UNASSIGNED, dtype=np.uint8)
-        elif isinstance(a, tuple):
-            self.data = np.full(a, UNASSIGNED, dtype=np.uint8)
-        else:
-            if isinstance(a, str): a = [a]
-            self.data = np.asarray(interpret(a), dtype=np.uint8)
-            self.data = self.data[:, np.newaxis] if self.data.ndim == 1 else np.moveaxis(self.data, -2, -1)
-
-        # Cast data to m-valued logic.
-        if self.m == 2:
-            self.data[...] = ((self.data & 0b001) & ((self.data >> 1) & 0b001) | (self.data == RISE)) * ONE
-        elif self.m == 4:
-            self.data[...] = (self.data & 0b011) & ((self.data != FALL) * ONE) | ((self.data == RISE) * ONE)
-        elif self.m == 8:
-            self.data[...] = self.data & 0b111
-
-        self.length = self.data.shape[-1]
-        self.width = self.data.shape[-2]
-
-    def __repr__(self):
-        return f'<MVArray length={self.length} width={self.width} m={self.m} mem={hr_bytes(self.data.nbytes)}>'
-
-    def __str__(self):
-        return str([self[idx] for idx in range(self.length)])
-
-    def __getitem__(self, vector_idx):
-        """Returns a string representing the desired vector."""
-        chars = ["0", "X", "-", "1", "P", "R", "F", "N"]
-        return ''.join(chars[v] for v in self.data[:, vector_idx])
-
-    def __len__(self):
-        return self.length
-
-
-def mv_cast(*args, m=8):
-    return [a if isinstance(a, MVArray) else MVArray(a, m=m) for a in args]
-
-
-def mv_getm(*args):
-    return max([a.m for a in args if isinstance(a, MVArray)] + [0]) or 8
-
-
-def _mv_not(m, out, inp):
+def _mv_not(out, inp):
     np.bitwise_xor(inp, 0b11, out=out)  # this also exchanges UNASSIGNED <-> UNKNOWN
-    if m > 2:
-        np.putmask(out, (inp == UNKNOWN), UNKNOWN)  # restore UNKNOWN
+    np.putmask(out, (inp == UNKNOWN), UNKNOWN)  # restore UNKNOWN
 
 
-def mv_not(x1, out=None):
+def mv_not(x1 : np.ndarray, out=None):
     """A multi-valued NOT operator.
 
-    :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
-    :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray`
-        is returned.
-    :return: An :py:class:`MVArray` with the result.
+    :param x1: A multi-valued array.
+    :param out: An optional storage destination. If None, a new multi-valued array is returned.
+    :return: A multi-valued array with the result.
     """
-    m = mv_getm(x1)
-    x1 = mv_cast(x1, m=m)[0]
-    out = out or MVArray(x1.data.shape, m=m)
-    _mv_not(m, out.data, x1.data)
+    out = out or np.empty(x1.shape, dtype=np.uint8)
+    _mv_not(out, x1)
     return out
 
 
-def _mv_or(m, out, *ins):
-    if m > 2:
-        any_unknown = (ins[0] == UNKNOWN) | (ins[0] == UNASSIGNED)
-        for inp in ins[1:]: any_unknown |= (inp == UNKNOWN) | (inp == UNASSIGNED)
-        any_one = (ins[0] == ONE)
-        for inp in ins[1:]: any_one |= (inp == ONE)
+def _mv_or(out, *ins):
+    any_unknown = (ins[0] == UNKNOWN) | (ins[0] == UNASSIGNED)
+    for inp in ins[1:]: any_unknown |= (inp == UNKNOWN) | (inp == UNASSIGNED)
+    any_one = (ins[0] == ONE)
+    for inp in ins[1:]: any_one |= (inp == ONE)
 
-        out[...] = ZERO
-        np.putmask(out, any_one, ONE)
-        for inp in ins:
-            np.bitwise_or(out, inp, out=out, where=~any_one)
-        np.putmask(out, (any_unknown & ~any_one), UNKNOWN)
-    else:
-        out[...] = ZERO
-        for inp in ins: np.bitwise_or(out, inp, out=out)
+    out[...] = ZERO
+    np.putmask(out, any_one, ONE)
+    for inp in ins:
+        np.bitwise_or(out, inp, out=out, where=~any_one)
+    np.putmask(out, (any_unknown & ~any_one), UNKNOWN)
 
 
 def mv_or(x1, x2, out=None):
     """A multi-valued OR operator.
 
-    :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
-    :param x2: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
-    :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray`
-        is returned.
-    :return: An :py:class:`MVArray` with the result.
+    :param x1: A multi-valued array.
+    :param x2: A multi-valued array.
+    :param out: An optional storage destination. If None, a new multi-valued array is returned.
+    :return: A multi-valued array with the result.
     """
-    m = mv_getm(x1, x2)
-    x1, x2 = mv_cast(x1, x2, m=m)
-    out = out or MVArray(np.broadcast(x1.data, x2.data).shape, m=m)
-    _mv_or(m, out.data, x1.data, x2.data)
+    out = out or np.empty(np.broadcast(x1, x2).shape, dtype=np.uint8)
+    _mv_or(out, x1, x2)
     return out
 
 
-def _mv_and(m, out, *ins):
-    if m > 2:
-        any_unknown = (ins[0] == UNKNOWN) | (ins[0] == UNASSIGNED)
-        for inp in ins[1:]: any_unknown |= (inp == UNKNOWN) | (inp == UNASSIGNED)
-        any_zero = (ins[0] == ZERO)
-        for inp in ins[1:]: any_zero |= (inp == ZERO)
+def _mv_and(out, *ins):
+    any_unknown = (ins[0] == UNKNOWN) | (ins[0] == UNASSIGNED)
+    for inp in ins[1:]: any_unknown |= (inp == UNKNOWN) | (inp == UNASSIGNED)
+    any_zero = (ins[0] == ZERO)
+    for inp in ins[1:]: any_zero |= (inp == ZERO)
 
-        out[...] = ONE
-        np.putmask(out, any_zero, ZERO)
-        for inp in ins:
-            np.bitwise_and(out, inp | 0b100, out=out, where=~any_zero)
-            if m > 4: np.bitwise_or(out, inp & 0b100, out=out, where=~any_zero)
-        np.putmask(out, (any_unknown & ~any_zero), UNKNOWN)
-    else:
-        out[...] = ONE
-        for inp in ins: np.bitwise_and(out, inp, out=out)
+    out[...] = ONE
+    np.putmask(out, any_zero, ZERO)
+    for inp in ins:
+        np.bitwise_and(out, inp | 0b100, out=out, where=~any_zero)
+        np.bitwise_or(out, inp & 0b100, out=out, where=~any_zero)
+    np.putmask(out, (any_unknown & ~any_zero), UNKNOWN)
 
 
 def mv_and(x1, x2, out=None):
     """A multi-valued AND operator.
 
-    :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
-    :param x2: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
-    :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray`
-        is returned.
-    :return: An :py:class:`MVArray` with the result.
+    :param x1: A multi-valued array.
+    :param x2: A multi-valued array.
+    :param out: An optional storage destination. If None, a new multi-valued array is returned.
+    :return: A multi-valued array with the result.
     """
-    m = mv_getm(x1, x2)
-    x1, x2 = mv_cast(x1, x2, m=m)
-    out = out or MVArray(np.broadcast(x1.data, x2.data).shape, m=m)
-    _mv_and(m, out.data, x1.data, x2.data)
+    out = out or np.empty(np.broadcast(x1, x2).shape, dtype=np.uint8)
+    _mv_and(out, x1, x2)
     return out
 
 
-def _mv_xor(m, out, *ins):
-    if m > 2:
-        any_unknown = (ins[0] == UNKNOWN) | (ins[0] == UNASSIGNED)
-        for inp in ins[1:]: any_unknown |= (inp == UNKNOWN) | (inp == UNASSIGNED)
+def _mv_xor(out, *ins):
+    any_unknown = (ins[0] == UNKNOWN) | (ins[0] == UNASSIGNED)
+    for inp in ins[1:]: any_unknown |= (inp == UNKNOWN) | (inp == UNASSIGNED)
 
-        out[...] = ZERO
-        for inp in ins:
-            np.bitwise_xor(out, inp & 0b011, out=out)
-            if m > 4: np.bitwise_or(out, inp & 0b100, out=out)
-        np.putmask(out, any_unknown, UNKNOWN)
-    else:
-        out[...] = ZERO
-        for inp in ins: np.bitwise_xor(out, inp, out=out)
+    out[...] = ZERO
+    for inp in ins:
+        np.bitwise_xor(out, inp & 0b011, out=out)
+        np.bitwise_or(out, inp & 0b100, out=out)
+    np.putmask(out, any_unknown, UNKNOWN)
 
 
 def mv_xor(x1, x2, out=None):
     """A multi-valued XOR operator.
 
-    :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
-    :param x2: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
-    :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray`
-        is returned.
-    :return: An :py:class:`MVArray` with the result.
+    :param x1: A multi-valued array.
+    :param x2: A multi-valued array.
+    :param out: An optional storage destination. If None, a new multi-valued array is returned.
+    :return: A multi-valued array with the result.
     """
-    m = mv_getm(x1, x2)
-    x1, x2 = mv_cast(x1, x2, m=m)
-    out = out or MVArray(np.broadcast(x1.data, x2.data).shape, m=m)
-    _mv_xor(m, out.data, x1.data, x2.data)
+    out = out or np.empty(np.broadcast(x1, x2).shape, dtype=np.uint8)
+    _mv_xor(out, x1, x2)
     return out
 
 
 def mv_latch(d, t, q_prev, out=None):
-    """A latch that is transparent if `t` is high. `q_prev` has to be the output value from the previous clock cycle.
+    """A multi-valued latch operator.
+
+    A latch outputs ``d`` when transparent (``t`` is high).
+    It outputs ``q_prev`` when in latched state (``t`` is low).
+
+    :param d: A multi-valued array for the data input.
+    :param t: A multi-valued array for the control input.
+    :param q_prev: A multi-valued array with the output value of this latch from the previous clock cycle.
+    :param out: An optional storage destination. If None, a new multi-valued array is returned.
+    :return: A multi-valued array for the latch output ``q``.
     """
-    m = mv_getm(d, t, q_prev)
-    d, t, q_prev = mv_cast(d, t, q_prev, m=m)
-    out = out or MVArray(np.broadcast(d.data, t.data, q_prev).shape, m=m)
-    out.data[...] = t.data & d.data & 0b011
-    out.data[...] |= ~t.data & 0b010 & (q_prev.data << 1)
-    out.data[...] |= ~t.data & 0b001 & (out.data >> 1)
-    out.data[...] |= ((out.data << 1) ^ (out.data << 2)) & 0b100
-    unknown = (t.data == UNKNOWN) \
-              | (t.data == UNASSIGNED) \
-              | (((d.data == UNKNOWN) | (d.data == UNASSIGNED)) & (t.data != ZERO))
-    np.putmask(out.data, unknown, UNKNOWN)
+    out = out or np.empty(np.broadcast(d, t, q_prev).shape, dtype=np.uint8)
+    out[...] = t & d & 0b011
+    out[...] |= ~t & 0b010 & (q_prev << 1)
+    out[...] |= ~t & 0b001 & (out >> 1)
+    out[...] |= ((out << 1) ^ (out << 2)) & 0b100
+    unknown = (t == UNKNOWN) \
+              | (t == UNASSIGNED) \
+              | (((d == UNKNOWN) | (d == UNASSIGNED)) & (t != ZERO))
+    np.putmask(out, unknown, UNKNOWN)
     return out
 
 
@@ -313,191 +243,191 @@ def mv_transition(init, final, out=None):
     Pulses in the input data are ignored. If any of the inputs are ``UNKNOWN``, the result is ``UNKNOWN``.
     If both inputs are ``UNASSIGNED``, the result is ``UNASSIGNED``.
 
-    :param init: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
-    :param final: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
-    :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray`
-        is returned.
-    :return: An :py:class:`MVArray` with the result.
+    :param init: A multi-valued array.
+    :param final: A multi-valued array.
+    :param out: An optional storage destination. If None, a new multi-valued array is returned.
+    :return: A multi-valued array with the result.
     """
-    m = mv_getm(init, final)
-    init, final = mv_cast(init, final, m=m)
-    init = init.data
-    final = final.data
-    out = out or MVArray(np.broadcast(init, final).shape, m=8)
-    out.data[...] = (init & 0b010) | (final & 0b001)
-    out.data[...] |= ((out.data << 1) ^ (out.data << 2)) & 0b100
+    out = out or np.empty(np.broadcast(init, final).shape, dtype=np.uint8)
+    out[...] = (init & 0b010) | (final & 0b001)
+    out[...] |= ((out << 1) ^ (out << 2)) & 0b100
     unknown = (init == UNKNOWN) | (init == UNASSIGNED) | (final == UNKNOWN) | (final == UNASSIGNED)
     unassigned = (init == UNASSIGNED) & (final == UNASSIGNED)
-    np.putmask(out.data, unknown, UNKNOWN)
-    np.putmask(out.data, unassigned, UNASSIGNED)
+    np.putmask(out, unknown, UNKNOWN)
+    np.putmask(out, unassigned, UNASSIGNED)
     return out
 
 
-class BPArray:
-    """An n-dimensional array of m-valued logic values that uses bit-parallel storage.
+def mv_to_bp(mva):
+    """Converts a multi-valued array into a bit-parallel array.
+    """
+    if mva.ndim == 1: mva = mva[..., np.newaxis]
+    return np.packbits(unpackbits(mva)[...,:3], axis=-2, bitorder='little').swapaxes(-1,-2)
+
+
+def bparray(*a):
+    """Converts (lists of) Boolean values or strings into a bit-parallel array.
+
+    The given values are interpreted and the axes are arranged as per KyuPy's convention.
+    Use this function to convert strings into bit-parallel arrays.
+    """
+    return mv_to_bp(mvarray(*a))
 
-    The primary use of this format is in aiding efficient bit-parallel logic simulation.
-    The secondary benefit over :py:class:`MVArray` is its memory efficiency.
-    Accessing individual values is more expensive than with :py:class:`MVArray`.
-    Therefore it may be more efficient to unpack the data into an :py:class:`MVArray` and pack it again into a
-    :py:class:`BPArray` for simulation.
 
-    See :py:class:`MVArray` for constructor parameters.
+def bp_to_mv(bpa):
+    """Converts a bit-parallel array into a multi-valued array.
     """
+    return packbits(np.unpackbits(bpa, axis=-1, bitorder='little').swapaxes(-1,-2))
 
-    def __init__(self, a, m=None):
-        if not isinstance(a, MVArray) and not isinstance(a, BPArray):
-            a = MVArray(a, m)
-            self.m = a.m
-        if isinstance(a, MVArray):
-            if m is not None and m != a.m:
-                a = MVArray(a, m)  # cast data
-            self.m = a.m
-            assert self.m in [2, 4, 8]
-            nwords = math.ceil(math.log2(self.m))
-            nbytes = (a.data.shape[-1] - 1) // 8 + 1
-            self.data = np.zeros(a.data.shape[:-1] + (nwords, nbytes), dtype=np.uint8)
-            """The wrapped 3-dimensional ndarray.
-
-            * Axis 0 is PI/PO/FF position, the length of this axis is called "width".
-            * Axis 1 has length ``ceil(log2(m))`` for storing all bits.
-            * Axis 2 are the vectors/patterns packed into uint8 words.
-            """
-            for i in range(self.data.shape[-2]):
-                self.data[..., i, :] = np.packbits((a.data >> i) & 1, axis=-1)
-        else:  # we have a BPArray
-            self.data = a.data.copy()  # TODO: support conversion to different m
-            self.m = a.m
-        self.length = a.length
-        self.width = a.width
-
-    def __repr__(self):
-        return f'<BPArray length={self.length} width={self.width} m={self.m} mem={hr_bytes(self.data.nbytes)}>'
-
-    def __len__(self):
-        return self.length
-
-
-def bp_buf(out, inp):
-    md = out.shape[-2]
-    assert md == inp.shape[-2]
-    if md > 1:
-        unknown = inp[..., 0, :] ^ inp[..., 1, :]
-        if md > 2: unknown &= ~inp[..., 2, :]
-        out[..., 0, :] = inp[..., 0, :] | unknown
-        out[..., 1, :] = inp[..., 1, :] & ~unknown
-        if md > 2: out[..., 2, :] = inp[..., 2, :] & ~unknown
-    else:
-        out[..., 0, :] = inp[..., 0, :]
-
-
-def bp_not(out, inp):
-    md = out.shape[-2]
-    assert md == inp.shape[-2]
-    if md > 1:
-        unknown = inp[..., 0, :] ^ inp[..., 1, :]
-        if md > 2: unknown &= ~inp[..., 2, :]
-        out[..., 0, :] = ~inp[..., 0, :] | unknown
-        out[..., 1, :] = ~inp[..., 1, :] & ~unknown
-        if md > 2: out[..., 2, :] = inp[..., 2, :] & ~unknown
-    else:
-        out[..., 0, :] = ~inp[..., 0, :]
-
-
-def bp_or(out, *ins):
-    md = out.shape[-2]
-    for inp in ins: assert md == inp.shape[-2]
+
+def bp4v_buf(out, inp):
+    unknown = inp[..., 0, :] ^ inp[..., 1, :]
+    out[..., 0, :] = inp[..., 0, :] | unknown
+    out[..., 1, :] = inp[..., 1, :] & ~unknown
+    return out
+
+
+def bp8v_buf(out, inp):
+    unknown = (inp[..., 0, :] ^ inp[..., 1, :]) & ~inp[..., 2, :]
+    out[..., 0, :] = inp[..., 0, :] | unknown
+    out[..., 1, :] = inp[..., 1, :] & ~unknown
+    out[..., 2, :] = inp[..., 2, :] & ~unknown
+    return out
+
+
+def bp4v_not(out, inp):
+    unknown = inp[..., 0, :] ^ inp[..., 1, :]
+    out[..., 0, :] = ~inp[..., 0, :] | unknown
+    out[..., 1, :] = ~inp[..., 1, :] & ~unknown
+    return out
+
+
+def bp8v_not(out, inp):
+    unknown = (inp[..., 0, :] ^ inp[..., 1, :]) & ~inp[..., 2, :]
+    out[..., 0, :] = ~inp[..., 0, :] | unknown
+    out[..., 1, :] = ~inp[..., 1, :] & ~unknown
+    out[..., 2, :] = inp[..., 2, :] & ~unknown
+    return out
+
+
+def bp4v_or(out, *ins):
     out[...] = 0
-    if md == 1:
-        for inp in ins: out[..., 0, :] |= inp[..., 0, :]
-    elif md == 2:
-        any_unknown = ins[0][..., 0, :] ^ ins[0][..., 1, :]
-        for inp in ins[1:]: any_unknown |= inp[..., 0, :] ^ inp[..., 1, :]
-        any_one = ins[0][..., 0, :] & ins[0][..., 1, :]
-        for inp in ins[1:]: any_one |= inp[..., 0, :] & inp[..., 1, :]
-        for inp in ins:
-            out[..., 0, :] |= inp[..., 0, :] | any_unknown
-            out[..., 1, :] |= inp[..., 1, :] & (~any_unknown | any_one)
-    else:
-        any_unknown = (ins[0][..., 0, :] ^ ins[0][..., 1, :]) & ~ins[0][..., 2, :]
-        for inp in ins[1:]: any_unknown |= (inp[..., 0, :] ^ inp[..., 1, :]) & ~inp[..., 2, :]
-        any_one = ins[0][..., 0, :] & ins[0][..., 1, :] & ~ins[0][..., 2, :]
-        for inp in ins[1:]: any_one |= inp[..., 0, :] & inp[..., 1, :] & ~inp[..., 2, :]
-        for inp in ins:
-            out[..., 0, :] |= inp[..., 0, :] | any_unknown
-            out[..., 1, :] |= inp[..., 1, :] & (~any_unknown | any_one)
-            out[..., 2, :] |= inp[..., 2, :] & (~any_unknown | any_one) & ~any_one
-
-
-def bp_and(out, *ins):
-    md = out.shape[-2]
-    for inp in ins: assert md == inp.shape[-2]
+    any_unknown = ins[0][..., 0, :] ^ ins[0][..., 1, :]
+    for inp in ins[1:]: any_unknown |= inp[..., 0, :] ^ inp[..., 1, :]
+    any_one = ins[0][..., 0, :] & ins[0][..., 1, :]
+    for inp in ins[1:]: any_one |= inp[..., 0, :] & inp[..., 1, :]
+    for inp in ins:
+        out[..., 0, :] |= inp[..., 0, :] | any_unknown
+        out[..., 1, :] |= inp[..., 1, :] & (~any_unknown | any_one)
+    return out
+
+
+def bp8v_or(out, *ins):
+    out[...] = 0
+    any_unknown = (ins[0][..., 0, :] ^ ins[0][..., 1, :]) & ~ins[0][..., 2, :]
+    for inp in ins[1:]: any_unknown |= (inp[..., 0, :] ^ inp[..., 1, :]) & ~inp[..., 2, :]
+    any_one = ins[0][..., 0, :] & ins[0][..., 1, :] & ~ins[0][..., 2, :]
+    for inp in ins[1:]: any_one |= inp[..., 0, :] & inp[..., 1, :] & ~inp[..., 2, :]
+    for inp in ins:
+        out[..., 0, :] |= inp[..., 0, :] | any_unknown
+        out[..., 1, :] |= inp[..., 1, :] & (~any_unknown | any_one)
+        out[..., 2, :] |= inp[..., 2, :] & (~any_unknown | any_one) & ~any_one
+    return out
+
+
+def bp4v_and(out, *ins):
+    out[...] = 0xff
+    any_unknown = ins[0][..., 0, :] ^ ins[0][..., 1, :]
+    for inp in ins[1:]: any_unknown |= inp[..., 0, :] ^ inp[..., 1, :]
+    any_zero = ~ins[0][..., 0, :] & ~ins[0][..., 1, :]
+    for inp in ins[1:]: any_zero |= ~inp[..., 0, :] & ~inp[..., 1, :]
+    for inp in ins:
+        out[..., 0, :] &= inp[..., 0, :] | (any_unknown & ~any_zero)
+        out[..., 1, :] &= inp[..., 1, :] & ~any_unknown
+    return out
+
+
+def bp8v_and(out, *ins):
     out[...] = 0xff
-    if md == 1:
-        for inp in ins: out[..., 0, :] &= inp[..., 0, :]
-    elif md == 2:
-        any_unknown = ins[0][..., 0, :] ^ ins[0][..., 1, :]
-        for inp in ins[1:]: any_unknown |= inp[..., 0, :] ^ inp[..., 1, :]
-        any_zero = ~ins[0][..., 0, :] & ~ins[0][..., 1, :]
-        for inp in ins[1:]: any_zero |= ~inp[..., 0, :] & ~inp[..., 1, :]
-        for inp in ins:
-            out[..., 0, :] &= inp[..., 0, :] | (any_unknown & ~any_zero)
-            out[..., 1, :] &= inp[..., 1, :] & ~any_unknown
-    else:
-        any_unknown = (ins[0][..., 0, :] ^ ins[0][..., 1, :]) & ~ins[0][..., 2, :]
-        for inp in ins[1:]: any_unknown |= (inp[..., 0, :] ^ inp[..., 1, :]) & ~inp[..., 2, :]
-        any_zero = ~ins[0][..., 0, :] & ~ins[0][..., 1, :] & ~ins[0][..., 2, :]
-        for inp in ins[1:]: any_zero |= ~inp[..., 0, :] & ~inp[..., 1, :] & ~inp[..., 2, :]
-        out[..., 2, :] = 0
-        for inp in ins:
-            out[..., 0, :] &= inp[..., 0, :] | (any_unknown & ~any_zero)
-            out[..., 1, :] &= inp[..., 1, :] & ~any_unknown
-            out[..., 2, :] |= inp[..., 2, :] & (~any_unknown | any_zero) & ~any_zero
-
-
-def bp_xor(out, *ins):
-    md = out.shape[-2]
-    for inp in ins: assert md == inp.shape[-2]
+    any_unknown = (ins[0][..., 0, :] ^ ins[0][..., 1, :]) & ~ins[0][..., 2, :]
+    for inp in ins[1:]: any_unknown |= (inp[..., 0, :] ^ inp[..., 1, :]) & ~inp[..., 2, :]
+    any_zero = ~ins[0][..., 0, :] & ~ins[0][..., 1, :] & ~ins[0][..., 2, :]
+    for inp in ins[1:]: any_zero |= ~inp[..., 0, :] & ~inp[..., 1, :] & ~inp[..., 2, :]
+    out[..., 2, :] = 0
+    for inp in ins:
+        out[..., 0, :] &= inp[..., 0, :] | (any_unknown & ~any_zero)
+        out[..., 1, :] &= inp[..., 1, :] & ~any_unknown
+        out[..., 2, :] |= inp[..., 2, :] & (~any_unknown | any_zero) & ~any_zero
+    return out
+
+
+def bp4v_xor(out, *ins):
     out[...] = 0
-    if md == 1:
-        for inp in ins: out[..., 0, :] ^= inp[..., 0, :]
-    elif md == 2:
-        any_unknown = ins[0][..., 0, :] ^ ins[0][..., 1, :]
-        for inp in ins[1:]: any_unknown |= inp[..., 0, :] ^ inp[..., 1, :]
-        for inp in ins: out[...] ^= inp
-        out[..., 0, :] |= any_unknown
-        out[..., 1, :] &= ~any_unknown
-    else:
-        any_unknown = (ins[0][..., 0, :] ^ ins[0][..., 1, :]) & ~ins[0][..., 2, :]
-        for inp in ins[1:]: any_unknown |= (inp[..., 0, :] ^ inp[..., 1, :]) & ~inp[..., 2, :]
-        for inp in ins:
-            out[..., 0, :] ^= inp[..., 0, :]
-            out[..., 1, :] ^= inp[..., 1, :]
-            out[..., 2, :] |= inp[..., 2, :]
-        out[..., 0, :] |= any_unknown
-        out[..., 1, :] &= ~any_unknown
-        out[..., 2, :] &= ~any_unknown
-
-
-def bp_latch(out, d, t, q_prev):
-    md = out.shape[-2]
-    assert md == d.shape[-2]
-    assert md == t.shape[-2]
-    assert md == q_prev.shape[-2]
-    if md == 1:
-        out[...] = (d & t) | (q_prev & ~t)
-    elif md == 2:
-        any_unknown = t[..., 0, :] ^ t[..., 1, :]
-        any_unknown |= (d[..., 0, :] ^ d[..., 1, :]) & (t[..., 0, :] | t[..., 1, :])
-        out[...] = (d & t) | (q_prev & ~t)
-        out[..., 0, :] |= any_unknown
-        out[..., 1, :] &= ~any_unknown
-    else:
-        any_unknown = (t[..., 0, :] ^ t[..., 1, :]) & ~t[..., 2, :]
-        any_unknown |= ((d[..., 0, :] ^ d[..., 1, :]) & ~d[..., 2, :]) & (t[..., 0, :] | t[..., 1, :] | t[..., 2, :])
-        out[..., 1, :] = (d[..., 1, :] & t[..., 1, :]) | (q_prev[..., 0, :] & ~t[..., 1, :])
-        out[..., 0, :] = (d[..., 0, :] & t[..., 0, :]) | (out[..., 1, :] & ~t[..., 0, :])
-        out[..., 2, :] = out[..., 1, :] ^ out[..., 0, :]
-        out[..., 0, :] |= any_unknown
-        out[..., 1, :] &= ~any_unknown
-        out[..., 2, :] &= ~any_unknown
+    any_unknown = ins[0][..., 0, :] ^ ins[0][..., 1, :]
+    for inp in ins[1:]: any_unknown |= inp[..., 0, :] ^ inp[..., 1, :]
+    for inp in ins:
+        out[..., 0, :] ^= inp[..., 0, :]
+        out[..., 1, :] ^= inp[..., 1, :]
+    out[..., 0, :] |= any_unknown
+    out[..., 1, :] &= ~any_unknown
+    return out
+
+
+def bp8v_xor(out, *ins):
+    out[...] = 0
+    any_unknown = (ins[0][..., 0, :] ^ ins[0][..., 1, :]) & ~ins[0][..., 2, :]
+    for inp in ins[1:]: any_unknown |= (inp[..., 0, :] ^ inp[..., 1, :]) & ~inp[..., 2, :]
+    for inp in ins:
+        out[..., 0, :] ^= inp[..., 0, :]
+        out[..., 1, :] ^= inp[..., 1, :]
+        out[..., 2, :] |= inp[..., 2, :]
+    out[..., 0, :] |= any_unknown
+    out[..., 1, :] &= ~any_unknown
+    out[..., 2, :] &= ~any_unknown
+    return out
+
+
+def bp8v_latch(out, d, t, q_prev):
+    any_unknown = (t[..., 0, :] ^ t[..., 1, :]) & ~t[..., 2, :]
+    any_unknown |= ((d[..., 0, :] ^ d[..., 1, :]) & ~d[..., 2, :]) & (t[..., 0, :] | t[..., 1, :] | t[..., 2, :])
+    out[..., 1, :] = (d[..., 1, :] & t[..., 1, :]) | (q_prev[..., 0, :] & ~t[..., 1, :])
+    out[..., 0, :] = (d[..., 0, :] & t[..., 0, :]) | (out[..., 1, :] & ~t[..., 0, :])
+    out[..., 2, :] = out[..., 1, :] ^ out[..., 0, :]
+    out[..., 0, :] |= any_unknown
+    out[..., 1, :] &= ~any_unknown
+    out[..., 2, :] &= ~any_unknown
+    return out
+
+
+_bit_in_lut = np.array([2 ** x for x in range(7, -1, -1)], dtype='uint8')
+
+
+@numba.njit
+def bit_in(a, pos):
+    return a[pos >> 3] & _bit_in_lut[pos & 7]
+
+
+def unpackbits(a : np.ndarray):
+    """Unpacks the bits of given ndarray ``a``.
+
+    Similar to ``np.unpackbits``, but accepts any dtype, preserves the shape of ``a`` and
+    adds a new last axis with the bits of each item. Bits are in 'little'-order, i.e.,
+    a[...,0] is the least significant bit of each item.
+    """
+    return np.unpackbits(a.view(np.uint8), bitorder='little').reshape(*a.shape, 8*a.itemsize)
+
+
+def packbits(a, dtype=np.uint8):
+    """Packs the values of a boolean-valued array ``a`` along its last axis into bits.
+
+    Similar to ``np.packbits``, but returns an array of given dtype and the shape of ``a`` with the last axis removed.
+    The last axis of `a` is truncated or padded according to the bit-width of the given dtype.
+    Signed integer datatypes are padded with the most significant bit, all others are padded with `0`.
+    """
+    dtype = np.dtype(dtype)
+    bits = 8 * dtype.itemsize
+    a = a[...,:bits]
+    if a.shape[-1] < bits:
+        p = [(0,0)]*(len(a.shape)-1) + [(0, bits-a.shape[-1])]
+        a = np.pad(a, p, 'edge') if dtype.name[0] == 'i' else np.pad(a, p, 'constant', constant_values=0)
+    return np.packbits(a, bitorder='little').view(dtype).reshape(a.shape[:-1])
diff --git a/src/kyupy/logic_sim.py b/src/kyupy/logic_sim.py
index 29be2b6..81ef44b 100644
--- a/src/kyupy/logic_sim.py
+++ b/src/kyupy/logic_sim.py
@@ -1,7 +1,7 @@
 """A high-throughput combinational logic simulator.
 
 The class :py:class:`~kyupy.logic_sim.LogicSim` performs parallel simulations of the combinational part of a circuit.
-The logic operations are performed bit-parallel on packed numpy arrays.
+The logic operations are performed bit-parallel on packed numpy arrays (see bit-parallel (bp) array description in :py:mod:`~kyupy.logic`).
 Simple sequential circuits can be simulated by repeated assignments and propagations.
 However, this simulator ignores the clock network and simply assumes that all state-elements are clocked all the time.
 """
@@ -10,127 +10,52 @@ import math
 
 import numpy as np
 
-from . import logic, hr_bytes
+from . import numba, logic, hr_bytes, sim
+from .circuit import Circuit
 
-
-class LogicSim:
+class LogicSim(sim.SimOps):
     """A bit-parallel naïve combinational simulator for 2-, 4-, or 8-valued logic.
 
     :param circuit: The circuit to simulate.
-    :type circuit: :py:class:`~kyupy.circuit.Circuit`
     :param sims: The number of parallel logic simulations to perform.
-    :type sims: int
     :param m: The arity of the logic, must be 2, 4, or 8.
-    :type m: int
+    :param c_reuse: If True, intermediate signal values may get overwritten when not needed anymore to save memory.
+    :param strip_forks: If True, forks are not included in the simulation model to save memory and simulation time.
     """
-    def __init__(self, circuit, sims=8, m=8):
+    def __init__(self, circuit: Circuit, sims: int = 8, m: int = 8, c_reuse: bool = False, strip_forks: bool = False):
         assert m in [2, 4, 8]
+        super().__init__(circuit, c_reuse=c_reuse, strip_forks=strip_forks)
         self.m = m
-        mdim = math.ceil(math.log2(m))
-        self.circuit = circuit
+        self.mdim = math.ceil(math.log2(m))
         self.sims = sims
         nbytes = (sims - 1) // 8 + 1
-        dffs = [n for n in circuit.nodes if 'dff' in n.kind.lower()]
-        latches = [n for n in circuit.nodes if 'latch' in n.kind.lower()]
-        self.interface = list(circuit.interface) + dffs + latches
-
-        self.width = len(self.interface)
-        """The number of bits in the circuit state (number of ports + number of state-elements)."""
-
-        self.state = np.zeros((len(circuit.lines), mdim, nbytes), dtype='uint8')
-        self.state_epoch = np.zeros(len(circuit.nodes), dtype='int8') - 1
-        self.tmp = np.zeros((5, mdim, nbytes), dtype='uint8')
-        self.zero = np.zeros((mdim, nbytes), dtype='uint8')
-        self.epoch = 0
-
-        self.latch_dict = dict((n.index, i) for i, n in enumerate(latches))
-        self.latch_state = np.zeros((len(latches), mdim, nbytes), dtype='uint8')
-
-        known_fct = [(f[:-4], getattr(self, f)) for f in dir(self) if f.endswith('_fct')]
-        self.node_fct = []
-        for n in circuit.nodes:
-            t = n.kind.lower().replace('__fork__', 'fork')
-            t = t.replace('nbuff', 'fork')
-            t = t.replace('input', 'fork')
-            t = t.replace('output', 'fork')
-            t = t.replace('__const0__', 'const0')
-            t = t.replace('__const1__', 'const1')
-            t = t.replace('tieh', 'const1')
-            t = t.replace('ibuff', 'not')
-            t = t.replace('inv', 'not')
-
-            fcts = [f for n, f in known_fct if t.startswith(n)]
-            if len(fcts) < 1:
-                raise ValueError(f'Unknown node kind {n.kind}')
-            self.node_fct.append(fcts[0])
 
-    def __repr__(self):
-        return f'<LogicSim {self.circuit.name} sims={self.sims} m={self.m} state_mem={hr_bytes(self.state.nbytes)}>'
+        self.c = np.zeros((self.c_len, self.mdim, nbytes), dtype=np.uint8)
+        self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8)
+        """Logic values of the sequential elements (flip-flops) and ports.
 
-    def assign(self, stimuli):
-        """Assign stimuli to the primary inputs and state-elements (flip-flops).
+        It is a pair of arrays in bit-parallel (bp) storage format:
 
-        :param stimuli: The input data to assign. Must be in bit-parallel storage format and in a compatible shape.
-        :type stimuli: :py:class:`~kyupy.logic.BPArray`
-        :returns: The given stimuli object.
-        """
-        for node, stim in zip(self.interface, stimuli.data if hasattr(stimuli, 'data') else stimuli):
-            if len(node.outs) == 0: continue
-            if node.index in self.latch_dict:
-                self.latch_state[self.latch_dict[node.index]] = stim
-            else:
-                outputs = [self.state[line] if line else self.tmp[3] for line in node.outs]
-                self.node_fct[node]([stim], outputs)
-            for line in node.outs:
-                if line is not None: self.state_epoch[line.reader] = self.epoch
-        for n in self.circuit.nodes:
-            if n.kind in ('__const1__', '__const0__'):
-                outputs = [self.state[line] if line else self.tmp[3] for line in n.outs]
-                self.node_fct[n]([], outputs)
-                for line in n.outs:
-                    if line is not None: self.state_epoch[line.reader] = self.epoch
-        return stimuli
-
-    def capture(self, responses):
-        """Capture the current values at the primary outputs and in the state-elements (flip-flops).
-        For primary outputs, the logic value is stored unmodified in the given target array.
-        For flip-flops, the logic value is constructed from the previous state and the new state.
-
-        :param responses: A bit-parallel storage target for the responses in a compatible shape.
-        :type responses: :py:class:`~kyupy.logic.BPArray`
-        :returns: The given responses object.
+        * ``s[0]`` Assigned values. Simulator will read (P)PI value from here.
+        * ``s[1]`` Result values. Simulator will write (P)PO values here.
+
+        Access this array to assign new values to the (P)PIs or read values from the (P)POs.
         """
-        for node, resp in zip(self.interface, responses.data if hasattr(responses, 'data') else responses):
-            if len(node.ins) == 0: continue
-            if node.index in self.latch_dict:
-                resp[...] = self.state[node.outs[0]]
-            else:
-                resp[...] = self.state[node.ins[0]]
-            # FIXME: unclear why we should use outs for DFFs
-            #if self.m > 2 and 'dff' in node.kind.lower() and len(node.outs) > 0:
-            #    if node.outs[0] is None:
-            #        resp[1, :] = ~self.state[node.outs[1], 0, :]  # assume QN is connected, take inverse of that.
-            #    else:
-            #        resp[1, :] = self.state[node.outs[0], 0, :]
-            #    if self.m > 4:
-            #        resp[..., 2, :] = resp[..., 0, :] ^ resp[..., 1, :]
-            #    # We don't handle X or - correctly.
-
-        return responses
-
-    def propagate(self, inject_cb=None):
-        """Propagate the input values towards the outputs (Perform all logic operations in topological order).
+        self.s[:,:,1,:] = 255  # unassigned
 
-        If the circuit is sequential (it contains flip-flops), one call simulates one clock cycle.
-        Multiple clock cycles are simulated by a assign-propagate-capture loop:
+    def __repr__(self):
+        return f'{{name: "{self.circuit.name}", sims: {self.sims}, m: {self.m}, c_bytes: {self.c.nbytes}}}'
+
+    def s_to_c(self):
+        """Copies the values from ``s[0]`` the inputs of the combinational portion.
+        """
+        self.c[self.pippi_c_locs] = self.s[0, self.pippi_s_locs, :self.mdim]
 
-        .. code-block:: python
+    def c_prop(self, inject_cb=None):
+        """Propagate the input values through the combinational circuit towards the outputs.
 
-           # initial state in state_bp
-           for cycle in range(10):  # simulate 10 clock cycles
-               sim.assign(state_bp)
-               sim.propagate()
-               sim.capture(state_bp)
+        Performs all logic operations in topological order.
+        If the circuit is sequential (it contains flip-flops), one call simulates one clock cycle.
 
         :param inject_cb: A callback function for manipulating intermediate signal values.
             This function is called with a line and its new logic values (in bit-parallel format) after
@@ -138,83 +63,273 @@ class LogicSim:
             resumes with the manipulated values after the callback returns.
         :type inject_cb: ``f(Line, ndarray)``
         """
-        for node in self.circuit.topological_order():
-            if self.state_epoch[node] != self.epoch: continue
-            inputs = [self.state[line] if line else self.zero for line in node.ins]
-            outputs = [self.state[line] if line else self.tmp[3] for line in node.outs]
-            if node.index in self.latch_dict:
-                inputs.append(self.latch_state[self.latch_dict[node.index]])
-            self.node_fct[node](inputs, outputs)
-            for line in node.outs:
-                if inject_cb is not None: inject_cb(line, self.state[line])
-                self.state_epoch[line.reader] = self.epoch
-        self.epoch = (self.epoch + 1) % 128
-
-    def cycle(self, state, inject_cb=None):
-        """Assigns the given state, propagates it and captures the new state.
-
-        :param state: A bit-parallel array in a compatible shape holding the current circuit state.
-            The contained data is assigned to the PI and PPI and overwritten by data at the PO and PPO after
-            propagation.
-        :type state: :py:class:`~kyupy.logic.BPArray`
-        :param inject_cb: A callback function for manipulating intermediate signal values. See :py:func:`propagate`.
-        :returns: The given state object.
+        t0 = self.c_locs[self.tmp_idx]
+        t1 = self.c_locs[self.tmp2_idx]
+        if self.m == 2:
+            if inject_cb is None:
+                _prop_cpu(self.ops, self.c_locs, self.c)
+            else:
+                for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
+                    o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
+                    if op == sim.BUF1: self.c[o0]=self.c[i0]
+                    elif op == sim.INV1: self.c[o0] = ~self.c[i0]
+                    elif op == sim.AND2: self.c[o0] = self.c[i0] & self.c[i1]
+                    elif op == sim.AND3: self.c[o0] = self.c[i0] & self.c[i1] & self.c[i2]
+                    elif op == sim.AND4: self.c[o0] = self.c[i0] & self.c[i1] & self.c[i2] & self.c[i3]
+                    elif op == sim.NAND2: self.c[o0] = ~(self.c[i0] & self.c[i1])
+                    elif op == sim.NAND3: self.c[o0] = ~(self.c[i0] & self.c[i1] & self.c[i2])
+                    elif op == sim.NAND4: self.c[o0] = ~(self.c[i0] & self.c[i1] & self.c[i2] & self.c[i3])
+                    elif op == sim.OR2: self.c[o0] = self.c[i0] | self.c[i1]
+                    elif op == sim.OR3: self.c[o0] = self.c[i0] | self.c[i1] | self.c[i2]
+                    elif op == sim.OR4: self.c[o0] = self.c[i0] | self.c[i1] | self.c[i2] | self.c[i3]
+                    elif op == sim.NOR2: self.c[o0] = ~(self.c[i0] | self.c[i1])
+                    elif op == sim.NOR3: self.c[o0] = ~(self.c[i0] | self.c[i1] | self.c[i2])
+                    elif op == sim.NOR4: self.c[o0] = ~(self.c[i0] | self.c[i1] | self.c[i2] | self.c[i3])
+                    elif op == sim.XOR2: self.c[o0] = self.c[i0] ^ self.c[i1]
+                    elif op == sim.XOR3: self.c[o0] = self.c[i0] ^ self.c[i1] ^ self.c[i2]
+                    elif op == sim.XOR4: self.c[o0] = self.c[i0] ^ self.c[i1] ^ self.c[i2] ^ self.c[i3]
+                    elif op == sim.XNOR2: self.c[o0] = ~(self.c[i0] ^ self.c[i1])
+                    elif op == sim.XNOR3: self.c[o0] = ~(self.c[i0] ^ self.c[i1] ^ self.c[i2])
+                    elif op == sim.XNOR4: self.c[o0] = ~(self.c[i0] ^ self.c[i1] ^ self.c[i2] ^ self.c[i3])
+                    elif op == sim.AO21: self.c[o0] = (self.c[i0] & self.c[i1]) | self.c[i2]
+                    elif op == sim.AOI21: self.c[o0] = ~((self.c[i0] & self.c[i1]) | self.c[i2])
+                    elif op == sim.OA21: self.c[o0] = (self.c[i0] | self.c[i1]) & self.c[i2]
+                    elif op == sim.OAI21: self.c[o0] = ~((self.c[i0] | self.c[i1]) & self.c[i2])
+                    elif op == sim.AO22: self.c[o0] = (self.c[i0] & self.c[i1]) | (self.c[i2] & self.c[i3])
+                    elif op == sim.AOI22: self.c[o0] = ~((self.c[i0] & self.c[i1]) | (self.c[i2] & self.c[i3]))
+                    elif op == sim.OA22: self.c[o0] = (self.c[i0] | self.c[i1]) & (self.c[i2] | self.c[i3])
+                    elif op == sim.OAI22: self.c[o0] = ~((self.c[i0] | self.c[i1]) & (self.c[i2] | self.c[i3]))
+                    elif op == sim.AO211: self.c[o0] =  (self.c[i0] & self.c[i1]) | self.c[i2] | self.c[i3]
+                    elif op == sim.AOI211:self.c[o0] = ~((self.c[i0] & self.c[i1]) | self.c[i2] | self.c[i3])
+                    elif op == sim.OA211: self.c[o0] =  (self.c[i0] | self.c[i1]) & self.c[i2] & self.c[i3]
+                    elif op == sim.OAI211:self.c[o0] = ~((self.c[i0] | self.c[i1]) & self.c[i2] & self.c[i3])
+                    elif op == sim.MUX21: self.c[o0] = (self.c[i0] & ~self.c[i2]) | (self.c[i1] & self.c[i2])
+                    else: print(f'unknown op {op}')
+                    inject_cb(o0, self.s[o0])
+        elif self.m == 4:
+            for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
+                o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
+                if op == sim.BUF1: self.c[o0]=self.c[i0]
+                elif op == sim.INV1: logic.bp4v_not(self.c[o0], self.c[i0])
+                elif op == sim.AND2: logic.bp4v_and(self.c[o0], self.c[i0], self.c[i1])
+                elif op == sim.AND3: logic.bp4v_and(self.c[o0], self.c[i0], self.c[i1], self.c[i2])
+                elif op == sim.AND4: logic.bp4v_and(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3])
+                elif op == sim.NAND2: logic.bp4v_and(self.c[o0], self.c[i0], self.c[i1]); logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.NAND3: logic.bp4v_and(self.c[o0], self.c[i0], self.c[i1], self.c[i2]); logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.NAND4: logic.bp4v_and(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3]); logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.OR2: logic.bp4v_or(self.c[o0], self.c[i0], self.c[i1])
+                elif op == sim.OR3: logic.bp4v_or(self.c[o0], self.c[i0], self.c[i1], self.c[i2])
+                elif op == sim.OR4: logic.bp4v_or(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3])
+                elif op == sim.NOR2: logic.bp4v_or(self.c[o0], self.c[i0], self.c[i1]); logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.NOR3: logic.bp4v_or(self.c[o0], self.c[i0], self.c[i1], self.c[i2]); logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.NOR4: logic.bp4v_or(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3]); logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.XOR2: logic.bp4v_xor(self.c[o0], self.c[i0], self.c[i1])
+                elif op == sim.XOR3: logic.bp4v_xor(self.c[o0], self.c[i0], self.c[i1], self.c[i2])
+                elif op == sim.XOR4: logic.bp4v_xor(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3])
+                elif op == sim.XNOR2: logic.bp4v_xor(self.c[o0], self.c[i0], self.c[i1]); logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.XNOR3: logic.bp4v_xor(self.c[o0], self.c[i0], self.c[i1], self.c[i2]); logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.XNOR4: logic.bp4v_xor(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3]); logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.AO21:
+                    logic.bp4v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_or(self.c[o0], self.c[t0], self.c[i2])
+                elif op == sim.AOI21:
+                    logic.bp4v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_or(self.c[o0], self.c[t0], self.c[i2])
+                    logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.OA21:
+                    logic.bp4v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_and(self.c[o0], self.c[t0], self.c[i2])
+                elif op == sim.OAI21:
+                    logic.bp4v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_and(self.c[o0], self.c[t0], self.c[i2])
+                    logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.AO22:
+                    logic.bp4v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_and(self.c[t1], self.c[i2], self.c[i3])
+                    logic.bp4v_or(self.c[o0], self.c[t0], self.c[t1])
+                elif op == sim.AOI22:
+                    logic.bp4v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_and(self.c[t1], self.c[i2], self.c[i3])
+                    logic.bp4v_or(self.c[o0], self.c[t0], self.c[t1])
+                    logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.OA22:
+                    logic.bp4v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_or(self.c[t1], self.c[i2], self.c[i3])
+                    logic.bp4v_and(self.c[o0], self.c[t0], self.c[t1])
+                elif op == sim.OAI22:
+                    logic.bp4v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_or(self.c[t1], self.c[i2], self.c[i3])
+                    logic.bp4v_and(self.c[o0], self.c[t0], self.c[t1])
+                    logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.AO211:
+                    logic.bp4v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_or(self.c[o0], self.c[t0], self.c[i2], self.c[i3])
+                elif op == sim.AOI211:
+                    logic.bp4v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_or(self.c[o0], self.c[t0], self.c[i2], self.c[i3])
+                    logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.OA211:
+                    logic.bp4v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_and(self.c[o0], self.c[t0], self.c[i2], self.c[i3])
+                elif op == sim.OAI211:
+                    logic.bp4v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp4v_and(self.c[o0], self.c[t0], self.c[i2], self.c[i3])
+                    logic.bp4v_not(self.c[o0], self.c[o0])
+                elif op == sim.MUX21:
+                    logic.bp4v_not(self.c[t1], self.c[i2])
+                    logic.bp4v_and(self.c[t0], self.c[i0], self.c[t1])
+                    logic.bp4v_and(self.c[t1], self.c[i1], self.c[i2])
+                    logic.bp4v_or(self.c[o0], self.c[t0], self.c[t1])
+                else: print(f'unknown op {op}')
+        else:
+            for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
+                o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
+                if op == sim.BUF1: self.c[o0]=self.c[i0]
+                elif op == sim.INV1: logic.bp8v_not(self.c[o0], self.c[i0])
+                elif op == sim.AND2: logic.bp8v_and(self.c[o0], self.c[i0], self.c[i1])
+                elif op == sim.AND3: logic.bp8v_and(self.c[o0], self.c[i0], self.c[i1], self.c[i2])
+                elif op == sim.AND4: logic.bp8v_and(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3])
+                elif op == sim.NAND2: logic.bp8v_and(self.c[o0], self.c[i0], self.c[i1]); logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.NAND3: logic.bp8v_and(self.c[o0], self.c[i0], self.c[i1], self.c[i2]); logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.NAND4: logic.bp8v_and(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3]); logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.OR2: logic.bp8v_or(self.c[o0], self.c[i0], self.c[i1])
+                elif op == sim.OR3: logic.bp8v_or(self.c[o0], self.c[i0], self.c[i1], self.c[i2])
+                elif op == sim.OR4: logic.bp8v_or(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3])
+                elif op == sim.NOR2: logic.bp8v_or(self.c[o0], self.c[i0], self.c[i1]); logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.NOR3: logic.bp8v_or(self.c[o0], self.c[i0], self.c[i1], self.c[i2]); logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.NOR4: logic.bp8v_or(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3]); logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.XOR2: logic.bp8v_xor(self.c[o0], self.c[i0], self.c[i1])
+                elif op == sim.XOR3: logic.bp8v_xor(self.c[o0], self.c[i0], self.c[i1], self.c[i2])
+                elif op == sim.XOR4: logic.bp8v_xor(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3])
+                elif op == sim.XNOR2: logic.bp8v_xor(self.c[o0], self.c[i0], self.c[i1]); logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.XNOR3: logic.bp8v_xor(self.c[o0], self.c[i0], self.c[i1], self.c[i2]); logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.XNOR4: logic.bp8v_xor(self.c[o0], self.c[i0], self.c[i1], self.c[i2], self.c[i3]); logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.AO21:
+                    logic.bp8v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_or(self.c[o0], self.c[t0], self.c[i2])
+                elif op == sim.AOI21:
+                    logic.bp8v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_or(self.c[o0], self.c[t0], self.c[i2])
+                    logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.OA21:
+                    logic.bp8v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_and(self.c[o0], self.c[t0], self.c[i2])
+                elif op == sim.OAI21:
+                    logic.bp8v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_and(self.c[o0], self.c[t0], self.c[i2])
+                    logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.AO22:
+                    logic.bp8v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_and(self.c[t1], self.c[i2], self.c[i3])
+                    logic.bp8v_or(self.c[o0], self.c[t0], self.c[t1])
+                elif op == sim.AOI22:
+                    logic.bp8v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_and(self.c[t1], self.c[i2], self.c[i3])
+                    logic.bp8v_or(self.c[o0], self.c[t0], self.c[t1])
+                    logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.OA22:
+                    logic.bp8v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_or(self.c[t1], self.c[i2], self.c[i3])
+                    logic.bp8v_and(self.c[o0], self.c[t0], self.c[t1])
+                elif op == sim.OAI22:
+                    logic.bp8v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_or(self.c[t1], self.c[i2], self.c[i3])
+                    logic.bp8v_and(self.c[o0], self.c[t0], self.c[t1])
+                    logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.AO211:
+                    logic.bp8v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_or(self.c[o0], self.c[t0], self.c[i2], self.c[i3])
+                elif op == sim.AOI211:
+                    logic.bp8v_and(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_or(self.c[o0], self.c[t0], self.c[i2], self.c[i3])
+                    logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.OA211:
+                    logic.bp8v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_and(self.c[o0], self.c[t0], self.c[i2], self.c[i3])
+                elif op == sim.OAI211:
+                    logic.bp8v_or(self.c[t0], self.c[i0], self.c[i1])
+                    logic.bp8v_and(self.c[o0], self.c[t0], self.c[i2], self.c[i3])
+                    logic.bp8v_not(self.c[o0], self.c[o0])
+                elif op == sim.MUX21:
+                    logic.bp8v_not(self.c[t1], self.c[i2])
+                    logic.bp8v_and(self.c[t0], self.c[i0], self.c[t1])
+                    logic.bp8v_and(self.c[t1], self.c[i1], self.c[i2])
+                    logic.bp8v_or(self.c[o0], self.c[t0], self.c[t1])
+                else: print(f'unknown op {op}')
+                if inject_cb is not None: inject_cb(o0, self.s[o0])
+
+    def c_to_s(self):
+        """Copies (captures) the results of the combinational portion to ``s[1]``.
         """
-        self.assign(state)
-        self.propagate(inject_cb)
-        return self.capture(state)
-
-    def fork_fct(self, inputs, outputs):
-        for o in outputs: o[...] = inputs[0]
+        self.s[1, self.poppo_s_locs, :self.mdim] = self.c[self.poppo_c_locs]
+        if self.mdim == 1:
+            self.s[1, self.poppo_s_locs, 1:2] = self.c[self.poppo_c_locs]
 
-    def const0_fct(self, _, outputs):
-        for o in outputs: o[...] = 0
+    def s_ppo_to_ppi(self):
+        """Constructs a new assignment based on the current data in ``s``.
 
-    def const1_fct(self, _, outputs):
-        for o in outputs:
-            o[...] = 0
-            logic.bp_not(o, o)
+        Use this function for simulating consecutive clock cycles.
 
-    def not_fct(self, inputs, outputs):
-        logic.bp_not(outputs[0], inputs[0])
-
-    def and_fct(self, inputs, outputs):
-        logic.bp_and(outputs[0], *inputs)
-
-    def or_fct(self, inputs, outputs):
-        logic.bp_or(outputs[0], *inputs)
-
-    def xor_fct(self, inputs, outputs):
-        logic.bp_xor(outputs[0], *inputs)
-
-    def sdff_fct(self, inputs, outputs):
-        logic.bp_buf(outputs[0], inputs[0])
-        if len(outputs) > 1:
-            logic.bp_not(outputs[1], inputs[0])
-
-    def dff_fct(self, inputs, outputs):
-        logic.bp_buf(outputs[0], inputs[0])
-        if len(outputs) > 1:
-            logic.bp_not(outputs[1], inputs[0])
-
-    def latch_fct(self, inputs, outputs):
-        logic.bp_latch(outputs[0], inputs[0], inputs[1], inputs[2])
-        if len(outputs) > 1:
-            logic.bp_not(outputs[1], inputs[0])
-
-    def nand_fct(self, inputs, outputs):
-        logic.bp_and(outputs[0], *inputs)
-        logic.bp_not(outputs[0], outputs[0])
-
-    def nor_fct(self, inputs, outputs):
-        logic.bp_or(outputs[0], *inputs)
-        logic.bp_not(outputs[0], outputs[0])
-
-    def xnor_fct(self, inputs, outputs):
-        logic.bp_xor(outputs[0], *inputs)
-        logic.bp_not(outputs[0], outputs[0])
-
-    def aoi21_fct(self, inputs, outputs):
-        logic.bp_and(self.tmp[0], inputs[0], inputs[1])
-        logic.bp_or(outputs[0], self.tmp[0], inputs[2])
-        logic.bp_not(outputs[0], outputs[0])
\ No newline at end of file
+        For 2-valued or 4-valued simulations, all valued from PPOs (in ``s[1]``) and copied to the PPIs (in ``s[0]``).
+        For 8-valued simulations, PPI transitions are constructed from the final values of the assignment (in ``s[0]``) and the
+        final values of the results (in ``s[1]``).
+        """
+        # TODO: handle latches correctly
+        if self.mdim < 3:
+            self.s[0, self.ppio_s_locs] = self.s[1, self.ppio_s_locs]
+        else:
+            self.s[0, self.ppio_s_locs, 1] = self.s[0, self.ppio_s_locs, 0]  # initial value is previously assigned final value
+            self.s[0, self.ppio_s_locs, 0] = self.s[1, self.ppio_s_locs, 0]  # final value is newly captured final value
+            self.s[0, self.ppio_s_locs, 2] = self.s[0, self.ppio_s_locs, 0] ^ self.s[0, self.ppio_s_locs, 1]  # TODO: not correct for X, -
+
+    def cycle(self, cycles: int = 1, inject_cb=None):
+        """Repeatedly assigns a state, propagates it, captures the new state, and transfers PPOs to PPIs.
+
+        :param cycles: The number of cycles to simulate.
+        :param inject_cb: A callback function for manipulating intermediate signal values. See :py:func:`c_prop`.
+        """
+        for _ in range(cycles):
+            self.s_to_c()
+            self.c_prop(inject_cb)
+            self.c_to_s()
+            self.s_ppo_to_ppi()
+
+
+@numba.njit
+def _prop_cpu(ops, c_locs, c):
+    for op, o0, i0, i1, i2, i3 in ops[:,:6]:
+        o0, i0, i1, i2, i3 = [c_locs[x] for x in (o0, i0, i1, i2, i3)]
+        if op == sim.BUF1: c[o0]=c[i0]
+        elif op == sim.INV1: c[o0] = ~c[i0]
+        elif op == sim.AND2: c[o0] = c[i0] & c[i1]
+        elif op == sim.AND3: c[o0] = c[i0] & c[i1] & c[i2]
+        elif op == sim.AND4: c[o0] = c[i0] & c[i1] & c[i2] & c[i3]
+        elif op == sim.NAND2: c[o0] = ~(c[i0] & c[i1])
+        elif op == sim.NAND3: c[o0] = ~(c[i0] & c[i1] & c[i2])
+        elif op == sim.NAND4: c[o0] = ~(c[i0] & c[i1] & c[i2] & c[i3])
+        elif op == sim.OR2: c[o0] = c[i0] | c[i1]
+        elif op == sim.OR3: c[o0] = c[i0] | c[i1] | c[i2]
+        elif op == sim.OR4: c[o0] = c[i0] | c[i1] | c[i2] | c[i3]
+        elif op == sim.NOR2: c[o0] = ~(c[i0] | c[i1])
+        elif op == sim.NOR3: c[o0] = ~(c[i0] | c[i1] | c[i2])
+        elif op == sim.NOR4: c[o0] = ~(c[i0] | c[i1] | c[i2] | c[i3])
+        elif op == sim.XOR2: c[o0] = c[i0] ^ c[i1]
+        elif op == sim.XOR3: c[o0] = c[i0] ^ c[i1] ^ c[i2]
+        elif op == sim.XOR4: c[o0] = c[i0] ^ c[i1] ^ c[i2] ^ c[i3]
+        elif op == sim.XNOR2: c[o0] = ~(c[i0] ^ c[i1])
+        elif op == sim.XNOR3: c[o0] = ~(c[i0] ^ c[i1] ^ c[i2])
+        elif op == sim.XNOR4: c[o0] = ~(c[i0] ^ c[i1] ^ c[i2] ^ c[i3])
+        elif op == sim.AO21: c[o0] = (c[i0] & c[i1]) | c[i2]
+        elif op == sim.OA21: c[o0] = (c[i0] | c[i1]) & c[i2]
+        elif op == sim.AO22: c[o0] = (c[i0] & c[i1]) | (c[i2] & c[i3])
+        elif op == sim.OA22: c[o0] = (c[i0] | c[i1]) & (c[i2] | c[i3])
+        elif op == sim.AOI21: c[o0] = ~((c[i0] & c[i1]) | c[i2])
+        elif op == sim.OAI21: c[o0] = ~((c[i0] | c[i1]) & c[i2])
+        elif op == sim.AOI22: c[o0] = ~((c[i0] & c[i1]) | (c[i2] & c[i3]))
+        elif op == sim.OAI22: c[o0] = ~((c[i0] | c[i1]) & (c[i2] | c[i3]))
+        elif op == sim.AO211: c[o0] = (c[i0] & c[i1]) | c[i2] | c[i3]
+        elif op == sim.OA211: c[o0] = (c[i0] | c[i1]) & c[i2] & c[i3]
+        elif op == sim.AOI211: c[o0] = ~((c[i0] & c[i1]) | c[i2] | c[i3])
+        elif op == sim.OAI211: c[o0] = ~((c[i0] | c[i1]) & c[i2] & c[i3])
+        elif op == sim.MUX21: c[o0] = (c[i0] & ~c[i2]) | (c[i1] & c[i2])
+        else: print(f'unknown op {op}')
diff --git a/src/kyupy/sdf.py b/src/kyupy/sdf.py
index c1e3ebf..015f975 100644
--- a/src/kyupy/sdf.py
+++ b/src/kyupy/sdf.py
@@ -1,11 +1,10 @@
 """A simple and incomplete parser for the Standard Delay Format (SDF).
 
-The main purpose of this parser is to extract pin-to-pin delay and interconnect delay information from SDF files.
-Sophisticated timing specifications (timing checks, conditional delays, etc.) are currently not supported.
-
-The functions :py:func:`load` and :py:func:`read` return an intermediate representation (:class:`DelayFile` object).
-Call :py:func:`DelayFile.annotation` to match the intermediate representation to a given circuit.
+This parser extracts pin-to-pin delay and interconnect delay information from SDF files.
+Sophisticated timing specifications (timing checks, conditional delays, etc.) are ignored.
 
+The functions :py:func:`parse` and :py:func:`load` return an intermediate representation (:class:`DelayFile` object).
+Call :py:func:`DelayFile.iopaths` and :py:func:`DelayFile.interconnects` to generate delay information for a given circuit.
 """
 
 from collections import namedtuple
@@ -15,6 +14,7 @@ import numpy as np
 from lark import Lark, Transformer
 
 from . import log, readtext
+from .circuit import Circuit
 from .techlib import TechLib
 
 
@@ -27,145 +27,112 @@ class DelayFile:
     """
     def __init__(self, name, cells):
         self.name = name
-        if None in cells:
-            self.interconnects = cells[None]
-        else:
-            self.interconnects = None
+        self._interconnects = cells.get(None, None)
         self.cells = dict((n, l) for n, l in cells.items() if n)
 
     def __repr__(self):
         return '\n'.join(f'{n}: {l}' for n, l in self.cells.items()) + '\n' + \
-               '\n'.join(str(i) for i in self.interconnects)
-
-    def annotation(self, circuit, tlib=TechLib(), dataset=1, interconnect=True, ffdelays=True):
-        """Constructs an 3-dimensional ndarray with timing data for each line in ``circuit``.
-
-        An IOPATH delay for a node is annotated to the line connected to the input pin specified in the IOPATH.
-
-        Currently, only ABSOLUTE IOPATH and INTERCONNECT delays are supported.
-        Pulse rejection limits are derived from absolute delays, explicit declarations (PATHPULSE etc.) are ignored.
-
-        :param circuit: The circuit to annotate. Names from the STIL file are matched to the node names.
-        :type circuit: :class:`~kyupy.circuit.Circuit`
-        :param tlib: A technology library object that provides pin name mappings.
-        :type tlib: :py:class:`~kyupy.techlib.TechLib`
-        :param dataset: SDFs store multiple values for each delay (e.g. minimum, typical, maximum).
-            An integer selects the dataset to use (default is 1 for 'typical').
-            If a tuple is given, the annotator will calculate the average of multiple datasets.
-        :type dataset: ``int`` or ``tuple``
-        :param interconnect: Whether or not to include the delays of interconnects in the annotation.
-            To properly annotate interconnect delays, the circuit model has to include a '__fork__' node on
-            every signal and every fanout-branch. The Verilog parser aids in this by setting the parameter
-            `branchforks=True` in :py:func:`kyupy.verilog.parse`.
-        :type interconnect: ``bool``
-        :param ffdelays: Whether or not to include the delays of flip-flops in the annotation.
-        :type ffdelays: ``bool``
-        :return: A 3-dimensional ndarray with timing data.
-
-            * Axis 0: line index.
-            * Axis 1: type of timing data: 0='delay', 1='pulse rejection limit'.
-            * Axis 2: The polarity of the output transition of the reading node: 0='rising', 1='falling'.
-
-            The polarity for pulse rejection is determined by the latter transition of the pulse.
-            E.g., ``timing[42, 1, 0]`` is the rejection limit of a negative pulse at the output
-            of the reader of line 42.
+               '\n'.join(str(i) for i in self._interconnects)
+
+    def iopaths(self, circuit:Circuit, tlib:TechLib):
+        """Constructs an ndarray containing all IOPATH delays.
+
+        All IOPATH delays for a node ``n`` are annotated to the line connected to the input pin specified in the IOPATH.
+
+        Limited support of SDF spec:
+
+        * Only ABSOLUTE delay values are supported.
+        * Only two delvals per delval_list is supported. First is rising/posedge, second is falling/negedge
+          transition at the output of the IOPATH (SDF spec, pp. 3-17).
+        * PATHPULSE declarations are ignored.
+
+        The axes convention of KyuPy's delay data arrays is as follows:
+
+        * Axis 0: dataset (usually 3 datasets per SDF-file)
+        * Axis 1: line index (e.g. ``n.ins[0]``, ``n.ins[1]``)
+        * Axis 2: polarity of the transition at the IOPATH-input (e.g. at ``n.ins[0]`` or ``n.ins[1]``), 0='rising/posedge', 1='falling/negedge'
+        * Axis 3: polarity of the transition at the IOPATH-output (at ``n.outs[0]``), 0='rising/posedge', 1='falling/negedge'
         """
-        def select_del(_delvals, idx):
-            if isinstance(dataset, tuple):
-                return sum(_delvals[idx][d] for d in dataset) / len(dataset)
-            return _delvals[idx][dataset]
-
-        def find_cell(name):
-            if name not in circuit.cells:
-                name = name.replace('\\', '')
-            if name not in circuit.cells:
-                name = name.replace('[', '_').replace(']', '_')
-            if name not in circuit.cells:
-                return None
-            return circuit.cells[name]
-
-        timing = np.zeros((len(circuit.lines), 2, 2))
-        for cn, iopaths in self.cells.items():
-            for ipn, opn, *delvals in iopaths:
-                delvals = [d if len(d) > 0 else [0, 0, 0] for d in delvals]
-                if max(max(delvals)) == 0:
-                    continue
-                cell = find_cell(cn)
-                if cell is None:
-                    #log.warn(f'Cell from SDF not found in circuit: {cn}')
-                    continue
-                ipn = re.sub(r'\((neg|pos)edge ([^)]+)\)', r'\2', ipn)
-                ipin = tlib.pin_index(cell.kind, ipn)
-                opin = tlib.pin_index(cell.kind, opn)
-                kind = cell.kind.lower()
-
-                def add_delays(_line):
-                    if _line is not None:
-                        timing[_line, :, 0] += select_del(delvals, 0)
-                        timing[_line, :, 1] += select_del(delvals, 1)
-
-                take_avg = False
-                if kind.startswith('sdff'):
-                    if not ipn.startswith('CLK'):
-                        continue
-                    if ffdelays and (len(cell.outs) > opin):
-                        add_delays(cell.outs[opin])
-                else:
-                    if ipin < len(cell.ins):
-                        if kind.startswith(('xor', 'xnor')):
-                            # print(ipn, ipin, times[cell.i_lines[ipin], 0, 0])
-                            take_avg = timing[cell.ins[ipin]].sum() > 0
-                        add_delays(cell.ins[ipin])
-                        if take_avg:
-                            timing[cell.ins[ipin]] /= 2
+
+        def find_cell(name:str):
+            if name not in circuit.cells: name = name.replace('\\', '')
+            if name not in circuit.cells: name = name.replace('[', '_').replace(']', '_')
+            return circuit.cells.get(name, None)
+
+        delays = np.zeros((len(circuit.lines), 2, 2, 3))  # dataset last during construction.
+
+        for name, iopaths in self.cells.items():
+            name = name.replace('\\', '')
+            if cell := circuit.cells.get(name, None):
+                for i_pin_spec, o_pin_spec, *dels in iopaths:
+                    if i_pin_spec.startswith('(posedge '): i_pol_idxs = [0]
+                    elif i_pin_spec.startswith('(negedge '): i_pol_idxs = [1]
+                    else: i_pol_idxs = [0, 1]
+                    i_pin_spec = re.sub(r'\((neg|pos)edge ([^)]+)\)', r'\2', i_pin_spec)
+                    if line := cell.ins[tlib.pin_index(cell.kind, i_pin_spec)]:
+                        delays[line, i_pol_idxs] = [d if len(d) > 0 else [0, 0, 0] for d in dels]
                     else:
-                        log.warn(f'No line to annotate pin {ipn} of {cell}')
+                        log.warn(f'No line to annotate in circuit: {i_pin_spec} for {cell}')
+            else:
+                log.warn(f'Name from SDF not found in circuit: {name}')
+
+        return np.moveaxis(delays, -1, 0)
+
+    def interconnects(self, circuit:Circuit, tlib:TechLib):
+        """Constructs an ndarray containing all INTERCONNECT delays.
+
+        To properly annotate interconnect delays, the circuit model has to include a '__fork__' node on
+        every signal and every fanout-branch. The Verilog parser aids in this by setting the parameter
+        `branchforks=True` in :py:func:`~kyupy.verilog.parse` or :py:func:`~kyupy.verilog.load`.
 
-        if not interconnect or self.interconnects is None:
-            return timing
+        Limited support of SDF spec:
 
-        for n1, n2, *delvals in self.interconnects:
+        * Only ABSOLUTE delay values are supported.
+        * Only two delvals per delval_list is supported. First is rising/posedge, second is falling/negedge
+          transition.
+        * PATHPULSE declarations are ignored.
+
+        The axes convention of KyuPy's delay data arrays is as follows:
+
+        * Axis 0: dataset (usually 3 datasets per SDF-file)
+        * Axis 1: line index. Usually input line of a __fork__.
+        * Axis 2: (axis of size 2 for compatability to IOPATH results. Values are broadcast along this axis.)
+        * Axis 3: polarity of the transition, 0='rising/posedge', 1='falling/negedge'
+        """
+
+        delays = np.zeros((len(circuit.lines), 2, 2, 3))  # dataset last during construction.
+
+        for n1, n2, *delvals in self._interconnects:
             delvals = [d if len(d) > 0 else [0, 0, 0] for d in delvals]
-            if max(max(delvals)) == 0:
+            if max(max(delvals)) == 0: continue
+            cn1, pn1 = n1.split('/') if '/' in n1 else (n1, None)
+            cn2, pn2 = n2.split('/') if '/' in n2 else (n2, None)
+            cn1 = cn1.replace('\\','')
+            cn2 = cn2.replace('\\','')
+            c1, c2 = circuit.cells[cn1], circuit.cells[cn2]
+            p1 = tlib.pin_index(c1.kind, pn1) if pn1 is not None else 0
+            p2 = tlib.pin_index(c2.kind, pn2) if pn2 is not None else 0
+            if len(c1.outs) <= p1 or c1.outs[p1] is None:
+                log.warn(f'No line to annotate pin {pn1} of {c1}')
                 continue
-            if '/' in n1:
-                i = n1.rfind('/')
-                cn1 = n1[0:i]
-                pn1 = n1[i+1:]
-            else:
-                cn1, pn1 = (n1, 'Z')
-            if '/' in n2:
-                i = n2.rfind('/')
-                cn2 = n2[0:i]
-                pn2 = n2[i+1:]
-            else:
-                cn2, pn2 = (n2, 'IN')
-            c1 = find_cell(cn1)
-            if c1 is None:
-                #log.warn(f'Cell from SDF not found in circuit: {cn1}')
-                continue
-            c2 = find_cell(cn2)
-            if c2 is None:
-                #log.warn(f'Cell from SDF not found in circuit: {cn2}')
-                continue
-            p1, p2 = tlib.pin_index(c1.kind, pn1), tlib.pin_index(c2.kind, pn2)
-            line = None
-            if len(c2.ins) <= p2:
+            if len(c2.ins) <= p2 or c2.ins[p2] is None:
                 log.warn(f'No line to annotate pin {pn2} of {c2}')
                 continue
-            f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver
-            if f1 != f2:  # possible branchfork
-                assert len(f2.ins) == 1
+            f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver  # find the forks between cells.
+            assert f1.kind == '__fork__'
+            assert f2.kind == '__fork__'
+            if f1 != f2:  # at least two forks, make sure f2 is a branchfork connected to f1
+                assert len(f2.outs) == 1
+                assert f1.outs[f2.ins[0].driver_pin] == f2.ins[0]
                 line = f2.ins[0]
-                assert f1.outs[f2.ins[0].driver_pin] == line
-            elif len(f2.outs) == 1:  # no fanout?
+            elif len(f2.outs) == 1:  # f1==f2, only OK when there is no fanout.
                 line = f2.ins[0]
-            if line is not None:
-                timing[line, :, 0] += select_del(delvals, 0)
-                timing[line, :, 1] += select_del(delvals, 1)
             else:
-                log.warn(f'No branchfork for annotating interconnect delay {c1.name}/{p1}->{c2.name}/{p2}')
-        return timing
+                log.warn(f'No branchfork to annotate interconnect delay {c1.name}/{p1}->{c2.name}/{p2}')
+                continue
+            delays[line, :] = delvals
+
+        return np.moveaxis(delays, -1, 0)
 
 
 def sanitize(args):
@@ -236,6 +203,6 @@ def parse(text):
 def load(file):
     """Parses the contents of ``file`` and returns a :class:`DelayFile` object.
 
-    The given file may be gzip compressed.
+    Files with `.gz`-suffix are decompressed on-the-fly.
     """
     return parse(readtext(file))
diff --git a/src/kyupy/sim.py b/src/kyupy/sim.py
new file mode 100644
index 0000000..de21b27
--- /dev/null
+++ b/src/kyupy/sim.py
@@ -0,0 +1,333 @@
+
+from collections import defaultdict
+from bisect import bisect, insort_left
+
+import numpy as np
+
+BUF1 = np.uint16(0b1010_1010_1010_1010)
+INV1 = ~BUF1
+
+AND2 = np.uint16(0b1000_1000_1000_1000)
+AND3 = np.uint16(0b1000_0000_1000_0000)
+AND4 = np.uint16(0b1000_0000_0000_0000)
+
+NAND2, NAND3, NAND4 = ~AND2, ~AND3, ~AND4
+
+OR2 = np.uint16(0b1110_1110_1110_1110)
+OR3 = np.uint16(0b1111_1110_1111_1110)
+OR4 = np.uint16(0b1111_1111_1111_1110)
+
+NOR2, NOR3, NOR4 = ~OR2, ~OR3, ~OR4
+
+XOR2 = np.uint16(0b0110_0110_0110_0110)
+XOR3 = np.uint16(0b1001_0110_1001_0110)
+XOR4 = np.uint16(0b0110_1001_1001_0110)
+
+XNOR2, XNOR3, XNOR4 = ~XOR2, ~XOR3, ~XOR4
+
+AO21 = np.uint16(0b1111_1000_1111_1000)  # (i0 & i1) | i2
+AO22 = np.uint16(0b1111_1000_1000_1000)  # (i0 & i1) | (i2 & i3)
+OA21 = np.uint16(0b1110_0000_1110_0000)  # (i0 | i1) & i2
+OA22 = np.uint16(0b1110_1110_1110_0000)  # (i0 | i1) & (i2 | i3)
+
+AOI21, AOI22, OAI21, OAI22 = ~AO21, ~AO22, ~OA21, ~OA22
+
+AO211 = np.uint16(0b1111_1111_1111_1000)  # (i0 & i1) | i2 | i3
+OA211 = np.uint16(0b1110_0000_0000_0000)  # (i0 | i1) & i2 & i3
+
+AOI211, OAI211 = ~AO211, ~OA211
+
+MUX21 = np.uint16(0b1100_1010_1100_1010)  # z = i1 if i2 else i0 (i2 is select)
+
+names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16)])
+
+kind_prefixes = {
+    'nand': (NAND4, NAND3, NAND2),
+    'nor': (NOR4, NOR3, NOR2),
+    'and': (AND4, AND3, AND2),
+    'or': (OR4, OR3, OR2),
+    'isolor': (OR2, OR2, OR2),
+    'xor': (XOR4, XOR3, XOR2),
+    'xnor': (XNOR4, XNOR3, XNOR2),
+
+    'not': (INV1, INV1, INV1),
+    'inv': (INV1, INV1, INV1),
+    'ibuf': (INV1, INV1, INV1),
+    '__const1__': (INV1, INV1, INV1),
+    'tieh': (INV1, INV1, INV1),
+
+    'buf': (BUF1, BUF1, BUF1),
+    'nbuf': (BUF1, BUF1, BUF1),
+    'delln': (BUF1, BUF1, BUF1),
+    '__const0__': (BUF1, BUF1, BUF1),
+    'tiel': (BUF1, BUF1, BUF1),
+
+    'ao211': (AO211, AO211, AO211),
+    'oa211': (OA211, OA211, OA211),
+    'aoi211': (AOI211, AOI211, AOI211),
+    'oai211': (OAI211, OAI211, OAI211),
+
+    'ao22': (AO22, AO22, AO22),
+    'aoi22': (AOI22, AOI22, AOI22),
+    'ao21': (AO21, AO21, AO21),
+    'aoi21': (AOI21, AOI21, AOI21),
+
+    'oa22': (OA22, OA22, OA22),
+    'oai22': (OAI22, OAI22, OAI22),
+    'oa21': (OA21, OA21, OA21),
+    'oai21': (OAI21, OAI21, OAI21),
+
+    'mux21': (MUX21, MUX21, MUX21),
+}
+
+class Heap:
+    def __init__(self):
+        self.chunks = dict()  # map start location to chunk size
+        self.released = list()  # chunks that were released
+        self.current_size = 0
+        self.max_size = 0
+
+    def alloc(self, size):
+        for idx, loc in enumerate(self.released):
+            if self.chunks[loc] == size:
+                del self.released[idx]
+                return loc
+            if self.chunks[loc] > size:  # split chunk
+                chunksize = self.chunks[loc]
+                self.chunks[loc] = size
+                self.chunks[loc + size] = chunksize - size
+                self.released[idx] = loc + size  # move released pointer: loc -> loc+size
+                return loc
+        # no previously released chunk; make new one
+        loc = self.current_size
+        self.chunks[loc] = size
+        self.current_size += size
+        self.max_size = max(self.max_size, self.current_size)
+        return loc
+
+    def free(self, loc):
+        size = self.chunks[loc]
+        if loc + size == self.current_size:  # end of managed area, remove chunk
+            del self.chunks[loc]
+            self.current_size -= size
+            # check and remove prev chunk if free
+            if len(self.released) > 0:
+                prev = self.released[-1]
+                if prev + self.chunks[prev] == self.current_size:
+                    chunksize = self.chunks[prev]
+                    del self.chunks[prev]
+                    del self.released[-1]
+                    self.current_size -= chunksize
+            return
+        released_idx = bisect(self.released, loc)
+        if released_idx < len(self.released) and loc + size == self.released[released_idx]:  # next chunk is free, merge
+            chunksize = size + self.chunks[loc + size]
+            del self.chunks[loc + size]
+            self.chunks[loc] = chunksize
+            size = self.chunks[loc]
+            self.released[released_idx] = loc
+        else:
+            insort_left(self.released, loc)  # put in a new release
+        if released_idx > 0:  # check if previous chunk is free
+            prev = self.released[released_idx - 1]
+            if prev + self.chunks[prev] == loc:  # previous chunk is adjacent to freed one, merge
+                chunksize = size + self.chunks[prev]
+                del self.chunks[loc]
+                self.chunks[prev] = chunksize
+                del self.released[released_idx]
+
+    def __repr__(self):
+        r = []
+        for loc in sorted(self.chunks.keys()):
+            size = self.chunks[loc]
+            released_idx = bisect(self.released, loc)
+            is_released = released_idx > 0 and len(self.released) > 0 and self.released[released_idx - 1] == loc
+            r.append(f'{loc:5d}: {"free" if is_released else "used"} {size}')
+        return "\n".join(r)
+
+
+class SimOps:
+    """A static scheduler that translates a Circuit into a topologically sorted list of basic logic operations (self.ops) and
+    a memory mapping (self.c_locs, self.c_caps) for use in simulators.
+
+    :param circuit: The circuit to create a schedule for.
+    :param strip_forks: If enabled, the scheduler will not include fork nodes to safe simulation time.
+        Stripping forks will cause interconnect delay annotations of lines read by fork nodes to be ignored.
+    :param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
+        memory footprint, but intermediate signal waveforms become unaccessible after a propagation.
+    """
+    def __init__(self, circuit, c_caps=1, c_caps_min=1, a_ctrl=None, c_reuse=False, strip_forks=False):
+        self.circuit = circuit
+        self.s_len = len(circuit.s_nodes)
+
+        if isinstance(c_caps, int):
+            c_caps = [c_caps] * (len(circuit.lines)+3)
+
+        if a_ctrl is None:
+            a_ctrl = np.zeros((len(circuit.lines)+3, 3), dtype=np.int32)  # add 3 for zero, tmp, tmp2
+            a_ctrl[:,0] = -1
+
+        # special locations and offsets in c_locs/c_caps
+        self.zero_idx = len(circuit.lines)
+        self.tmp_idx = self.zero_idx + 1
+        self.tmp2_idx = self.tmp_idx + 1
+        self.ppi_offset = self.tmp2_idx + 1
+        self.ppo_offset = self.ppi_offset + self.s_len
+        self.c_locs_len = self.ppo_offset + self.s_len
+
+        # translate circuit structure into self.ops
+        ops = []
+        interface_dict = dict((n, i) for i, n in enumerate(circuit.s_nodes))
+        for n in circuit.topological_order():
+            if n in interface_dict:
+                inp_idx = self.ppi_offset + interface_dict[n]
+                if len(n.outs) > 0 and n.outs[0] is not None:  # first output of a PI/PPI
+                    ops.append((BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[0]]))
+                if 'dff' in n.kind.lower():  # second output of DFF is inverted
+                    if len(n.outs) > 1 and n.outs[1] is not None:
+                        ops.append((INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[1]]))
+                else:  # if not DFF, no output is inverted.
+                    for o_line in n.outs[1:]:
+                        if o_line is not None:
+                            ops.append((BUF1, o_line.index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[o_line]))
+                continue
+            # regular node, not PI/PPI or PO/PPO
+            o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx
+            i0_idx = n.ins[0].index if len(n.ins) > 0 and n.ins[0] is not None else self.zero_idx
+            i1_idx = n.ins[1].index if len(n.ins) > 1 and n.ins[1] is not None else self.zero_idx
+            i2_idx = n.ins[2].index if len(n.ins) > 2 and n.ins[2] is not None else self.zero_idx
+            i3_idx = n.ins[3].index if len(n.ins) > 3 and n.ins[3] is not None else self.zero_idx
+            kind = n.kind.lower()
+            if kind == '__fork__':
+                if not strip_forks:
+                    for o_line in n.outs:
+                        if o_line is not None:
+                            ops.append((BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o_line]))
+                continue
+            sp = None
+            for prefix, prims in kind_prefixes.items():
+                if kind.startswith(prefix):
+                    sp = prims[0]
+                    if i3_idx == self.zero_idx:
+                        sp = prims[1]
+                        if i2_idx == self.zero_idx:
+                            sp = prims[2]
+                    break
+            if sp is None:
+                print('unknown cell type', kind)
+            else:
+                ops.append((sp, o0_idx, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o0_idx]))
+
+        self.ops = np.asarray(ops, dtype='int32')
+
+        # create a map from fanout lines to stem lines for fork stripping
+        stems = np.zeros(self.c_locs_len, dtype='int32') - 1  # default to -1: 'no fanout line'
+        if strip_forks:
+            for f in circuit.forks.values():
+                prev_line = f.ins[0]
+                while prev_line.driver.kind == '__fork__':
+                    prev_line = prev_line.driver.ins[0]
+                stem_idx = prev_line.index
+                for ol in f.outs:
+                    if ol is not None:
+                        stems[ol] = stem_idx
+
+        # calculate level (distance from PI/PPI) and reference count for each line
+        levels = np.zeros(self.c_locs_len, dtype='int32')
+        ref_count = np.zeros(self.c_locs_len, dtype='int32')
+        level_starts = [0]
+        current_level = 1
+        for i, op in enumerate(self.ops):
+            # if we fork-strip, always take the stems for determining fan-in level
+            i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
+            i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
+            i2_idx = stems[op[4]] if stems[op[4]] >= 0 else op[4]
+            i3_idx = stems[op[5]] if stems[op[5]] >= 0 else op[5]
+            if levels[i0_idx] >= current_level or levels[i1_idx] >= current_level or levels[i2_idx] >= current_level or levels[i3_idx] >= current_level:
+                current_level += 1
+                level_starts.append(i)
+            levels[op[1]] = current_level  # set level of the output line
+            ref_count[i0_idx] += 1
+            ref_count[i1_idx] += 1
+            ref_count[i2_idx] += 1
+            ref_count[i3_idx] += 1
+        self.level_starts = np.asarray(level_starts, dtype='int32')
+        self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32')
+
+        # combinational signal allocation table. maps line and interface indices to self.c memory locations
+        self.c_locs = np.full((self.c_locs_len,), -1, dtype=np.int32)
+        self.c_caps = np.zeros((self.c_locs_len,), dtype=np.int32)
+
+        h = Heap()
+
+        # allocate and keep memory for special fields
+        self.c_locs[self.zero_idx], self.c_caps[self.zero_idx] = h.alloc(c_caps_min), c_caps_min
+        self.c_locs[self.tmp_idx], self.c_caps[self.tmp_idx] = h.alloc(c_caps_min), c_caps_min
+        self.c_locs[self.tmp2_idx], self.c_caps[self.tmp2_idx] = h.alloc(c_caps_min), c_caps_min
+        ref_count[self.zero_idx] += 1
+        ref_count[self.tmp_idx] += 1
+        ref_count[self.tmp2_idx] += 1
+
+        # allocate and keep memory for PI/PPI, keep memory for PO/PPO (allocated later)
+        for i, n in enumerate(circuit.s_nodes):
+            if len(n.outs) > 0:
+                self.c_locs[self.ppi_offset + i], self.c_caps[self.ppi_offset + i] = h.alloc(c_caps_min), c_caps_min
+                ref_count[self.ppi_offset + i] += 1
+            if len(n.ins) > 0:
+                i0_idx = stems[n.ins[0]] if stems[n.ins[0]] >= 0 else n.ins[0]
+                ref_count[i0_idx] += 1
+
+        # allocate memory for the rest of the circuit
+        for op_start, op_stop in zip(self.level_starts, self.level_stops):
+            free_set = set()
+            for op in self.ops[op_start:op_stop]:
+                # if we fork-strip, always take the stems
+                i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
+                i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
+                i2_idx = stems[op[4]] if stems[op[4]] >= 0 else op[4]
+                i3_idx = stems[op[5]] if stems[op[5]] >= 0 else op[5]
+                ref_count[i0_idx] -= 1
+                ref_count[i1_idx] -= 1
+                ref_count[i2_idx] -= 1
+                ref_count[i3_idx] -= 1
+                if ref_count[i0_idx] <= 0: free_set.add(self.c_locs[i0_idx])
+                if ref_count[i1_idx] <= 0: free_set.add(self.c_locs[i1_idx])
+                if ref_count[i2_idx] <= 0: free_set.add(self.c_locs[i2_idx])
+                if ref_count[i3_idx] <= 0: free_set.add(self.c_locs[i3_idx])
+                o_idx = op[1]
+                cap = max(c_caps_min, c_caps[o_idx])
+                self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap
+            if c_reuse:
+                for loc in free_set:
+                    h.free(loc)
+
+        # copy memory location and capacity from stems to fanout lines
+        for lidx, stem in enumerate(stems):
+            if stem >= 0:  # if at a fanout line
+                self.c_locs[lidx], self.c_caps[lidx] = self.c_locs[stem], self.c_caps[stem]
+
+        # copy memory location to PO/PPO area
+        for i, n in enumerate(circuit.s_nodes):
+            if len(n.ins) > 0:
+                self.c_locs[self.ppo_offset + i], self.c_caps[self.ppo_offset + i] = self.c_locs[n.ins[0]], self.c_caps[n.ins[0]]
+
+        self.c_len = h.max_size
+
+        d = defaultdict(int)
+        for op in self.ops[:,0]: d[names[op]] += 1
+        self.prim_counts = dict(d)
+
+        self.pi_s_locs = np.flatnonzero(self.c_locs[self.ppi_offset+np.arange(len(self.circuit.io_nodes))] >= 0)
+        self.po_s_locs = np.flatnonzero(self.c_locs[self.ppo_offset+np.arange(len(self.circuit.io_nodes))] >= 0)
+        self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), self.s_len)
+
+        self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs])
+        self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs])
+
+        self.pi_c_locs = self.c_locs[self.ppi_offset+self.pi_s_locs]
+        self.po_c_locs = self.c_locs[self.ppo_offset+self.po_s_locs]
+        self.ppi_c_locs = self.c_locs[self.ppi_offset+self.ppio_s_locs]
+        self.ppo_c_locs = self.c_locs[self.ppo_offset+self.ppio_s_locs]
+
+        self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs])
+        self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs])
diff --git a/src/kyupy/stil.py b/src/kyupy/stil.py
index 7cacc5b..98cc2df 100644
--- a/src/kyupy/stil.py
+++ b/src/kyupy/stil.py
@@ -1,16 +1,17 @@
 """A simple and incomplete parser for the Standard Test Interface Language (STIL).
 
 The main purpose of this parser is to load scan pattern sets from STIL files.
-It supports only a very limited subset of STIL.
+It supports only a subset of STIL.
 
-The functions :py:func:`load` and :py:func:`read` return an intermediate representation (:class:`StilFile` object).
-Call :py:func:`StilFile.tests`, :py:func:`StilFile.tests_loc`, or :py:func:`StilFile.responses` to
+The functions :py:func:`parse` and :py:func:`load` return an intermediate representation (:py:class:`StilFile` object).
+Call :py:func:`StilFile.tests()`, :py:func:`StilFile.tests_loc()`, or :py:func:`StilFile.responses()` to
 obtain the appropriate vector sets.
 """
 
 import re
 from collections import namedtuple
 
+import numpy as np
 from lark import Lark, Transformer
 
 from . import readtext, logic
@@ -55,7 +56,7 @@ class StilFile:
                 capture = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items())
 
     def _maps(self, c):
-        interface = list(c.interface) + [n for n in c.nodes if 'DFF' in n.kind]
+        interface = list(c.io_nodes) + [n for n in c.nodes if 'DFF' in n.kind]
         intf_pos = dict((n.name, i) for i, n in enumerate(interface))
         pi_map = [intf_pos[n] for n in self.signal_groups['_pi']]
         po_map = [intf_pos[n] for n in self.signal_groups['_po']]
@@ -81,73 +82,99 @@ class StilFile:
                     scan_out_inversion.append(inversion)
             scan_maps[chain[0]] = scan_map
             scan_maps[chain[-1]] = scan_map
-            scan_inversions[chain[0]] = scan_in_inversion
-            scan_inversions[chain[-1]] = scan_out_inversion
+            scan_inversions[chain[0]] = logic.mvarray(scan_in_inversion)[0]
+            scan_inversions[chain[-1]] = logic.mvarray(scan_out_inversion)[0]
         return interface, pi_map, po_map, scan_maps, scan_inversions
 
     def tests(self, circuit):
         """Assembles and returns a scan test pattern set for given circuit.
 
         This function assumes a static (stuck-at fault) test.
+
+        :param circuit: The circuit to assemble the patterns for. The patterns will follow the
+            :py:attr:`~kyupy.circuit.Circuit.s_nodes` ordering of the this circuit.
+        :return: A 4-valued multi-valued (mv) logic array (see :py:mod:`~kyupy.logic`).
+            The values for primary inputs and sequential elements are filled, the primary outputs are left unassigned.
         """
         interface, pi_map, _, scan_maps, scan_inversions = self._maps(circuit)
-        tests = logic.MVArray((len(interface), len(self.patterns)))
+        tests = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
         for i, p in enumerate(self.patterns):
             for si_port in self.si_ports.keys():
-                pattern = logic.mv_xor(p.load[si_port], scan_inversions[si_port])
-                tests.data[scan_maps[si_port], i] = pattern.data[:, 0]
-            tests.data[pi_map, i] = logic.MVArray(p.capture['_pi']).data[:, 0]
+                pattern = logic.mvarray(p.load[si_port])
+                inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN),
+                                       [scan_inversions[si_port], logic.ZERO]).astype(np.uint8)
+                np.bitwise_xor(pattern, inversions, out=pattern)
+                tests[scan_maps[si_port], i] = pattern
+            tests[pi_map, i] = logic.mvarray(p.capture['_pi'])
         return tests
 
-    def tests_loc(self, circuit):
+    def tests_loc(self, circuit, init_filter=None, launch_filter=None):
         """Assembles and returns a LoC scan test pattern set for given circuit.
 
         This function assumes a launch-on-capture (LoC) delay test.
         It performs a logic simulation to obtain the first capture pattern (the one that launches the delay
         test) and assembles the test pattern set from from pairs for initialization- and launch-patterns.
+
+        :param circuit: The circuit to assemble the patterns for. The patterns will follow the
+            :py:attr:`~kyupy.circuit.Circuit.s_nodes` ordering of the this circuit.
+        :param init_filter: A function for filtering the initialization patterns. This function is called
+            with the initialization patterns from the STIL file as mvarray before logic simulation.
+            It shall return an mvarray with the same shape. This function can be used, for example, to fill
+            patterns.
+        :param launch_filter: A function for filtering the launch patterns. This function is called
+            with the launch patterns generated by logic simulation before they are combined with
+            the initialization patterns to form the final 8-valued test patterns.
+            The function shall return an mvarray with the same shape. This function can be used, for example, to fill
+            patterns.
+        :return: An 8-valued multi-valued (mv) logic array (see :py:mod:`~kyupy.logic`). The values for primary
+            inputs and sequential elements are filled, the primary outputs are left unassigned.
         """
         interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(circuit)
-        init = logic.MVArray((len(interface), len(self.patterns)), m=4)
-        # init = PackedVectors(len(self.patterns), len(interface), 2)
+        init = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
         for i, p in enumerate(self.patterns):
             # init.set_values(i, '0' * len(interface))
             for si_port in self.si_ports.keys():
-                pattern = logic.mv_xor(p.load[si_port], scan_inversions[si_port])
-                init.data[scan_maps[si_port], i] = pattern.data[:, 0]
-            init.data[pi_map, i] = logic.MVArray(p.launch['_pi'] if '_pi' in p.launch else p.capture['_pi']).data[:, 0]
-        launch_bp = logic.BPArray(init)
-        sim4v = LogicSim(circuit, len(init), m=4)
-        sim4v.assign(launch_bp)
-        sim4v.propagate()
-        sim4v.capture(launch_bp)
-        launch = logic.MVArray(launch_bp)
+                pattern = logic.mvarray(p.load[si_port])
+                inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN),
+                                       [scan_inversions[si_port], logic.ZERO]).astype(np.uint8)
+                np.bitwise_xor(pattern, inversions, out=pattern)
+                init[scan_maps[si_port], i] = pattern
+            init[pi_map, i] = logic.mvarray(p.launch['_pi'] if '_pi' in p.launch else p.capture['_pi'])
+        if init_filter: init = init_filter(init)
+        sim8v = LogicSim(circuit, init.shape[-1], m=8)
+        sim8v.s[0] = logic.mv_to_bp(init)
+        sim8v.s_to_c()
+        sim8v.c_prop()
+        sim8v.c_to_s()
+        launch = logic.bp_to_mv(sim8v.s[1])[..., :init.shape[-1]]
         for i, p in enumerate(self.patterns):
             # if there was no launch cycle or launch clock, then init = launch
             if '_pi' not in p.launch or 'P' not in p.launch['_pi'] or 'P' not in p.capture['_pi']:
                 for si_port in self.si_ports.keys():
-                    pattern = logic.mv_xor(p.load[si_port], scan_inversions[si_port])
-                    launch.data[scan_maps[si_port], i] = pattern.data[:, 0]
+                    pattern = logic.mv_xor(logic.mvarray(p.load[si_port]), scan_inversions[si_port])
+                    launch[scan_maps[si_port], i] = pattern
             if '_pi' in p.capture and 'P' in p.capture['_pi']:
-                launch.data[pi_map, i] = logic.MVArray(p.capture['_pi']).data[:, 0]
-            launch.data[po_map, i] = logic.UNASSIGNED
+                launch[pi_map, i] = logic.mvarray(p.capture['_pi'])
+            launch[po_map, i] = logic.UNASSIGNED
+        if launch_filter: launch = launch_filter(launch)
 
         return logic.mv_transition(init, launch)
 
     def responses(self, circuit):
-        """Assembles and returns a scan test response pattern set for given circuit."""
+        """Assembles and returns a scan test response pattern set for given circuit.
+
+        :param circuit: The circuit to assemble the patterns for. The patterns will follow the
+            :py:attr:`~kyupy.circuit.Circuit.s_nodes` ordering of the this circuit.
+        :return: A 4-valued multi-valued (mv) logic array (see :py:mod:`~kyupy.logic`).
+            The values for primary outputs and sequential elements are filled, the primary inputs are left unassigned.
+        """
         interface, _, po_map, scan_maps, scan_inversions = self._maps(circuit)
-        resp = logic.MVArray((len(interface), len(self.patterns)))
-        # resp = PackedVectors(len(self.patterns), len(interface), 2)
+        resp = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
         for i, p in enumerate(self.patterns):
-            resp.data[po_map, i] = logic.MVArray(p.capture['_po'] if len(p.capture) > 0 else p.launch['_po']).data[:, 0]
-            # if len(p.capture) > 0:
-            #    resp.set_values(i, p.capture['_po'], po_map)
-            # else:
-            #    resp.set_values(i, p.launch['_po'], po_map)
+            resp[po_map, i] = logic.mvarray(p.capture['_po'] if len(p.capture) > 0 else p.launch['_po'])
             for so_port in self.so_ports.keys():
-                pattern = logic.mv_xor(p.unload[so_port], scan_inversions[so_port])
-                resp.data[scan_maps[so_port], i] = pattern.data[:, 0]
-                # resp.set_values(i, p.unload[so_port], scan_maps[so_port], scan_inversions[so_port])
+                pattern = logic.mv_xor(logic.mvarray(p.unload[so_port]), scan_inversions[so_port])
+                resp[scan_maps[so_port], i] = pattern
         return resp
 
 
@@ -246,6 +273,6 @@ def parse(text):
 def load(file):
     """Parses the contents of ``file`` and returns a :class:`StilFile` object.
 
-    The given file may be gzip compressed.
+    Files with `.gz`-suffix are decompressed on-the-fly.
     """
     return parse(readtext(file))
diff --git a/src/kyupy/techlib.py b/src/kyupy/techlib.py
index 21c82a6..ce15ed1 100644
--- a/src/kyupy/techlib.py
+++ b/src/kyupy/techlib.py
@@ -1,38 +1,27 @@
-from .circuit import Node, Line
-
-
-def add_and_connect(circuit, name, kind, in1=None, in2=None, out=None):
-    n = Node(circuit, name, kind)
-    if in1 is not None:
-        n.ins[0] = in1
-        in1.reader = n
-        in1.reader_pin = 0
-    if in2 is not None:
-        n.ins[1] = in2
-        in2.reader = n
-        in2.reader_pin = 1
-    if out is not None:
-        n.outs[0] = out
-        out.driver = n
-        out.driver_pin = 0
-    return n
+"""KyuPy's Built-In Technology Libraries
 
+Technology libraries provide cell definitions and their implementation with simulation primitives.
+A couple of common standard cell libraries are built-in.
+Others can be easily added by providing a bench-like description of the cells.
+"""
 
-class TechLib:
-    """Provides some information specific to standard cell libraries necessary
-    for loading gate-level designs. :py:class:`~kyupy.circuit.Node` objects do not
-    have pin names. The methods defined here map pin names to pin directions and defined
-    positions in the ``node.ins`` and ``node.outs`` lists. The default implementation
-    provides mappings for SAED-inspired standard cell libraries.
-    """
+import re
+from itertools import product
+
+from . import bench
 
+
+class TechLibOld:
     @staticmethod
     def pin_index(kind, pin):
-        """Returns a pin list position for a given node kind and pin name."""
+        if isinstance(pin, int):
+            return max(0, pin-1)
         if kind[:3] in ('OAI', 'AOI'):
             if pin[0] == 'A': return int(pin[1]) - 1
-            if pin[0] == 'B': return int(pin[1]) + int(kind[4]) - 1
+            if pin == 'B': return int(kind[3])
+            if pin[0] == 'B': return int(pin[1]) - 1 + int(kind[3])
         for prefix, pins, index in [('HADD', ('B0', 'SO'), 1),
+                                    ('HADD', ('A0', 'C1'), 0),
                                     ('MUX21', ('S', 'S0'), 2),
                                     ('MX2', ('S0',), 2),
                                     ('TBUF', ('OE',), 1),
@@ -45,7 +34,9 @@ class TechLib:
                                     ('SDFF', ('QN',), 1),
                                     ('SDFF', ('CLK',), 3),
                                     ('SDFF', ('RSTB', 'RN'), 4),
-                                    ('SDFF', ('SETB',), 5)]:
+                                    ('SDFF', ('SETB',), 5),
+                                    ('ISOL', ('ISO',), 0),
+                                    ('ISOL', ('D',), 1)]:
             if kind.startswith(prefix) and pin in pins: return index
         for index, pins in enumerate([('A1', 'IN1', 'A', 'S', 'INP', 'I', 'Q', 'QN', 'Y', 'Z', 'ZN'),
                                       ('A2', 'IN2', 'B', 'CK', 'CLK', 'CO', 'SE'),
@@ -58,254 +49,367 @@ class TechLib:
 
     @staticmethod
     def pin_is_output(kind, pin):
-        """Returns True, if given pin name of a node kind is an output."""
+        if isinstance(pin, int):
+            return pin == 0
         if 'MUX' in kind and pin == 'S': return False
         return pin in ('Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1')
 
-    @staticmethod
-    def split_complex_gates(circuit):
-        node_list = circuit.nodes
-        for n in node_list:
-            name = n.name
-            ins = n.ins
-            outs = n.outs
-            if n.kind.startswith('AO21X'):
-                n.remove()
-                n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None)
-                n_or = add_and_connect(circuit, name+'~or', 'OR2', None, ins[2], outs[0])
-                Line(circuit, n_and, n_or)
-            elif n.kind.startswith('AOI21X'):
-                n.remove()
-                n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None)
-                n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0])
-                Line(circuit, n_and, n_nor)
-            elif n.kind.startswith('OA21X'):
-                n.remove()
-                n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
-                n_and = add_and_connect(circuit, name+'~and', 'AND2', None, ins[2], outs[0])
-                Line(circuit, n_or, n_and)
-            elif n.kind.startswith('OAI21X'):
-                n.remove()
-                n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
-                n_nand = add_and_connect(circuit, name+'~nand', 'NAND2', None, ins[2], outs[0])
-                Line(circuit, n_or, n_nand)
-            elif n.kind.startswith('OA22X'):
-                n.remove()
-                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-                n_and = add_and_connect(circuit, name+'~and', 'AND2', None, None, outs[0])
-                Line(circuit, n_or0, n_and)
-                Line(circuit, n_or1, n_and)
-            elif n.kind.startswith('OAI22X'):
-                n.remove()
-                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-                n_nand = add_and_connect(circuit, name+'~nand', 'NAND2', None, None, outs[0])
-                Line(circuit, n_or0, n_nand)
-                Line(circuit, n_or1, n_nand)
-            elif n.kind.startswith('AO22X'):
-                n.remove()
-                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-                n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, outs[0])
-                Line(circuit, n_and0, n_or)
-                Line(circuit, n_and1, n_or)
-            elif n.kind.startswith('AOI22X'):
-                n.remove()
-                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-                n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, None, outs[0])
-                Line(circuit, n_and0, n_nor)
-                Line(circuit, n_and1, n_nor)
-            elif n.kind.startswith('AO221X'):
-                n.remove()
-                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
-                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, ins[4], outs[0])
-                Line(circuit, n_and0, n_or0)
-                Line(circuit, n_and1, n_or0)
-                Line(circuit, n_or0, n_or1)
-            elif n.kind.startswith('AOI221X'):
-                n.remove()
-                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-                n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, None)
-                n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[4], outs[0])
-                Line(circuit, n_and0, n_or)
-                Line(circuit, n_and1, n_or)
-                Line(circuit, n_or, n_nor)
-            elif n.kind.startswith('OA221X'):
-                n.remove()
-                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
-                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, ins[4], outs[0])
-                Line(circuit, n_or0, n_and0)
-                Line(circuit, n_or1, n_and0)
-                Line(circuit, n_and0, n_and1)
-            elif n.kind.startswith('OAI221X'):
-                n.remove()
-                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
-                n_nand1 = add_and_connect(circuit, name+'~nand1', 'NAND2', None, ins[4], outs[0])
-                Line(circuit, n_or0, n_and0)
-                Line(circuit, n_or1, n_and0)
-                Line(circuit, n_and0, n_nand1)
-            elif n.kind.startswith('AO222X'):
-                n.remove()
-                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-                n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None)
-                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
-                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, None, outs[0])
-                Line(circuit, n_and0, n_or0)
-                Line(circuit, n_and1, n_or0)
-                Line(circuit, n_and2, n_or1)
-                Line(circuit, n_or0, n_or1)
-            elif n.kind.startswith('AOI222X'):
-                n.remove()
-                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-                n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None)
-                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
-                n_nor1 = add_and_connect(circuit, name+'~nor1', 'NOR2', None, None, outs[0])
-                Line(circuit, n_and0, n_or0)
-                Line(circuit, n_and1, n_or0)
-                Line(circuit, n_and2, n_nor1)
-                Line(circuit, n_or0, n_nor1)
-            elif n.kind.startswith('OA222X'):
-                n.remove()
-                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-                n_or2 = add_and_connect(circuit, name+'~or2', 'OR2', ins[4], ins[5], None)
-                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
-                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, None, outs[0])
-                Line(circuit, n_or0, n_and0)
-                Line(circuit, n_or1, n_and0)
-                Line(circuit, n_or2, n_and1)
-                Line(circuit, n_and0, n_and1)
-            elif n.kind.startswith('OAI222X'):
-                n.remove()
-                n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-                n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-                n2 = add_and_connect(circuit, name+'~or2', 'OR2', ins[4], ins[5], None)
-                n3 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
-                n4 = add_and_connect(circuit, name+'~nand1', 'NAND2', None, None, outs[0])
-                Line(circuit, n0, n3)
-                Line(circuit, n1, n3)
-                Line(circuit, n2, n4)
-                Line(circuit, n3, n4)
-            elif n.kind.startswith('AND3X'):
-                n.remove()
-                n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-                n1 = add_and_connect(circuit, name+'~and1', 'AND2', None, ins[2], outs[0])
-                Line(circuit, n0, n1)
-            elif n.kind.startswith('OR3X'):
-                n.remove()
-                n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-                n1 = add_and_connect(circuit, name+'~or1', 'OR2', None, ins[2], outs[0])
-                Line(circuit, n0, n1)
-            elif n.kind.startswith('XOR3X'):
-                n.remove()
-                n0 = add_and_connect(circuit, name+'~xor0', 'XOR2', ins[0], ins[1], None)
-                n1 = add_and_connect(circuit, name+'~xor1', 'XOR2', None, ins[2], outs[0])
-                Line(circuit, n0, n1)
-            elif n.kind.startswith('NAND3X'):
-                n.remove()
-                n0 = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None)
-                n1 = add_and_connect(circuit, name+'~nand', 'NAND2', None, ins[2], outs[0])
-                Line(circuit, n0, n1)
-            elif n.kind.startswith('NOR3X'):
-                n.remove()
-                n0 = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
-                n1 = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0])
-                Line(circuit, n0, n1)
-            elif n.kind.startswith('XNOR3X'):
-                n.remove()
-                n0 = add_and_connect(circuit, name+'~xor', 'XOR2', ins[0], ins[1], None)
-                n1 = add_and_connect(circuit, name+'~xnor', 'XNOR2', None, ins[2], outs[0])
-                Line(circuit, n0, n1)
-            elif n.kind.startswith('AND4X'):
-                n.remove()
-                n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-                n1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-                n2 = add_and_connect(circuit, name+'~and2', 'AND2', None, None, outs[0])
-                Line(circuit, n0, n2)
-                Line(circuit, n1, n2)
-            elif n.kind.startswith('OR4X'):
-                n.remove()
-                n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-                n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-                n2 = add_and_connect(circuit, name+'~or2', 'OR2', None, None, outs[0])
-                Line(circuit, n0, n2)
-                Line(circuit, n1, n2)
-            elif n.kind.startswith('NAND4X'):
-                n.remove()
-                n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-                n1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-                n2 = add_and_connect(circuit, name+'~nand2', 'NAND2', None, None, outs[0])
-                Line(circuit, n0, n2)
-                Line(circuit, n1, n2)
-            elif n.kind.startswith('NOR4X'):
-                n.remove()
-                n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-                n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-                n2 = add_and_connect(circuit, name+'~nor2', 'NOR2', None, None, outs[0])
-                Line(circuit, n0, n2)
-                Line(circuit, n1, n2)
-            elif n.kind.startswith('FADDX'):
-                n.remove()
-                # forks for fan-outs
-                f_a = add_and_connect(circuit, name + '~fork0', '__fork__', ins[0])
-                f_b = add_and_connect(circuit, name + '~fork1', '__fork__', ins[1])
-                f_ci = add_and_connect(circuit, name + '~fork2', '__fork__', ins[2])
-                f_ab = Node(circuit, name + '~fork3')
-                # sum-block
-                n_xor0 = Node(circuit, name + '~xor0', 'XOR2')
-                Line(circuit, f_a, n_xor0)
-                Line(circuit, f_b, n_xor0)
-                Line(circuit, n_xor0, f_ab)
-                if len(outs) > 0 and outs[0] is not None:
-                    n_xor1 = add_and_connect(circuit, name + '~xor1', 'XOR2', None, None, outs[0])
-                    Line(circuit, f_ab, n_xor1)
-                    Line(circuit, f_ci, n_xor1)
-                # carry-block
-                if len(outs) > 1 and outs[1] is not None:
-                    n_and0 = Node(circuit, name + '~and0', 'AND2')
-                    Line(circuit, f_ab, n_and0)
-                    Line(circuit, f_ci, n_and0)
-                    n_and1 = Node(circuit, name + '~and1', 'AND2')
-                    Line(circuit, f_a, n_and1)
-                    Line(circuit, f_b, n_and1)
-                    n_or = add_and_connect(circuit, name + '~or0', 'OR2', None, None, outs[1])
-                    Line(circuit, n_and0, n_or)
-                    Line(circuit, n_and1, n_or)
-            elif n.kind.startswith('HADDX'):
-                n.remove()
-                # forks for fan-outs
-                f_a = add_and_connect(circuit, name + '~fork0', '__fork__', ins[0])
-                f_b = add_and_connect(circuit, name + '~fork1', '__fork__', ins[1])
-                n_xor0 = add_and_connect(circuit, name + '~xor0', 'XOR2', None, None, outs[1])
-                Line(circuit, f_a, n_xor0)
-                Line(circuit, f_b, n_xor0)
-                n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', None, None, outs[0])
-                Line(circuit, f_a, n_and0)
-                Line(circuit, f_b, n_and0)
-            elif n.kind.startswith('MUX21X'):
-                n.remove()
-                f_s = add_and_connect(circuit, name + '~fork0', '__fork__', ins[2])
-                n_not = Node(circuit, name + '~not', 'INV')
-                Line(circuit, f_s, n_not)
-                n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', ins[0])
-                n_and1 = add_and_connect(circuit, name + '~and1', 'AND2', ins[1])
-                n_or0 = add_and_connect(circuit, name + '~or0', 'OR2', None, None, outs[0])
-                Line(circuit, n_not, n_and0)
-                Line(circuit, f_s, n_and1)
-                Line(circuit, n_and0, n_or0)
-                Line(circuit, n_and1, n_or0)
-            elif n.kind.startswith('DFFSSR'):
-                n.kind = 'DFFX1'
-                n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', ins[0], ins[2], None)
-                Line(circuit, n_and0, (n, 0))
+
+class TechLib:
+    """Class for standard cell library definitions.
+
+    :py:class:`~kyupy.circuit.Node` objects do not have pin names.
+    This class maps pin names to pin directions and defined positions in the ``node.ins`` and ``node.outs`` lists.
+    Furthermore, it gives access to implementations of complex cells. See also :py:func:`~kyupy.circuit.substitute` and
+    :py:func:`~kyupy.circuit.resolve_tlib_cells`.
+    """
+    def __init__(self, lib_src):
+        self.cells = dict()
+        """A dictionary with pin definitions and circuits for each cell kind (type).
+        """
+        for c_str in re.split(r';\s+', lib_src):
+            c_str = re.sub(r'^\s+', '', c_str)
+            name_len = c_str.find(' ')
+            if name_len <= 0: continue
+            c = bench.parse(c_str[name_len:])
+            c.name = c_str[:name_len]
+            c.eliminate_1to1_forks()
+            i_idx, o_idx = 0, 0
+            pin_dict = dict()
+            for n in c.io_nodes:
+                if len(n.ins) == 0:
+                    pin_dict[n.name] = (i_idx, False)
+                    i_idx += 1
+                else:
+                    pin_dict[n.name] = (o_idx, True)
+                    o_idx += 1
+            parts = [s[1:-1].split(',') if s[0] == '{' else [s] for s in re.split(r'({[^}]+})', c.name) if len(s) > 0]
+            for name in [''.join(item) for item in product(*parts)]:
+                self.cells[name] = (c, pin_dict)
+
+    def pin_index(self, kind, pin):
+        """Returns a pin list position for a given node kind and pin name."""
+        assert kind in self.cells, f'Unknown cell: {kind}'
+        assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}'
+        return self.cells[kind][1][pin][0]
+
+    def pin_is_output(self, kind, pin):
+        """Returns True, if given pin name of a node kind is an output."""
+        assert kind in self.cells, f'Unknown cell: {kind}'
+        assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}'
+        return self.cells[kind][1][pin][1]
+
+
+GSC180 = TechLib(r"""
+BUFX{1,3}      input(A)    output(Y) Y=BUF1(A)    ;
+CLKBUFX{1,2,3} input(A)    output(Y) Y=BUF1(A)    ;
+INVX{1,2,4,8}  input(A)    output(Y) Y=INV1(A)    ;
+TBUFX{1,2,4,8} input(A,OE) output(Y) Y=AND2(A,OE) ;
+TINVX1         input(A,OE) output(Y) AB=INV1(A) Y=AND2(AB,OE) ;
+
+AND2X1      input(A,B)     output(Y) Y=AND2(A,B)      ;
+NAND2X{1,2} input(A,B)     output(Y) Y=NAND2(A,B)     ;
+NAND3X1     input(A,B,C)   output(Y) Y=NAND3(A,B,C)   ;
+NAND4X1     input(A,B,C,D) output(Y) Y=NAND4(A,B,C,D) ;
+OR2X1       input(A,B)     output(Y) Y=OR2(A,B)       ;
+OR4X1       input(A,B,C,D) output(Y) Y=OR4(A,B,C,D)   ;
+NOR2X1      input(A,B)     output(Y) Y=NOR2(A,B)      ;
+NOR3X1      input(A,B,C)   output(Y) Y=NOR3(A,B,C)    ;
+NOR4X1      input(A,B,C,D) output(Y) Y=NOR4(A,B,C,D)  ;
+XOR2X1      input(A,B)     output(Y) Y=XOR2(A,B)      ;
+
+MX2X1   input(A,B,S0)            output(Y)    Y=MUX21(A,B,S0)      ;
+AOI21X1 input(A0,A1,B0)          output(Y)    Y=AOI21(A0,A1,B0)    ;
+AOI22X1 input(A0,A1,B0,B1)       output(Y)    Y=AOI22(A0,A1,B0,B1) ;
+OAI21X1 input(A0,A1,B0)          output(Y)    Y=OAI21(A0,A1,B0)    ;
+OAI22X1 input(A0,A1,B0,B1)       output(Y)    Y=OAI22(A0,A1,B0,B1) ;
+OAI33X1 input(A0,A1,A2,B0,B1,B2) output(Y)    AA=OR2(A0,A1) BB=OR2(B0,B1) Y=OAI22(AA,A2,BB,B2) ;
+ADDFX1  input(A,B,CI)            output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(AB,CI,A,B)    ;
+ADDHX1  input(A,B)               output(CO,S) CO=XOR2(A,B) S=AND2(A,B)                         ;
+
+DFFX1    input(CK,D)             output(Q,QN) Q=DFF(D,CK) QN=INV1(Q) ;
+DFFSRX1  input(CK,D,RN,SN)       output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) Q=DFF(DRS,CK) QN=INV1(Q) ;
+SDFFSRX1 input(CK,D,RN,SE,SI,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;
+
+TLATSRX1 input(D,G,RN,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) Q=LATCH(DRS,G) QN=INV1(Q) ;
+TLATX1   input(C,D)       output(Q,QN) Q=LATCH(D,C) QN=INV1(Q) ;
+""")
+"""The GSC 180nm generic standard cell library.
+"""
+
+
+_nangate_common = r"""
+FILLCELL_X{1,2,4,8,16,32} ;
+
+LOGIC0_X1 output(Z) Z=__const0__() ;
+LOGIC1_X1 output(Z) Z=__const1__() ;
+
+BUF_X{1,2,4,8,16,32}  input(A) output(Z)  Z=BUF1(A)  ;
+CLKBUF_X{1,2,3}       input(A) output(Z)  Z=BUF1(A)  ;
+
+NAND2_X{1,2,4} input(A1,A2)       output(ZN) ZN=NAND2(A1,A2)       ;
+NAND3_X{1,2,4} input(A1,A2,A3)    output(ZN) ZN=NAND3(A1,A2,A3)    ;
+NAND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=NAND4(A1,A2,A3,A4) ;
+NOR2_X{1,2,4}  input(A1,A2)       output(ZN) ZN=NOR2(A1,A2)        ;
+NOR3_X{1,2,4}  input(A1,A2,A3)    output(ZN) ZN=NOR3(A1,A2,A3)     ;
+NOR4_X{1,2,4}  input(A1,A2,A3,A4) output(ZN) ZN=NOR4(A1,A2,A3,A4)  ;
+
+AOI21_X{1,2,4} input(A,B1,B2)     output(ZN) ZN=AOI21(B1,B2,A)     ;
+OAI21_X{1,2,4} input(A,B1,B2)     output(ZN) ZN=OAI21(B1,B2,A)     ;
+AOI22_X{1,2,4} input(A1,A2,B1,B2) output(ZN) ZN=AOI22(A1,A2,B1,B2) ;
+OAI22_X{1,2,4} input(A1,A2,B1,B2) output(ZN) ZN=OAI22(A1,A2,B1,B2) ;
+
+OAI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=OAI211(C1,C2,A,B)   ;
+AOI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=AOI211(C1,C2,A,B)   ;
+
+MUX2_X{1,2} input(A,B,S) output(Z) Z=MUX21(A,B,S) ;
+
+AOI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=AO22(B1,B2,C1,C2) ZN=NOR2(BC,A)  ;
+OAI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=NAND2(BC,A) ;
+
+AOI222_X{1,2,4} input(A1,A2,B1,B2,C1,C2) output(ZN) BC=AO22(B1,B2,C1,C2) ZN=AOI21(A1,A2,BC) ;
+OAI222_X{1,2,4} input(A1,A2,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=OAI21(A1,A2,BC) ;
+
+OAI33_X1 input(A1,A2,A3,B1,B2,B3) output(ZN) AA=OR2(A1,A2) BB=OR2(B1,B2) ZN=OAI22(AA,A3,BB,B3) ;
+
+HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
+
+FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ;
+
+CLKGATE_X{1,2,4,8} input(CK,E) output(GCK) GCK=AND2(CK,E) ;
+
+CLKGATETST_X{1,2,4,8} input(CK,E,SE) output(GCK) GCK=OA21(CK,E,SE) ;
+
+DFF_X{1,2}   input(D,CK)       output(Q,QN)  Q=DFF(D,CK) QN=INV1(Q) ;
+DFFR_X{1,2}  input(D,RN,CK)    output(Q,QN)  DR=AND2(D,RN) Q=DFF(DR,CK) QN=INV1(Q) ;
+DFFS_X{1,2}  input(D,SN,CK)    output(Q,QN)  S=INV1(SN) DS=OR2(D,S) Q=DFF(DS,CK) QN=INV1(Q) ;
+DFFRS_X{1,2} input(D,RN,SN,CK) output(Q,QN)  S=INV1(SN) DS=OR2(D,S) DRS=AND2(DS,RN) Q=DFF(DRS,CK) QN=INV1(Q) ;
+
+SDFF_X{1,2}   input(D,SE,SI,CK)       output(Q,QN)  DI=MUX21(D,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;
+SDFFR_X{1,2}  input(D,RN,SE,SI,CK)    output(Q,QN)  DR=AND2(D,RN) DI=MUX21(DR,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;
+SDFFS_X{1,2}  input(D,SE,SI,SN,CK)    output(Q,QN)  S=INV1(SN) DS=OR2(D,S) DI=MUX21(DS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;
+SDFFRS_X{1,2} input(D,RN,SE,SI,SN,CK) output(Q,QN)  S=INV1(SN) DS=OR2(D,S) DRS=AND2(DS,RN) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;
+
+TBUF_X{1,2,4,8,16} input(A,EN)   output(Z)  Z=BUF1(A)    ;
+TINV_X1            input(I,EN)   output(ZN) ZN=INV1(I)   ;
+TLAT_X1            input(D,G,OE) output(Q)  Q=LATCH(D,G) ;
+
+DLH_X{1,2} input(D,G) output(Q)  Q=LATCH(D,G)            ;
+DLL_X{1,2} input(D,GN) output(Q) G=INV1(GN) Q=LATCH(D,G) ;
+"""
+
+
+NANGATE = TechLib(_nangate_common + r"""
+INV_X{1,2,4,8,16,32}  input(I) output(ZN) ZN=INV1(I) ;
+
+AND2_X{1,2,4}  input(A1,A2)       output(Z)  Z=AND2(A1,A2)        ;
+AND3_X{1,2,4}  input(A1,A2,A3)    output(Z)  Z=AND3(A1,A2,A3)     ;
+AND4_X{1,2,4}  input(A1,A2,A3,A4) output(Z)  Z=AND4(A1,A2,A3,A4)  ;
+OR2_X{1,2,4}   input(A1,A2)       output(Z)  Z=OR2(A1,A2)         ;
+OR3_X{1,2,4}   input(A1,A2,A3)    output(Z)  Z=OR3(A1,A2,A3)      ;
+OR4_X{1,2,4}   input(A1,A2,A3,A4) output(Z)  Z=OR4(A1,A2,A3,A4)   ;
+XOR2_X{1,2}    input(A1,A2)       output(Z)  Z=XOR2(A1,A2)        ;
+XNOR2_X{1,2}   input(A1,A2)       output(ZN) ZN=XNOR2(A1,A2)      ;
+""")
+"""An newer NANGATE-variant that uses 'Z' as output pin names for AND and OR gates.
+"""
+
+
+NANGATE_ZN = TechLib(_nangate_common + r"""
+INV_X{1,2,4,8,16,32}  input(A) output(ZN) ZN=INV1(A) ;
+
+AND2_X{1,2,4}  input(A1,A2)       output(ZN) ZN=AND2(A1,A2)        ;
+AND3_X{1,2,4}  input(A1,A2,A3)    output(ZN) ZN=AND3(A1,A2,A3)     ;
+AND4_X{1,2,4}  input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4)  ;
+OR2_X{1,2,4}   input(A1,A2)       output(ZN) ZN=OR2(A1,A2)         ;
+OR3_X{1,2,4}   input(A1,A2,A3)    output(ZN) ZN=OR3(A1,A2,A3)      ;
+OR4_X{1,2,4}   input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4)   ;
+XOR2_X{1,2}    input(A,B)         output(Z)  Z=XOR2(A,B)           ;
+XNOR2_X{1,2}   input(A,B)         output(ZN) ZN=XNOR2(A,B)         ;
+""")
+"""An older NANGATE-variant that uses 'ZN' as output pin names for AND and OR gates.
+"""
+
+
+SAED32 = TechLib(r"""
+NBUFFX{2,4,8,16,32}$ input(A) output(Y) Y=BUF1(A) ;
+AOBUFX{1,2,4}$       input(A) output(Y) Y=BUF1(A) ;
+DELLN{1,2,3}X2$      input(A) output(Y) Y=BUF1(A) ;
+
+INVX{0,1,2,4,8,16,32}$ input(A) output(Y) Y=INV1(A) ;
+AOINVX{1,2,4}$         input(A) output(Y) Y=INV1(A) ;
+IBUFFX{2,4,8,16,32}$   input(A) output(Y) Y=INV1(A) ;
+
+TIEH$ output(Y) Y=__const1__() ;
+TIEL$ output(Y) Y=__const0__() ;
+
+HEAD2X{2,4,8,16,32}$ input(SLEEP) output(SLEEPOUT) SLEEPOUT=BUF1(SLEEP) ;
+HEADX{2,4,8,16,32}$  input(SLEEP) ;
+
+FOOT2X{2,4,8,16,32}$ input(SLEEP) output(SLEEPOUT) SLEEPOUT=BUF1(SLEEP) ;
+FOOTX{2,4,8,16,32}$  input(SLEEP) ;
+
+ANTENNA$ input(INP)   ;
+CLOAD1$  input(A)     ;
+DCAP$                 ;
+DHFILLH2$             ;
+DHFILLHL2$            ;
+DHFILLHLHLS11$        ;
+SHFILL{1,2,3,64,128}$ ;
+
+AND2X{1,2,4}$    input(A1,A2)       output(Y) Y=AND2(A1,A2)        ;
+AND3X{1,2,4}$    input(A1,A2,A3)    output(Y) Y=AND3(A1,A2,A3)     ;
+AND4X{1,2,4}$    input(A1,A2,A3,A4) output(Y) Y=AND4(A1,A2,A3,A4)  ;
+OR2X{1,2,4}$     input(A1,A2)       output(Y) Y=OR2(A1,A2)         ;
+OR3X{1,2,4}$     input(A1,A2,A3)    output(Y) Y=OR3(A1,A2,A3)      ;
+OR4X{1,2,4}$     input(A1,A2,A3,A4) output(Y) Y=OR4(A1,A2,A3,A4)   ;
+XOR2X{1,2}$      input(A1,A2)       output(Y) Y=XOR2(A1,A2)        ;
+XOR3X{1,2}$      input(A1,A2,A3)    output(Y) Y=XOR3(A1,A2,A3)     ;
+NAND2X{0,1,2,4}$ input(A1,A2)       output(Y) Y=NAND2(A1,A2)       ;
+NAND3X{0,1,2,4}$ input(A1,A2,A3)    output(Y) Y=NAND3(A1,A2,A3)    ;
+NAND4X{0,1}$     input(A1,A2,A3,A4) output(Y) Y=NAND4(A1,A2,A3,A4) ;
+NOR2X{0,1,2,4}$  input(A1,A2)       output(Y) Y=NOR2(A1,A2)        ;
+NOR3X{0,1,2,4}$  input(A1,A2,A3)    output(Y) Y=NOR3(A1,A2,A3)     ;
+NOR4X{0,1}$      input(A1,A2,A3,A4) output(Y) Y=NOR4(A1,A2,A3,A4)  ;
+XNOR2X{1,2}$     input(A1,A2)       output(Y) Y=XNOR2(A1,A2)       ;
+XNOR3X{1,2}$     input(A1,A2,A3)    output(Y) Y=XNOR3(A1,A2,A3)    ;
+
+ISOLAND{,AO}X{1,2,4,8}$ input(ISO,D) output(Q) ISOB=NOT1(ISO) Q=AND2(ISOB,D) ;
+ISOLOR{,AO}X{1,2,4,8}$  input(ISO,D) output(Q) Q=OR2(ISO,D)  ;
+
+AO21X{1,2}$  input(A1,A2,A3) output(Y) Y=AO21(A1,A2,A3)  ;
+OA21X{1,2}$  input(A1,A2,A3) output(Y) Y=OA21(A1,A2,A3)  ;
+AOI21X{1,2}$ input(A1,A2,A3) output(Y) Y=AOI21(A1,A2,A3) ;
+OAI21X{1,2}$ input(A1,A2,A3) output(Y) Y=OAI21(A1,A2,A3) ;
+
+AO22X{1,2}$  input(A1,A2,A3,A4) output(Y) Y=AO22(A1,A2,A3,A4)  ;
+OA22X{1,2}$  input(A1,A2,A3,A4) output(Y) Y=OA22(A1,A2,A3,A4)  ;
+AOI22X{1,2}$ input(A1,A2,A3,A4) output(Y) Y=AOI22(A1,A2,A3,A4) ;
+OAI22X{1,2}$ input(A1,A2,A3,A4) output(Y) Y=OAI22(A1,A2,A3,A4) ;
+
+MUX21X{1,2}$ input(A1,A2,S0) output(Y) Y=MUX21(A1,A2,S0) ;
+
+AO221X{1,2}$  input(A1,A2,A3,A4,A5) output(Y) A=AO22(A1,A2,A3,A4) Y=OR2(A5,A)   ;
+OA221X{1,2}$  input(A1,A2,A3,A4,A5) output(Y) A=OA22(A1,A2,A3,A4) Y=AND2(A5,A)  ;
+AOI221X{1,2}$ input(A1,A2,A3,A4,A5) output(Y) A=AO22(A1,A2,A3,A4) Y=NOR2(A5,A)  ;
+OAI221X{1,2}$ input(A1,A2,A3,A4,A5) output(Y) A=OA22(A1,A2,A3,A4) Y=NAND2(A5,A) ;
+
+AO222X{1,2}$ input(A1,A2,A3,A4,A5,A6)  output(Y) A=AO22(A1,A2,A3,A4) Y=AO21(A5,A6,A)  ;
+OA222X{1,2}$ input(A1,A2,A3,A4,A5,A6)  output(Y) A=OA22(A1,A2,A3,A4) Y=OA21(A5,A6,A)  ;
+AOI222X{1,2}$ input(A1,A2,A3,A4,A5,A6) output(Y) A=AO22(A1,A2,A3,A4) Y=AOI21(A5,A6,A) ;
+OAI222X{1,2}$ input(A1,A2,A3,A4,A5,A6) output(Y) A=OA22(A1,A2,A3,A4) Y=OAI21(A5,A6,A) ;
+
+MUX41X{1,2}$ input(A1,A2,A3,A4,S0,S1) output(Y) A=MUX21(A1,A2,S0) B=MUX21(A3,A4,S0) Y=MUX21(A,B,S1) ;
+
+DEC24X{1,2}$ input(A0,A1) output(Y0,Y1,Y2,Y3) A0B=INV1(A0) A1B=INV1(A1) Y0=NOR2(A0,A1) Y1=AND(A0,A1B) Y2=AND(A0B,A1) Y3=AND(A0,A1) ;
+FADDX{1,2}$ input(A,B,CI) output(S,CO) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(AB,CI,A,B) ;
+HADDX{1,2}$ input(A0,B0) output(SO,C1) C1=XOR2(A0,B0) SO=AND2(A0,B0) ;
+
+{,AO}DFFARX{1,2}$ input(D,CLK,RSTB)      output(Q,QN) DR=AND2(D,RSTB) Q=DFF(DR,CLK) QN=INV1(Q) ;
+DFFASRX{1,2}$     input(D,CLK,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
+DFFASX{1,2}$      input(D,CLK,SETB)      output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) Q=DFF(DS,CLK) QN=INV1(Q) ;
+DFFSSRX{1,2}$     input(CLK,D,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
+DFFX{1,2}$        input(D,CLK)           output(Q,QN) Q=DFF(D,CLK) QN=INV1(Q) ;
+
+SDFFARX{1,2}$   input(D,CLK,RSTB,SE,SI)      output(Q,QN) DR=AND2(D,RSTB) DI=MUX21(DR,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFASRSX{1,2}$ input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN,SO) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) SO=BUF1(Q) ;
+SDFFASRX{1,2}$  input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFASX{1,2}$   input(D,CLK,SETB,SE,SI)      output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFSSRX{1,2}$  input(CLK,D,RSTB,SETB,SI,SE) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFX{1,2}$     input(D,CLK,SE,SI)           output(Q,QN) DI=MUX21(D,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+
+LATCHX{1,2}$ input(D,CLK) output(Q,QN) Q=LATCH(D,CLK) QN=INV1(Q) ;
+""".replace('$','_RVT'))
+"""The SAED 32nm educational technology library.
+It defines all cells except: negative-edge flip-flops, tri-state, latches, clock gating, level shifters
+"""
+
+
+SAED90 = TechLib(r"""
+NBUFFX{2,4,8,16,32}$ input(INP) output(Z) Z=BUF1(INP) ;
+AOBUFX{1,2,4}$       input(INP) output(Z) Z=BUF1(INP) ;
+DELLN{1,2,3}X2$      input(INP) output(Z)Z=BUF1(INP) ;
+
+INVX{0,1,2,4,8,16,32}$ input(INP) output(ZN) ZN=INV1(INP) ;
+AOINVX{1,2,4}$         input(INP) output(ZN) ZN=INV1(INP) ;
+IBUFFX{2,4,8,16,32}$   input(INP) output(ZN) ZN=INV1(INP) ;
+
+TIEH$ output(Z)   Z=__const1__() ;
+TIEL$ output(ZN) ZN=__const0__() ;
+
+HEAD2X{2,4,8,16,32}$ input(SLEEP) output(SLEEPOUT) SLEEPOUT=BUF1(SLEEP) ;
+HEADX{2,4,8,16,32}$  input(SLEEP) ;
+
+ANTENNA$ input(INP)   ;
+CLOAD1$  input(INP)   ;
+DCAP$                 ;
+DHFILL{HLH,LHL}2      ;
+DHFILLHLHLS11$        ;
+SHFILL{1,2,3,64,128}$ ;
+
+AND2X{1,2,4}$    input(IN1,IN2)         output(Q)   Q=AND2(IN1,IN2)          ;
+AND3X{1,2,4}$    input(IN1,IN2,IN3)     output(Q)   Q=AND3(IN1,IN2,IN3)      ;
+AND4X{1,2,4}$    input(IN1,IN2,IN3,IN4) output(Q)   Q=AND4(IN1,IN2,IN3,IN4)  ;
+OR2X{1,2,4}$     input(IN1,IN2)         output(Q)   Q=OR2(IN1,IN2)           ;
+OR3X{1,2,4}$     input(IN1,IN2,IN3)     output(Q)   Q=OR3(IN1,IN2,IN3)       ;
+OR4X{1,2,4}$     input(IN1,IN2,IN3,IN4) output(Q)   Q=OR4(IN1,IN2,IN3,IN4)   ;
+XOR2X{1,2}$      input(IN1,IN2)         output(Q)   Q=XOR2(IN1,IN2)          ;
+XOR3X{1,2}$      input(IN1,IN2,IN3)     output(Q)   Q=XOR3(IN1,IN2,IN3)      ;
+NAND2X{0,1,2,4}$ input(IN1,IN2)         output(QN) QN=NAND2(IN1,IN2)         ;
+NAND3X{0,1,2,4}$ input(IN1,IN2,IN3)     output(QN) QN=NAND3(IN1,IN2,IN3)     ;
+NAND4X{0,1}$     input(IN1,IN2,IN3,IN4) output(QN) QN=NAND4(IN1,IN2,IN3,IN4) ;
+NOR2X{0,1,2,4}$  input(IN1,IN2)         output(QN) QN=NOR2(IN1,IN2)          ;
+NOR3X{0,1,2,4}$  input(IN1,IN2,IN3)     output(QN) QN=NOR3(IN1,IN2,IN3)      ;
+NOR4X{0,1}$      input(IN1,IN2,IN3,IN4) output(QN) QN=NOR4(IN1,IN2,IN3,IN4)  ;
+XNOR2X{1,2}$     input(IN1,IN2)         output(Q)   Q=XNOR2(IN1,IN2)         ;
+XNOR3X{1,2}$     input(IN1,IN2,IN3)     output(Q)   Q=XNOR3(IN1,IN2,IN3)     ;
+
+ISOLAND{,AO}X{1,2,4,8}$ input(ISO,D) output(Q) ISOB=NOT1(ISO) Q=AND2(ISOB,D) ;
+ISOLOR{,AO}X{1,2,4,8}$  input(ISO,D) output(Q) Q=OR2(ISO,D)  ;
+
+AO21X{1,2}$  input(IN1,IN2,IN3) output(Q)   Q=AO21(IN1,IN2,IN3)  ;
+OA21X{1,2}$  input(IN1,IN2,IN3) output(Q)   Q=OA21(IN1,IN2,IN3)  ;
+AOI21X{1,2}$ input(IN1,IN2,IN3) output(QN) QN=AOI21(IN1,IN2,IN3) ;
+OAI21X{1,2}$ input(IN1,IN2,IN3) output(QN) QN=OAI21(IN1,IN2,IN3) ;
+
+AO22X{1,2}$  input(IN1,IN2,IN3,IN4) output(Q)   Q=AO22(IN1,IN2,IN3,IN4)  ;
+OA22X{1,2}$  input(IN1,IN2,IN3,IN4) output(Q)   Q=OA22(IN1,IN2,IN3,IN4)  ;
+AOI22X{1,2}$ input(IN1,IN2,IN3,IN4) output(QN) QN=AOI22(IN1,IN2,IN3,IN4) ;
+OAI22X{1,2}$ input(IN1,IN2,IN3,IN4) output(QN) QN=OAI22(IN1,IN2,IN3,IN4) ;
+
+MUX21X{1,2}$ input(IN1,IN2,S) output(Q) Q=MUX21(IN1,IN2,S) ;
+
+AO221X{1,2}$  input(IN1,IN2,IN3,IN4,IN5) output(Q)  A=AO22(IN1,IN2,IN3,IN4)  Q=OR2(IN5,A)   ;
+OA221X{1,2}$  input(IN1,IN2,IN3,IN4,IN5) output(Q)  A=OA22(IN1,IN2,IN3,IN4)  Q=AND2(IN5,A)  ;
+AOI221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(QN) A=AO22(IN1,IN2,IN3,IN4) QN=NOR2(IN5,A)  ;
+OAI221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(QN) A=OA22(IN1,IN2,IN3,IN4) QN=NAND2(IN5,A) ;
+
+AO222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6)  output(Q)  A=AO22(IN1,IN2,IN3,IN4)  Q=AO21(IN5,IN6,A)  ;
+OA222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6)  output(Q)  A=OA22(IN1,IN2,IN3,IN4)  Q=OA21(IN5,IN6,A)  ;
+AOI222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(QN) A=AO22(IN1,IN2,IN3,IN4) QN=AOI21(IN5,IN6,A) ;
+OAI222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(QN) A=OA22(IN1,IN2,IN3,IN4) QN=OAI21(IN5,IN6,A) ;
+
+MUX41X{1,2}$ input(IN1,IN2,IN3,IN4,S0,S1) output(Q) A=MUX21(IN1,IN2,S0) B=MUX21(IN3,IN4,S0) Q=MUX21(A,B,S1) ;
+
+DEC24X{1,2}$ input(IN1,IN2) output(Q0,Q1,Q2,Q3) IN1B=INV1(IN1) IN2B=INV1(IN2) Q0=NOR2(IN1,IN2) Q1=AND(IN1,IN2B) Q2=AND(IN1B,IN2) Q3=AND(IN1,IN2) ;
+FADDX{1,2}$ input(A,B,CI) output(S,CO) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(AB,CI,A,B) ;
+HADDX{1,2}$ input(A0,B0) output(SO,C1) C1=XOR2(A0,B0) SO=AND2(A0,B0) ;
+
+{,AO}DFFARX{1,2}$ input(D,CLK,RSTB)      output(Q,QN) DR=AND2(D,RSTB) Q=DFF(DR,CLK) QN=INV1(Q) ;
+DFFASRX{1,2}$     input(D,CLK,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
+DFFASX{1,2}$      input(D,CLK,SETB)      output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) Q=DFF(DS,CLK) QN=INV1(Q) ;
+DFFSSRX{1,2}$     input(CLK,D,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
+DFFX{1,2}$        input(D,CLK)           output(Q,QN) Q=DFF(D,CLK) QN=INV1(Q) ;
+
+SDFFARX{1,2}$   input(D,CLK,RSTB,SE,SI)      output(Q,QN) DR=AND2(D,RSTB) DI=MUX21(DR,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFASRSX{1,2}$ input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN,S0) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) S0=BUF1(Q) ;
+SDFFASRX{1,2}$  input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFASX{1,2}$   input(D,CLK,SETB,SE,SI)      output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFSSRX{1,2}$  input(CLK,D,RSTB,SETB,SI,SE) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFX{1,2}$     input(D,CLK,SE,SI)           output(Q,QN) DI=MUX21(D,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+
+LATCHX{1,2}$ input(D,CLK) output(Q,QN) Q=LATCH(D,CLK) QN=INV1(Q) ;
+""".replace('$','{,_LVT,_HVT}'))
+"""The SAED 90nm educational technology library.
+It defines all cells except: negative-edge flip-flops, tri-state, latches, clock gating, level shifters
+"""
diff --git a/src/kyupy/verilog.py b/src/kyupy/verilog.py
index e8a20bd..1c6a0d1 100644
--- a/src/kyupy/verilog.py
+++ b/src/kyupy/verilog.py
@@ -1,16 +1,16 @@
 """A simple and incomplete parser for Verilog files.
 
 The main purpose of this parser is to load synthesized, non-hierarchical (flat) gate-level netlists.
-It supports only a very limited subset of Verilog.
+It supports only a subset of Verilog.
 """
 
 from collections import namedtuple
 
-from lark import Lark, Transformer
+from lark import Lark, Transformer, Tree
 
 from . import log, readtext
 from .circuit import Circuit, Node, Line
-from .techlib import TechLib
+from .techlib import NANGATE
 
 Instantiation = namedtuple('Instantiation', ['type', 'name', 'pins'])
 
@@ -35,51 +35,89 @@ class SignalDeclaration:
 
 
 class VerilogTransformer(Transformer):
-    def __init__(self, branchforks=False, tlib=TechLib()):
+    def __init__(self, branchforks=False, tlib=NANGATE):
         super().__init__()
-        self._signal_declarations = {}
         self.branchforks = branchforks
         self.tlib = tlib
 
     @staticmethod
     def name(args):
         s = args[0].value
-        if s[0] == '\\':
-            s = s[1:-1]
-        return s
+        return s[1:-1] if s[0] == '\\' else s
+
+    @staticmethod
+    def namedpin(args):
+        return tuple(args) if len(args) > 1 else (args[0], None)
 
     @staticmethod
     def instantiation(args):
-        return Instantiation(args[0], args[1],
-                             dict((pin.children[0],
-                                   pin.children[1]) for pin in args[2:] if len(pin.children) > 1))
+        pinmap = {}
+        for idx, pin in enumerate(args[2:]):
+            p = pin.children[0]
+            if isinstance(p, tuple):  # named pin
+                if p[1] is not None:
+                    pinmap[p[0]] = p[1]
+            else:  # unnamed pin
+                pinmap[idx] = p
+        return Instantiation(args[0], args[1], pinmap)
 
     def range(self, args):
         left = int(args[0].value)
-        right = int(args[1].value)
+        right = int(args[1].value) if len(args) > 1 else left
         return range(left, right+1) if left <= right else range(left, right-1, -1)
 
+    def sigsel(self, args):
+        if len(args) > 1 and isinstance(args[1], range):
+            l = [f'{args[0]}[{i}]' for i in args[1]]
+            return l if len(l) > 1 else l[0]
+        elif "'" in args[0]:
+            width, rest = args[0].split("'")
+            width = int(width)
+            base, const = rest[0], rest[1:]
+            const = int(const, {'b': 2, 'd':10, 'h':16}[base.lower()])
+            l = []
+            for _ in range(width):
+                l.insert(0, "1'b1" if (const & 1) else "1'b0")
+                const >>= 1
+            return l if len(l) > 1 else l[0]
+        else:
+            return args[0]
+
+    def concat(self, args):
+        sigs = []
+        for a in args:
+            if isinstance(a, list):
+                sigs += a
+            else:
+                sigs.append(a)
+        return sigs
+
     def declaration(self, kind, args):
         rnge = None
         if isinstance(args[0], range):
             rnge = args[0]
             args = args[1:]
-        for sd in [SignalDeclaration(kind, signal, rnge) for signal in args]:
-            if kind != 'wire' or sd.basename not in self._signal_declarations:
-                self._signal_declarations[sd.basename] = sd
+        return [SignalDeclaration(kind, signal, rnge) for signal in args]
 
-    def input(self, args): self.declaration("input", args)
-    def output(self, args): self.declaration("output", args)
-    def inout(self, args): self.declaration("input", args)  # just treat as input
-    def wire(self, args): self.declaration("wire", args)
+    def input(self, args): return self.declaration("input", args)
+    def output(self, args): return self.declaration("output", args)
+    def inout(self, args): return self.declaration("input", args)  # just treat as input
+    def wire(self, args): return self.declaration("wire", args)
 
     def module(self, args):
         c = Circuit(args[0])
         positions = {}
         pos = 0
         const_count = 0
+        sig_decls = {}
+        for decls in args[2:]:  # pass 0: collect signal declarations
+            if isinstance(decls, list):
+                if len(decls) > 0 and isinstance(decls[0], SignalDeclaration):
+                    for decl in decls:
+                        if decl.basename not in sig_decls or sig_decls[decl.basename].kind == 'wire':
+                            sig_decls[decl.basename] = decl
         for intf_sig in args[1].children:
-            for name in self._signal_declarations[intf_sig].names:
+            for name in sig_decls[intf_sig].names:
                 positions[name] = pos
                 pos += 1
         assignments = []
@@ -88,28 +126,47 @@ class VerilogTransformer(Transformer):
                 n = Node(c, stmt.name, kind=stmt.type)
                 for p, s in stmt.pins.items():
                     if self.tlib.pin_is_output(n.kind, p):
+                        if s in sig_decls:
+                            s = sig_decls[s].names
+                            if isinstance(s, list) and len(s) == 1:
+                                s = s[0]
                         Line(c, (n, self.tlib.pin_index(stmt.type, p)), Node(c, s))
-            elif stmt is not None and stmt.data == 'assign':
+            elif hasattr(stmt, 'data') and stmt.data == 'assign':
                 assignments.append((stmt.children[0], stmt.children[1]))
-        for sd in self._signal_declarations.values():
+        for sd in sig_decls.values():
             if sd.kind == 'output' or sd.kind == 'input':
                 for name in sd.names:
                     n = Node(c, name, kind=sd.kind)
                     if name in positions:
-                        c.interface[positions[name]] = n
+                        c.io_nodes[positions[name]] = n
                     if sd.kind == 'input':
                         Line(c, n, Node(c, name))
-        for s1, s2 in assignments:  # pass 1.5: process signal assignments
-            if s1 in c.forks:
-                assert s2 not in c.forks, 'assignment between two driven signals'
-                Line(c, c.forks[s1], Node(c, s2))
-            elif s2 in c.forks:
-                assert s1 not in c.forks, 'assignment between two driven signals'
-                Line(c, c.forks[s2], Node(c, s1))
-            elif s2.startswith("1'b"):
-                cnode = Node(c, f'__const{s2[3]}_{const_count}__', f'__const{s2[3]}__')
-                const_count += 1
-                Line(c, cnode, Node(c, s1))
+        for target, source in assignments:  # pass 1.5: process signal assignments
+            target_sigs = []
+            if not isinstance(target, list): target = [target]
+            for s in target:
+                if s in sig_decls:
+                    target_sigs += sig_decls[s].names
+                else:
+                    target_sigs.append(s)
+            source_sigs = []
+            if not isinstance(source, list): source = [source]
+            for s in source:
+                if s in sig_decls:
+                    source_sigs += sig_decls[s].names
+                else:
+                    source_sigs.append(s)
+            for t, s in zip(target_sigs, source_sigs):
+                if t in c.forks:
+                    assert s not in c.forks, 'assignment between two driven signals'
+                    Line(c, c.forks[t], Node(c, s))
+                elif s in c.forks:
+                    assert t not in c.forks, 'assignment between two driven signals'
+                    Line(c, c.forks[s], Node(c, t))
+                elif s.startswith("1'b"):
+                    cnode = Node(c, f'__const{s[3]}_{const_count}__', f'__const{s[3]}__')
+                    const_count += 1
+                    Line(c, cnode, Node(c, t))
         for stmt in args[2:]:  # pass 2: connect signals to readers
             if isinstance(stmt, Instantiation):
                 for p, s in stmt.pins.items():
@@ -122,28 +179,34 @@ class VerilogTransformer(Transformer):
                         s = cname
                         Line(c, cnode, Node(c, s))
                     if s not in c.forks:
-                        log.warn(f'Signal not driven: {s}')
-                        Node(c, s)  # generate fork here
+                        if f'{s}[0]' in c.forks:  # actually a 1-bit bus?
+                            s = f'{s}[0]'
+                        else:
+                            log.warn(f'Signal not driven: {s}')
+                            Node(c, s)  # generate fork here
                     fork = c.forks[s]
                     if self.branchforks:
                         branchfork = Node(c, fork.name + "~" + n.name + "/" + p)
                         Line(c, fork, branchfork)
                         fork = branchfork
                     Line(c, fork, (n, self.tlib.pin_index(stmt.type, p)))
-        for sd in self._signal_declarations.values():
+        for sd in sig_decls.values():
             if sd.kind == 'output':
                 for name in sd.names:
                     if name not in c.forks:
-                        log.warn(f'Output not driven: {name}')
-                    else:
-                        Line(c, c.forks[name], c.cells[name])
+                        if f'{name}[0]' in c.forks:  # actually a 1-bit bus?
+                            name = f'{name}[0]'
+                        else:
+                            log.warn(f'Output not driven: {name}')
+                            continue
+                    Line(c, c.forks[name], c.cells[name])
         return c
 
     @staticmethod
     def start(args): return args[0] if len(args) == 1 else args
 
 
-GRAMMAR = """
+GRAMMAR = r"""
     start: (module)*
     module: "module" name parameters ";" (_statement)* "endmodule"
     parameters: "(" [ _namelist ] ")"
@@ -153,36 +216,45 @@ GRAMMAR = """
     inout: "inout" range? _namelist ";"
     tri: "tri" range? _namelist ";"
     wire: "wire" range? _namelist ";"
-    assign: "assign" name "=" name ";"
+    assign: "assign" sigsel "=" sigsel ";"
     instantiation: name name "(" [ pin ( "," pin )* ] ")" ";"
-    pin: "." name "(" name? ")"
-    range: "[" /[0-9]+/ ":" /[0-9]+/ "]"
-
+    pin: namedpin | sigsel
+    namedpin: "." name "(" sigsel? ")"
+    range: "[" /[0-9]+/ (":" /[0-9]+/)? "]"
+    sigsel: name range? | concat
+    concat: "{" sigsel ( "," sigsel )*  "}"
     _namelist: name ( "," name )*
-    name: ( /[a-z_][a-z0-9_\\[\\]]*/i | /\\\\[^\\t \\r\\n]+[\\t \\r\\n](\\[[0-9]+\\])?/i | /1'b0/i | /1'b1/i )
-    COMMENT: "//" /[^\\n]*/
-    %ignore ( /\\r?\\n/ | COMMENT )+
-    %ignore /[\\t \\f]+/
+    name: ( /[a-z_][a-z0-9_]*/i | /\\[^\t \r\n]+[\t \r\n]/i | /[0-9]+'[bdh][0-9a-f]+/i )
+    %import common.NEWLINE
+    COMMENT: /\/\*(\*(?!\/)|[^*])*\*\// | /\(\*(\*(?!\))|[^*])*\*\)/ |  "//" /(.)*/ NEWLINE
+    %ignore ( /\r?\n/ | COMMENT )+
+    %ignore /[\t \f]+/
     """
 
 
-def parse(text, *, branchforks=False, tlib=TechLib()):
+def parse(text, tlib=NANGATE, branchforks=False):
     """Parses the given ``text`` as Verilog code.
 
     :param text: A string with Verilog code.
+    :param tlib: A technology library object that defines all known cells.
+    :type tlib: :py:class:`~kyupy.techlib.TechLib`
     :param branchforks: If set to ``True``, the returned circuit will include additional `forks` on each fanout branch.
         These forks are needed to correctly annotate interconnect delays
-        (see :py:func:`kyupy.sdf.DelayFile.annotation`).
-    :param tlib: A technology library object that provides pin name mappings.
-    :type tlib: :py:class:`~kyupy.techlib.TechLib`
-    :return: A :class:`~kyupy.circuit.Circuit` object.
+        (see :py:func:`~kyupy.sdf.DelayFile.interconnects()`).
+    :return: A :py:class:`~kyupy.circuit.Circuit` object.
     """
     return Lark(GRAMMAR, parser="lalr", transformer=VerilogTransformer(branchforks, tlib)).parse(text)
 
 
-def load(file, *args, **kwargs):
+def load(file, tlib=NANGATE, branchforks=False):
     """Parses the contents of ``file`` as Verilog code.
 
-    The given file may be gzip compressed. Takes the same keyword arguments as :py:func:`parse`.
+    :param file: A file name or a file handle. Files with `.gz`-suffix are decompressed on-the-fly.
+    :param tlib: A technology library object that defines all known cells.
+    :type tlib: :py:class:`~kyupy.techlib.TechLib`
+    :param branchforks: If set to ``True``, the returned circuit will include additional `forks` on each fanout branch.
+        These forks are needed to correctly annotate interconnect delays
+        (see :py:func:`~kyupy.sdf.DelayFile.interconnects()`).
+    :return: A :py:class:`~kyupy.circuit.Circuit` object.
     """
-    return parse(readtext(file), *args, **kwargs)
+    return parse(readtext(file), tlib, branchforks)
diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py
index 763e39f..93a107f 100644
--- a/src/kyupy/wave_sim.py
+++ b/src/kyupy/wave_sim.py
@@ -13,11 +13,10 @@ Two simulators are available: :py:class:`WaveSim` runs on the CPU, and the deriv
 """
 
 import math
-from bisect import bisect, insort_left
 
 import numpy as np
 
-from . import numba, cuda, hr_bytes
+from . import numba, cuda, sim, cdiv
 
 
 TMAX = np.float32(2 ** 127)
@@ -29,762 +28,428 @@ TMIN = np.float32(-2 ** 127)
 """A large negative 32-bit floating point value used at the beginning of waveforms that start with logic-1."""
 
 
-class Heap:
-    def __init__(self):
-        self.chunks = dict()  # map start location to chunk size
-        self.released = list()  # chunks that were released
-        self.current_size = 0
-        self.max_size = 0
-
-    def alloc(self, size):
-        for idx, loc in enumerate(self.released):
-            if self.chunks[loc] == size:
-                del self.released[idx]
-                return loc
-            if self.chunks[loc] > size:  # split chunk
-                chunksize = self.chunks[loc]
-                self.chunks[loc] = size
-                self.chunks[loc + size] = chunksize - size
-                self.released[idx] = loc + size  # move released pointer: loc -> loc+size
-                return loc
-        # no previously released chunk; make new one
-        loc = self.current_size
-        self.chunks[loc] = size
-        self.current_size += size
-        self.max_size = max(self.max_size, self.current_size)
-        return loc
-
-    def free(self, loc):
-        size = self.chunks[loc]
-        if loc + size == self.current_size:  # end of managed area, remove chunk
-            del self.chunks[loc]
-            self.current_size -= size
-            # check and remove prev chunk if free
-            if len(self.released) > 0:
-                prev = self.released[-1]
-                if prev + self.chunks[prev] == self.current_size:
-                    chunksize = self.chunks[prev]
-                    del self.chunks[prev]
-                    del self.released[-1]
-                    self.current_size -= chunksize
-            return
-        released_idx = bisect(self.released, loc)
-        if released_idx < len(self.released) and loc + size == self.released[released_idx]:  # next chunk is free, merge
-            chunksize = size + self.chunks[loc + size]
-            del self.chunks[loc + size]
-            self.chunks[loc] = chunksize
-            size = self.chunks[loc]
-            self.released[released_idx] = loc
-        else:
-            insort_left(self.released, loc)  # put in a new release
-        if released_idx > 0:  # check if previous chunk is free
-            prev = self.released[released_idx - 1]
-            if prev + self.chunks[prev] == loc:  # previous chunk is adjacent to freed one, merge
-                chunksize = size + self.chunks[prev]
-                del self.chunks[loc]
-                self.chunks[prev] = chunksize
-                del self.released[released_idx]
-
-    def __repr__(self):
-        r = []
-        for loc in sorted(self.chunks.keys()):
-            size = self.chunks[loc]
-            released_idx = bisect(self.released, loc)
-            is_released = released_idx > 0 and len(self.released) > 0 and self.released[released_idx - 1] == loc
-            r.append(f'{loc:5d}: {"free" if is_released else "used"} {size}')
-        return "\n".join(r)
-
-
-class WaveSim:
+class WaveSim(sim.SimOps):
     """A waveform-based combinational logic timing simulator running on CPU.
 
     :param circuit: The circuit to simulate.
-    :param timing: The timing annotation of the circuit (see :py:func:`kyupy.sdf.DelayFile.annotation` for details)
+    :param delays: One or more delay annotations for the circuit (see :py:func:`kyupy.sdf.DelayFile.iopaths` for details).
+        Each parallel simulation may use the same delays or different delays, depending on the use-case (see :py:attr:`simctl_int`).
     :param sims: The number of parallel simulations.
-    :param wavecaps: The number of floats available in each waveform. Waveforms are encoding the signal switching
-        history by storing transition times. The waveform capacity roughly corresponds to the number of transitions
+    :param c_caps: The number of floats available in each waveform. Values must be positive and a multiple of 4.
+        Waveforms encode the signal switching history by storing transition times.
+        The waveform capacity roughly corresponds to the number of transitions
         that can be stored. A capacity of ``n`` can store at least ``n-2`` transitions. If more transitions are
         generated during simulation, the latest glitch is removed (freeing up two transition times) and an overflow
         flag is set. If an integer is given, all waveforms are set to that same capacity. With an array of length
-        ``len(circuit.lines)`` the capacity can be controlled for each intermediate waveform individually.
+        ``len(circuit.lines)`` the capacity is set individually for each intermediate waveform.
+    :param a_ctrl: An integer array controlling the accumulation of weighted switching activity during simulation.
+        Its shape must be ``(len(circuit.lines), 3)``. ``a_ctrl[...,0]`` is the index into the accumulation buffer, -1 means ignore.
+        ``a_ctrl[...,1]`` is the (integer) weight for a rising transition, ``a_ctrl[...,2]`` is the (integer) weight for
+        a falling transition. The accumulation buffer (:py:attr:`abuf`) is allocated automatically if ``a_ctrl`` is given.
+    :param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
+        memory footprint, but intermediate signal waveforms may become unaccessible after a propagation.
     :param strip_forks: If enabled, the simulator will not evaluate fork nodes explicitly. This saves simulation time
-        by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes
+        and memory by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes
         are ignored.
-    :param keep_waveforms: If disabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
-        memory footprint, but intermediate signal waveforms become unaccessible after a propagation.
     """
-    def __init__(self, circuit, timing, sims=8, wavecaps=16, strip_forks=False, keep_waveforms=True):
-        self.circuit = circuit
+    def __init__(self, circuit, delays, sims=8, c_caps=16, a_ctrl=None, c_reuse=False, strip_forks=False):
+        super().__init__(circuit, c_caps=c_caps, c_caps_min=4, a_ctrl=a_ctrl, c_reuse=c_reuse, strip_forks=strip_forks)
         self.sims = sims
-        self.overflows = 0
-        self.interface = list(circuit.interface) + [n for n in circuit.nodes if 'dff' in n.kind.lower()]
-
-        self.lst_eat_valid = False
-
-        self.cdata = np.zeros((len(self.interface), sims, 7), dtype='float32')
-                     
-        self.sdata = np.zeros((sims, 4), dtype='float32')
-        self.sdata[...,0] = 1.0
-
-        if isinstance(wavecaps, int):
-            wavecaps = [wavecaps] * len(circuit.lines)
-
-        intf_wavecap = 4  # sufficient for storing only 1 transition.
-
-        # indices for state allocation table (sat)
-        self.zero_idx = len(circuit.lines)
-        self.tmp_idx = self.zero_idx + 1
-        self.ppi_offset = self.tmp_idx + 1
-        self.ppo_offset = self.ppi_offset + len(self.interface)
-        self.sat_length = self.ppo_offset + len(self.interface)
-
-        # translate circuit structure into self.ops
-        ops = []
-        interface_dict = dict((n, i) for i, n in enumerate(self.interface))
-        for n in circuit.topological_order():
-            if n in interface_dict:
-                inp_idx = self.ppi_offset + interface_dict[n]
-                if len(n.outs) > 0 and n.outs[0] is not None:  # first output of a PI/PPI
-                    ops.append((0b1010, n.outs[0].index, inp_idx, self.zero_idx))
-                if 'dff' in n.kind.lower():  # second output of DFF is inverted
-                    if len(n.outs) > 1 and n.outs[1] is not None:
-                        ops.append((0b0101, n.outs[1].index, inp_idx, self.zero_idx))
-                else:  # if not DFF, no output is inverted.
-                    for o_line in n.outs[1:]:
-                        if o_line is not None:
-                            ops.append((0b1010, o_line.index, inp_idx, self.zero_idx))
-            else:  # regular node, not PI/PPI or PO/PPO
-                o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx
-                i0_idx = n.ins[0].index if len(n.ins) > 0 and n.ins[0] is not None else self.zero_idx
-                i1_idx = n.ins[1].index if len(n.ins) > 1 and n.ins[1] is not None else self.zero_idx
-                kind = n.kind.lower()
-                if kind == '__fork__':
-                    if not strip_forks:
-                        for o_line in n.outs:
-                            if o_line is not None:
-                                ops.append((0b1010, o_line.index, i0_idx, i1_idx))
-                elif kind.startswith('nand'):
-                    ops.append((0b0111, o0_idx, i0_idx, i1_idx))
-                elif kind.startswith('nor'):
-                    ops.append((0b0001, o0_idx, i0_idx, i1_idx))
-                elif kind.startswith('and'):
-                    ops.append((0b1000, o0_idx, i0_idx, i1_idx))
-                elif kind.startswith('or'):
-                    ops.append((0b1110, o0_idx, i0_idx, i1_idx))
-                elif kind.startswith('xor'):
-                    ops.append((0b0110, o0_idx, i0_idx, i1_idx))
-                elif kind.startswith('xnor'):
-                    ops.append((0b1001, o0_idx, i0_idx, i1_idx))
-                elif kind.startswith('not') or kind.startswith('inv') or kind.startswith('ibuf'):
-                    ops.append((0b0101, o0_idx, i0_idx, i1_idx))
-                elif kind.startswith('buf') or kind.startswith('nbuf'):
-                    ops.append((0b1010, o0_idx, i0_idx, i1_idx))
-                elif kind.startswith('__const1__') or kind.startswith('tieh'):
-                    ops.append((0b0101, o0_idx, i0_idx, i1_idx))
-                elif kind.startswith('__const0__') or kind.startswith('tiel'):
-                    ops.append((0b1010, o0_idx, i0_idx, i1_idx))
-                else:
-                    print('unknown gate type', kind)
-        self.ops = np.asarray(ops, dtype='int32')
-
-        # create a map from fanout lines to stem lines for fork stripping
-        stems = np.zeros(self.sat_length, dtype='int32') - 1  # default to -1: 'no fanout line'
-        if strip_forks:
-            for f in circuit.forks.values():
-                prev_line = f.ins[0]
-                while prev_line.driver.kind == '__fork__':
-                    prev_line = prev_line.driver.ins[0]
-                stem_idx = prev_line.index
-                for ol in f.outs:
-                    stems[ol] = stem_idx
-
-        # calculate level (distance from PI/PPI) and reference count for each line
-        levels = np.zeros(self.sat_length, dtype='int32')
-        ref_count = np.zeros(self.sat_length, dtype='int32')
-        level_starts = [0]
-        current_level = 1
-        for i, op in enumerate(self.ops):
-            # if we fork-strip, always take the stems for determining fan-in level
-            i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
-            i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
-            if levels[i0_idx] >= current_level or levels[i1_idx] >= current_level:
-                current_level += 1
-                level_starts.append(i)
-            levels[op[1]] = current_level  # set level of the output line
-            ref_count[i0_idx] += 1
-            ref_count[i1_idx] += 1
-        self.level_starts = np.asarray(level_starts, dtype='int32')
-        self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32')
-
-        # state allocation table. maps line and interface indices to self.state memory locations
-        self.sat = np.zeros((self.sat_length, 3), dtype='int')
-        self.sat[:, 0] = -1
-
-        h = Heap()
-
-        # allocate and keep memory for special fields
-        self.sat[self.zero_idx] = h.alloc(intf_wavecap), intf_wavecap, 0
-        self.sat[self.tmp_idx] = h.alloc(intf_wavecap), intf_wavecap, 0
-        ref_count[self.zero_idx] += 1
-        ref_count[self.tmp_idx] += 1
-
-        # allocate and keep memory for PI/PPI, keep memory for PO/PPO (allocated later)
-        for i, n in enumerate(self.interface):
-            if len(n.outs) > 0:
-                self.sat[self.ppi_offset + i] = h.alloc(intf_wavecap), intf_wavecap, 0
-                ref_count[self.ppi_offset + i] += 1
-            if len(n.ins) > 0:
-                i0_idx = stems[n.ins[0]] if stems[n.ins[0]] >= 0 else n.ins[0]
-                ref_count[i0_idx] += 1
-
-        # allocate memory for the rest of the circuit
-        for op_start, op_stop in zip(self.level_starts, self.level_stops):
-            free_list = []
-            for op in self.ops[op_start:op_stop]:
-                # if we fork-strip, always take the stems
-                i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
-                i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
-                ref_count[i0_idx] -= 1
-                ref_count[i1_idx] -= 1
-                if ref_count[i0_idx] <= 0: free_list.append(self.sat[i0_idx, 0])
-                if ref_count[i1_idx] <= 0: free_list.append(self.sat[i1_idx, 0])
-                o_idx = op[1]
-                cap = wavecaps[o_idx]
-                self.sat[o_idx] = h.alloc(cap), cap, 0
-            if not keep_waveforms:
-                for loc in free_list:
-                    h.free(loc)
-
-        # copy memory location and capacity from stems to fanout lines
-        for lidx, stem in enumerate(stems):
-            if stem >= 0:  # if at a fanout line
-                self.sat[lidx] = self.sat[stem]
-
-        # copy memory location to PO/PPO area
-        for i, n in enumerate(self.interface):
-            if len(n.ins) > 0:
-                self.sat[self.ppo_offset + i] = self.sat[n.ins[0]]
-
-        # pad timing
-        self.timing = np.zeros((self.sat_length, 2, 2))
-        self.timing[:len(timing)] = timing
-
-        # allocate self.state
-        self.state = np.zeros((h.max_size, sims), dtype='float32') + TMAX
-
-        m1 = np.array([2 ** x for x in range(7, -1, -1)], dtype='uint8')
-        m0 = ~m1
-        self.mask = np.rollaxis(np.vstack((m0, m1)), 1)
+        if delays.ndim == 3: delays = np.expand_dims(delays, axis=0)
+        self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype)
+        self.delays[:, :delays.shape[1]] = delays
+
+        self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX
+        self.s = np.zeros((11, self.s_len, sims), dtype=np.float32)
+        """Information about the logic values and transitions around the sequential elements (flip-flops) and ports.
+
+        The first 3 values are read by :py:func:`s_to_c`.
+        The remaining values are written by :py:func:`c_to_s`.
+
+        The elements are as follows:
+
+        * ``s[0]`` (P)PI initial value
+        * ``s[1]`` (P)PI transition time
+        * ``s[2]`` (P)PI final value
+        * ``s[3]`` (P)PO initial value
+        * ``s[4]`` (P)PO earliest arrival time (EAT): The time at which the output transitioned from its initial value.
+        * ``s[5]`` (P)PO latest stabilization time (LST): The time at which the output settled to its final value.
+        * ``s[6]`` (P)PO final value
+        * ``s[7]`` (P)PO capture value: probability of capturing a 1 at a given capture time
+        * ``s[8]`` (P)PO sampled capture value: decided by random sampling according to a given seed.
+        * ``s[9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed.
+        * ``s[10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more
+          transitions than specified in ``c_caps``. Some transitions have been discarded, the
+          final values in the waveforms are still valid.
+        """
+
+        self.abuf_len = self.ops[:,6].max() + 1
+        self.abuf = np.zeros((self.abuf_len, sims), dtype=np.int32) if self.abuf_len > 0 else np.zeros((1, 1), dtype=np.int32)
+
+        self.simctl_int = np.zeros((2, sims), dtype=np.int32)
+        """Integer array for per-simulation delay configuration.
+
+        * ``simctl_int[0]`` delay dataset or random seed for picking a delay. By default, each sim has a unique seed.
+        * ``simctl_int[1]`` Method for picking a delay:
+            * 0: seed parameter of :py:func:`c_prop` directly specifies dataset for all simulations
+            * 1: ``simctl_int[0]`` specifies dataset on a per-simulation basis
+            * 2 (default): ``simctl_int[0]`` and seed parameter of :py:func:`c_prop` together are a random seed for picking a delay dataset.
+        """
+        self.simctl_int[0] = range(sims)  # unique seed for each sim by default, zero this to pick same delays for all sims.
+        self.simctl_int[1] = 2  # random picking by default.
+
+        self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.simctl_int)])
 
     def __repr__(self):
-        total_mem = self.state.nbytes + self.sat.nbytes + self.ops.nbytes + self.cdata.nbytes
-        return f'<WaveSim {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \
-               f'levels={len(self.level_starts)} mem={hr_bytes(total_mem)}>'
-
-    def get_line_delay(self, line, polarity):
-        """Returns the current delay of the given ``line`` and ``polarity`` in the simulation model."""
-        return self.timing[line, 0, polarity]
-
-    def set_line_delay(self, line, polarity, delay):
-        """Sets a new ``delay`` for the given ``line`` and ``polarity`` in the simulation model."""
-        self.timing[line, 0, polarity] = delay
-
-    def assign(self, vectors, time=0.0, offset=0):
-        """Assigns new values to the primary inputs and state-elements.
-
-        :param vectors: The values to assign preferably in 8-valued logic. The values are converted to
-            appropriate waveforms with or one transition (``RISE``, ``FALL``) no transitions
-            (``ZERO``, ``ONE``, and others).
-        :type vectors: :py:class:`~kyupy.logic.BPArray`
-        :param time: The transition time of the generated waveforms.
-        :param offset: The offset into the vector set. The vector assigned to the first simulator is
-            ``vectors[offset]``.
+        dev = 'GPU' if hasattr(self.c, 'copy_to_host') else 'CPU'
+        return f'{{name: "{self.circuit.name}", device: "{dev}", sims: {self.sims}, ops: {len(self.ops)}, ' + \
+               f'levels: {len(self.level_starts)}, nbytes: {self.nbytes}}}'
+
+    def s_to_c(self):
+        """Transfers values of sequential elements and primary inputs to the combinational portion.
+
+        Waveforms are generated on the input lines of the combinational circuit based on the data in :py:attr:`s`.
         """
-        nvectors = min(len(vectors) - offset, self.sims)
-        for i in range(len(self.interface)):
-            ppi_loc = self.sat[self.ppi_offset + i, 0]
-            if ppi_loc < 0: continue
-            for p in range(nvectors):
-                vector = p + offset
-                a = vectors.data[i, :, vector // 8]
-                m = self.mask[vector % 8]
-                toggle = 0
-                if len(a) <= 2:
-                    if a[0] & m[1]:
-                        self.state[ppi_loc, p] = TMIN
-                        toggle += 1
-                else:
-                    if a[1] & m[1]:
-                        self.state[ppi_loc, p] = TMIN
-                        toggle += 1
-                    if (a[2] & m[1]) and ((a[0] & m[1]) != (a[1] & m[1])):
-                        self.state[ppi_loc + toggle, p] = time
-                        toggle += 1
-                self.state[ppi_loc + toggle, p] = TMAX
-
-    def propagate(self, sims=None, sd=0.0, seed=1):
+        sins = self.s[:, self.pippi_s_locs]
+        cond = (sins[2] != 0) + 2*(sins[0] != 0)  # choices order: 0 R F 1
+        self.c[self.pippi_c_locs] = np.choose(cond, [TMAX, sins[1], TMIN, TMIN])
+        self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX])
+        self.c[self.pippi_c_locs+2] = TMAX
+
+    def c_prop(self, sims=None, seed=1):
         """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs.
 
         :param sims: Number of parallel simulations to execute. If None, all available simulations are performed.
-        :param sd: Standard deviation for injection of random delay variation. Active, if value is positive.
-        :param seed: Random seed for delay variations.
+        :param seed: Seed for picking delays. See also: :py:attr:`simctl_int`.
         """
         sims = min(sims or self.sims, self.sims)
         for op_start, op_stop in zip(self.level_starts, self.level_stops):
-            self.overflows += level_eval(self.ops, op_start, op_stop, self.state, self.sat, 0, sims,
-                                         self.timing, self.sdata, sd, seed)
-        self.lst_eat_valid = False
-
-    def wave(self, line, vector):
-        # """Returns the desired waveform from the simulation state. Only valid, if simulator was
-        # instantiated with ``keep_waveforms=True``."""
-        if line < 0:
-            return [TMAX]
-        mem, wcap, _ = self.sat[line]
-        if mem < 0:
-            return [TMAX]
-        return self.state[mem:mem + wcap, vector]
-
-    def wave_ppi(self, i, vector):
-        return self.wave(self.ppi_offset + i, vector)
+            level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, 0, sims, self.delays, self.simctl_int, seed)
 
-    def wave_ppo(self, o, vector):
-        return self.wave(self.ppo_offset + o, vector)
+    def c_to_s(self, time=TMAX, sd=0.0, seed=1):
+        """Simulates a capture operation at all sequential elements and primary outputs.
 
-    def capture(self, time=TMAX, sd=0.0, seed=1, cdata=None, offset=0):
-        """Simulates a capture operation at all state-elements and primary outputs.
-
-        The capture analyzes the propagated waveforms at and around the given capture time and returns
-        various results for each capture operation.
+        Propagated waveforms at the outputs of the combinational circuit at and around the given capture time are analyzed and
+        the results are stored in :py:attr:`s`.
 
         :param time: The desired capture time. By default, a capture of the settled value is performed.
         :param sd: A standard deviation for uncertainty in the actual capture time.
         :param seed: The random seed for a capture with uncertainty.
-        :param cdata: An array to copy capture data into (optional). See the return value for details.
-        :param offset: An offset into the supplied capture data array.
-        :return: The capture data as numpy array.
-
-            The 3-dimensional capture data array contains for each interface node (axis 0),
-            and each test (axis 1), seven values:
-
-            0. Probability of capturing a 1 at the given capture time (same as next value, if no
-               standard deviation given).
-            1. A capture value decided by random sampling according to above probability and given seed.
-            2. The final value (assume a very late capture time).
-            3. True, if there was a premature capture (capture error), i.e. final value is different
-               from captured value.
-            4. Earliest arrival time. The time at which the output transitioned from its initial value.
-            5. Latest stabilization time. The time at which the output transitioned to its final value.
-            6. Overflow indicator. If non-zero, some signals in the input cone of this output had more
-               transitions than specified in ``wavecaps``. Some transitions have been discarded, the
-               final values in the waveforms are still valid.
         """
-        for i, node in enumerate(self.interface):
-            if len(node.ins) == 0: continue
-            for p in range(self.sims):
-                self.cdata[i, p] = self.capture_wave(self.ppo_offset + i, p, time, sd, seed)
-        if cdata is not None:
-            assert offset < cdata.shape[1]
-            cap_dim = min(cdata.shape[1] - offset, self.sims)
-            cdata[:, offset:cap_dim + offset] = self.cdata[:, 0:cap_dim]
-        self.lst_eat_valid = True
-        return self.cdata
-
-    def reassign(self, time=0.0):
-        """Re-assigns the last capture to the appropriate pseudo-primary inputs. Generates a new set of
-        waveforms at the PPIs that start with the previous final value of that PPI, and transitions at the
-        given time to the value captured in a previous simulation. :py:func:`~WaveSim.capture` must be called
-        prior to this function. The final value of each PPI is taken from the randomly sampled concrete logic
-        values in the capture data.
+        for s_loc, c_loc, c_len in zip(self.poppo_s_locs, self.c_locs[self.ppo_offset+self.poppo_s_locs], self.c_caps[self.ppo_offset+self.poppo_s_locs]):
+            for vector in range(self.sims):
+                self.s[3:, s_loc, vector] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed)
+
+    def s_ppo_to_ppi(self, time=0.0):
+        """Re-assigns the last sampled capture of the PPOs to the appropriate pseudo-primary inputs (PPIs).
+        Each PPI transition is constructed from the final value of the previous assignment, the
+        given time, and the sampled captured value of its PPO. Reads and modifies :py:attr:`s`.
 
         :param time: The transition time at the inputs (usually 0.0).
         """
-        for i in range(len(self.interface)):
-            ppi_loc = self.sat[self.ppi_offset + i, 0]
-            ppo_loc = self.sat[self.ppo_offset + i, 0]
-            if ppi_loc < 0 or ppo_loc < 0: continue
-            for sidx in range(self.sims):
-                ival = self.val(self.ppi_offset + i, sidx, TMAX) > 0.5
-                oval = self.cdata[i, sidx, 1] > 0.5
-                toggle = 0
-                if ival:
-                    self.state[ppi_loc, sidx] = TMIN
-                    toggle += 1
-                if ival != oval:
-                    self.state[ppi_loc + toggle, sidx] = time
-                    toggle += 1
-                self.state[ppi_loc + toggle, sidx] = TMAX
-
-    def eat(self, line, vector):
-        eat = TMAX
-        for t in self.wave(line, vector):
-            if t >= TMAX: break
-            if t <= TMIN: continue
-            eat = min(eat, t)
-        return eat
-
-    def lst(self, line, vector):
-        lst = TMIN
-        for t in self.wave(line, vector):
-            if t >= TMAX: break
-            if t <= TMIN: continue
-            lst = max(lst, t)
-        return lst
-
-    def lst_ppo(self, o, vector):
-        if not self.lst_eat_valid:
-            self.capture()
-        return self.cdata[o, vector, 5]
-
-    def toggles(self, line, vector):
-        tog = 0
-        for t in self.wave(line, vector):
-            if t >= TMAX: break
-            if t <= TMIN: continue
-            tog += 1
-        return tog
-
-    def _vals(self, idx, vector, times, sd=0.0):
-        s_sqrt2 = sd * math.sqrt(2)
-        m = 0.5
-        accs = [0.0] * len(times)
-        values = [0] * len(times)
-        for t in self.wave(idx, vector):
-            if t >= TMAX: break
-            for idx, time in enumerate(times):
-                if t < time:
-                    values[idx] = values[idx] ^ 1
-            m = -m
-            if t <= TMIN: continue
-            if s_sqrt2 > 0:
-                for idx, time in enumerate(times):
-                    accs[idx] += m * (1 + math.erf((t - time) / s_sqrt2))
-        if (m < 0) and (s_sqrt2 > 0):
-            for idx, time in enumerate(times):
-                accs[idx] += 1
-        if s_sqrt2 == 0:
-            return values
-        return accs
-
-    def vals(self, line, vector, times, sd=0):
-        return self._vals(line, vector, times, sd)
-
-    def val(self, line, vector, time=TMAX, sd=0):
-        return self.capture_wave(line, vector, time, sd)[0]
-
-    def vals_ppo(self, o, vector, times, sd=0):
-        return self._vals(self.ppo_offset + o, vector, times, sd)
-
-    def val_ppo(self, o, vector, time=TMAX, sd=0):
-        if not self.lst_eat_valid:
-            self.capture(time, sd)
-        return self.cdata[o, vector, 0]
-
-    def capture_wave(self, line, vector, time=TMAX, sd=0.0, seed=1):
-        s_sqrt2 = sd * math.sqrt(2)
-        m = 0.5
-        acc = 0.0
-        eat = TMAX
-        lst = TMIN
-        tog = 0
-        ovl = 0
-        val = int(0)
-        final = int(0)
-        for t in self.wave(line, vector):
-            if t >= TMAX:
-                if t == TMAX_OVL:
-                    ovl = 1
-                break
-            m = -m
-            final ^= 1
-            if t < time:
-                val ^= 1
-            if t <= TMIN: continue
-            if s_sqrt2 > 0:
-                acc += m * (1 + math.erf((t - time) / s_sqrt2))
-            eat = min(eat, t)
-            lst = max(lst, t)
-            tog += 1
-        if s_sqrt2 > 0:
-            if m < 0:
-                acc += 1
-            if acc >= 0.99:
-                val = 1
-            elif acc > 0.01:
-                seed = (seed << 4) + (vector << 20) + (line-self.ppo_offset << 1)
-                seed = int(0xDEECE66D) * seed + 0xB
-                seed = int(0xDEECE66D) * seed + 0xB
-                rnd = float((seed >> 8) & 0xffffff) / float(1 << 24)
-                val = rnd < acc
-            else:
-                val = 0
-        else:
-            acc = val
+        self.s[0, self.ppio_s_locs] = self.s[2, self.ppio_s_locs]
+        self.s[1, self.ppio_s_locs] = time
+        self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs]
 
-        return acc, val, final, (val != final), eat, lst, ovl
-
-
-@numba.njit
-def level_eval(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sdata, sd, seed):
-    overflows = 0
-    for op_idx in range(op_start, op_stop):
-        op = ops[op_idx]
-        for st_idx in range(st_start, st_stop):
-            overflows += wave_eval(op, state, sat, st_idx, line_times, sdata[st_idx], sd, seed)
-    return overflows
 
-
-@numba.njit
-def rand_gauss(seed, sd):
-    clamp = 0.5
-    if sd <= 0.0:
-        return 1.0
-    while True:
-        x = -6.0
-        for _ in range(12):
-            seed = int(0xDEECE66D) * seed + 0xB
-            x += float((seed >> 8) & 0xffffff) / float(1 << 24)
-        x *= sd
-        if abs(x) <= clamp:
-            break
-    return x + 1.0
-
-
-@numba.njit
-def wave_eval(op, state, sat, st_idx, line_times, sdata, sd=0.0, seed=0):
-    lut, z_idx, a_idx, b_idx = op
+def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
     overflows = int(0)
 
-    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)
+    lut = op[0]
+    z_idx = op[1]
+    a_idx = op[2]
+    b_idx = op[3]
+    c_idx = op[4]
+    d_idx = op[5]
+
+    if len(delays) > 1:
+        if simctl_int[1] == 0:
+            delays = delays[seed]
+        elif simctl_int[1] == 1:
+            delays = delays[simctl_int[0]]
+        else:
+            _rnd = (seed << 4) + (z_idx << 20) + simctl_int[0]
+            for _ in range(4):
+                _rnd = int(0xDEECE66D) * _rnd + 0xB
+            delays = delays[_rnd % len(delays)]
+    else:
+        delays = delays[0]
 
-    a_mem = sat[a_idx, 0]
-    b_mem = sat[b_idx, 0]
-    z_mem, z_cap, _ = sat[z_idx]
+    a_mem = c_locs[a_idx]
+    b_mem = c_locs[b_idx]
+    c_mem = c_locs[c_idx]
+    d_mem = c_locs[d_idx]
+    z_mem = c_locs[z_idx]
+    z_cap = c_caps[z_idx]
 
     a_cur = int(0)
     b_cur = int(0)
+    c_cur = int(0)
+    d_cur = int(0)
     z_cur = lut & 1
     if z_cur == 1:
-        state[z_mem, st_idx] = TMIN
+        cbuf[z_mem, sim] = TMIN
+
+    z_val = z_cur
+
+    a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val]
+    b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val]
+    c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val]
+    d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val]
 
-    a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd) * sdata[0]
-    if int(sdata[1]) == a_idx: a += sdata[2+z_cur]
-    b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd) * sdata[0]
-    if int(sdata[1]) == b_idx: b += sdata[2+z_cur]
-    
     previous_t = TMIN
 
-    current_t = min(a, b)
+    current_t = min(a, b, c, d)
     inputs = int(0)
 
     while current_t < TMAX:
-        z_val = z_cur & 1
-        if b < a:
+        if a == current_t:
+            a_cur += 1
+            inputs ^= 1
+            thresh = delays[a_idx, a_cur & 1, z_val]
+            a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val]
+            next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, (a_cur & 1) ^ 1, z_val ^ 1]
+        elif b == current_t:
             b_cur += 1
-            b = state[b_mem + b_cur, st_idx]
-            b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ b_mem ^ z_val ^ 1, sd) * sdata[0]
-            thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd) * sdata[0]
-            if int(sdata[1]) == b_idx:
-                b += sdata[2+(z_val^1)]
-                thresh += sdata[2+z_val]
             inputs ^= 2
-            next_t = b
+            thresh = delays[b_idx, b_cur & 1, z_val]
+            b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val]
+            next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, (b_cur & 1) ^ 1, z_val ^ 1]
+        elif c == current_t:
+            c_cur += 1
+            inputs ^= 4
+            thresh = delays[c_idx, c_cur & 1, z_val]
+            c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val]
+            next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, (c_cur & 1) ^ 1, z_val ^ 1]
         else:
-            a_cur += 1
-            a = state[a_mem + a_cur, st_idx]
-            a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ a_mem ^ z_val ^ 1, sd) * sdata[0]
-            thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd) * sdata[0]
-            if int(sdata[1]) == a_idx:
-                a += sdata[2+(z_val^1)]
-                thresh += sdata[2+z_val]
-            inputs ^= 1
-            next_t = a
+            d_cur += 1
+            inputs ^= 8
+            thresh = delays[d_idx, d_cur & 1, z_val]
+            d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val]
+            next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1]
 
         if (z_cur & 1) != ((lut >> inputs) & 1):
-            # we generate a toggle in z_mem, if:
-            #   ( it is the first toggle in z_mem OR
-            #   following toggle is earlier OR
-            #   pulse is wide enough ) AND enough space in z_mem.
-            if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh:
-                if z_cur < (z_cap - 1):
-                    state[z_mem + z_cur, st_idx] = current_t
+            # we generate an edge in z_mem, if ...
+            if (z_cur == 0                            # it is the first edge in z_mem ...
+                or next_t < current_t                 # -OR- the next edge on SAME input is EARLIER (need current edge to filter BOTH in next iteration) ...
+                or (current_t - previous_t) > thresh  # -OR- the generated hazard is wider than pulse threshold.
+                ):
+                if z_cur < (z_cap - 1):  # enough space in z_mem?
+                    cbuf[z_mem + z_cur, sim] = current_t
                     previous_t = current_t
                     z_cur += 1
                 else:
                     overflows += 1
-                    previous_t = state[z_mem + z_cur - 1, st_idx]
+                    previous_t = cbuf[z_mem + z_cur - 1, sim]
                     z_cur -= 1
             else:
                 z_cur -= 1
-                if z_cur > 0:
-                    previous_t = state[z_mem + z_cur - 1, st_idx]
-                else:
-                    previous_t = TMIN
-        current_t = min(a, b)
+                previous_t = cbuf[z_mem + z_cur - 1, sim] if z_cur > 0 else TMIN
 
-    if overflows > 0:
-        state[z_mem + z_cur, st_idx] = TMAX_OVL
-    else:
-        state[z_mem + z_cur, st_idx] = a if a > b else b  # propagate overflow flags by storing biggest TMAX from input
+            # output value of cell changed. update all delayed inputs.
+            z_val = z_val ^ 1
+            a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val]
+            b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val]
+            c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val]
+            d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val]
+
+        current_t = min(a, b, c, d)
+
+    # generate or propagate overflow flag
+    cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d)
 
-    return overflows
+    nrise = max(0, (z_cur+1) // 2 - (cbuf[z_mem, sim] == TMIN))
+    nfall = z_cur // 2
 
+    return nrise, nfall
+
+
+wave_eval_cpu = numba.njit(_wave_eval)
+
+
+@numba.njit
+def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed):
+    for op_idx in range(op_start, op_stop):
+        op = ops[op_idx]
+        for sim in range(sim_start, sim_stop):
+            nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed)
+            a_loc = op[6]
+            a_wr = op[7]
+            a_wf = op[8]
+            if a_loc >= 0:
+                abuf[a_loc, sim] += nrise*a_wr + nfall*a_wf
+
+
+@numba.njit
+def wave_capture_cpu(c, c_loc, c_len, vector, time=TMAX, sd=0.0, seed=1):
+    s_sqrt2 = sd * math.sqrt(2)
+    m = 0.5
+    acc = 0.0
+    eat = TMAX
+    lst = TMIN
+    tog = 0
+    ovl = 0
+    val = int(0)
+    final = int(0)
+    w = c[c_loc:c_loc+c_len, vector]
+    for t in w:
+        if t >= TMAX:
+            if t == TMAX_OVL:
+                ovl = 1
+            break
+        m = -m
+        final ^= 1
+        if t < time:
+            val ^= 1
+        if t <= TMIN: continue
+        if s_sqrt2 > 0:
+            acc += m * (1 + math.erf((t - time) / s_sqrt2))
+        eat = min(eat, t)
+        lst = max(lst, t)
+        tog += 1
+    if s_sqrt2 > 0:
+        if m < 0:
+            acc += 1
+        if acc >= 0.99:
+            val = 1
+        elif acc > 0.01:
+            seed = (seed << 4) + (vector << 20) + c_loc
+            seed = int(0xDEECE66D) * seed + 0xB
+            seed = int(0xDEECE66D) * seed + 0xB
+            rnd = float((seed >> 8) & 0xffffff) / float(1 << 24)
+            val = rnd < acc
+        else:
+            val = 0
+    else:
+        acc = val
+
+    return (w[0] <= TMIN), eat, lst, final, acc, val, 0, ovl
 
 
 class WaveSimCuda(WaveSim):
     """A GPU-accelerated waveform-based combinational logic timing simulator.
 
-    The API is the same as for :py:class:`WaveSim`.
+    The API is identical to :py:class:`WaveSim`. See there for complete documentation.
+
     All internal memories are mirrored into GPU memory upon construction.
     Some operations like access to single waveforms can involve large communication overheads.
     """
-    def __init__(self, circuit, timing, sims=8, wavecaps=16, strip_forks=False, keep_waveforms=True):
-        super().__init__(circuit, timing, sims, wavecaps, strip_forks, keep_waveforms)
-
-        self.tdata = np.zeros((len(self.interface), 3, (sims - 1) // 8 + 1), dtype='uint8')
-
-        self.d_state = cuda.to_device(self.state)
-        self.d_sat = cuda.to_device(self.sat)
-        self.d_ops = cuda.to_device(self.ops)
-        self.d_timing = cuda.to_device(self.timing)
-        self.d_tdata = cuda.to_device(self.tdata)
-        self.d_cdata = cuda.to_device(self.cdata)
-        self.d_sdata = cuda.to_device(self.sdata)
+    def __init__(self, circuit, delays, sims=8, c_caps=16, a_ctrl=None, c_reuse=False, strip_forks=False):
+        super().__init__(circuit, delays, sims, c_caps, a_ctrl=a_ctrl, c_reuse=c_reuse, strip_forks=strip_forks)
+
+        self.c = cuda.to_device(self.c)
+        self.s = cuda.to_device(self.s)
+        self.ops = cuda.to_device(self.ops)
+        self.c_locs = cuda.to_device(self.c_locs)
+        self.c_caps = cuda.to_device(self.c_caps)
+        self.delays = cuda.to_device(self.delays)
+        self.simctl_int = cuda.to_device(self.simctl_int)
+        self.abuf = cuda.to_device(self.abuf)
 
         self._block_dim = (32, 16)
 
-    def __repr__(self):
-        total_mem = self.state.nbytes + self.sat.nbytes + self.ops.nbytes + self.timing.nbytes + \
-                    self.tdata.nbytes + self.cdata.nbytes
-        return f'<WaveSimCuda {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \
-               f'levels={len(self.level_starts)} mem={hr_bytes(total_mem)}>'
-
-    def get_line_delay(self, line, polarity):
-        return self.d_timing[line, 0, polarity]
-
-    def set_line_delay(self, line, polarity, delay):
-        self.d_timing[line, 0, polarity] = delay
-                     
-    def sdata_to_device(self):
-        cuda.to_device(self.sdata, to=self.d_sdata)
-
-    def assign(self, vectors, time=0.0, offset=0):
-        assert (offset % 8) == 0
-        byte_offset = offset // 8
-        assert byte_offset < vectors.data.shape[-1]
-        pdim = min(vectors.data.shape[-1] - byte_offset, self.tdata.shape[-1])
-
-        self.tdata[..., 0:pdim] = vectors.data[..., byte_offset:pdim + byte_offset]
-        if vectors.m == 2:
-            self.tdata[:, 2, 0:pdim] = 0
-        cuda.to_device(self.tdata, to=self.d_tdata)
-
-        grid_dim = self._grid_dim(self.sims, len(self.interface))
-        assign_kernel[grid_dim, self._block_dim](self.d_state, self.d_sat, self.ppi_offset,
-                                                 len(self.interface), self.d_tdata, time)
-
-    def _grid_dim(self, x, y):
-        gx = math.ceil(x / self._block_dim[0])
-        gy = math.ceil(y / self._block_dim[1])
-        return gx, gy
-
-    def propagate(self, sims=None, sd=0.0, seed=1):
+    def __getstate__(self):
+        state = self.__dict__.copy()
+        state['c'] = np.array(self.c)
+        state['s'] = np.array(self.s)
+        state['ops'] = np.array(self.ops)
+        state['c_locs'] = np.array(self.c_locs)
+        state['c_caps'] = np.array(self.c_caps)
+        state['delays'] = np.array(self.delays)
+        state['simctl_int'] = np.array(self.simctl_int)
+        state['abuf'] = np.array(self.abuf)
+        return state
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        self.c = cuda.to_device(self.c)
+        self.s = cuda.to_device(self.s)
+        self.ops = cuda.to_device(self.ops)
+        self.c_locs = cuda.to_device(self.c_locs)
+        self.c_caps = cuda.to_device(self.c_caps)
+        self.delays = cuda.to_device(self.delays)
+        self.simctl_int = cuda.to_device(self.simctl_int)
+        self.abuf = cuda.to_device(self.abuf)
+
+    def s_to_c(self):
+        grid_dim = self._grid_dim(self.sims, self.s_len)
+        wave_assign_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.ppi_offset)
+
+    def _grid_dim(self, x, y): return cdiv(x, self._block_dim[0]), cdiv(y, self._block_dim[1])
+
+    def c_prop(self, sims=None, seed=1):
         sims = min(sims or self.sims, self.sims)
         for op_start, op_stop in zip(self.level_starts, self.level_stops):
             grid_dim = self._grid_dim(sims, op_stop - op_start)
-            wave_kernel[grid_dim, self._block_dim](self.d_ops, op_start, op_stop, self.d_state, self.sat, int(0),
-                                                   sims, self.d_timing, self.d_sdata, sd, seed)
-        cuda.synchronize()
-        self.lst_eat_valid = False
-
-    def wave(self, line, vector):
-        if line < 0:
-            return [TMAX]
-        mem, wcap, _ = self.sat[line]
-        if mem < 0:
-            return [TMAX]
-        return self.d_state[mem:mem + wcap, vector]
-
-    def capture(self, time=TMAX, sd=0, seed=1, cdata=None, offset=0):
-        grid_dim = self._grid_dim(self.sims, len(self.interface))
-        capture_kernel[grid_dim, self._block_dim](self.d_state, self.d_sat, self.ppo_offset,
-                                                  self.d_cdata, time, sd * math.sqrt(2), seed)
-        self.cdata[...] = self.d_cdata
-        if cdata is not None:
-            assert offset < cdata.shape[1]
-            cap_dim = min(cdata.shape[1] - offset, self.sims)
-            cdata[:, offset:cap_dim + offset] = self.cdata[:, 0:cap_dim]
-        self.lst_eat_valid = True
-        return self.cdata
-
-    def reassign(self, time=0.0):
-        grid_dim = self._grid_dim(self.sims, len(self.interface))
-        reassign_kernel[grid_dim, self._block_dim](self.d_state, self.d_sat, self.ppi_offset, self.ppo_offset,
-                                                   self.d_cdata, time)
+            wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, int(0),
+                sims, self.delays, self.simctl_int, seed)
         cuda.synchronize()
 
-    def wavecaps(self):
-        gx = math.ceil(len(self.circuit.lines) / 512)
-        wavecaps_kernel[gx, 512](self.d_state, self.d_sat, self.sims)
-        self.sat[...] = self.d_sat
-        return self.sat[..., 2]
+    def c_to_s(self, time=TMAX, sd=0.0, seed=1):
+        grid_dim = self._grid_dim(self.sims, self.s_len)
+        wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.c_caps, self.ppo_offset,
+            time, sd * math.sqrt(2), seed)
 
+    def s_ppo_to_ppi(self, time=0.0):
+        grid_dim = self._grid_dim(self.sims, self.s_len)
+        ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset)
 
-@cuda.jit()
-def wavecaps_kernel(state, sat, sims):
-    idx = cuda.grid(1)
-    if idx >= len(sat): return
 
-    lidx, lcap, _ = sat[idx]
-    if lidx < 0: return
+@cuda.jit()
+def wave_assign_gpu(c, s, c_locs, ppi_offset):
+    x, y = cuda.grid(2)
+    if y >= s.shape[1]: return
+    c_loc = c_locs[ppi_offset + y]
+    if c_loc < 0: return
+    if x >= c.shape[-1]: return
+    value = int(s[2, y, x] >= 0.5) | (2*int(s[0, y, x] >= 0.5))
+    ttime = s[1, y, x]
+    if value == 0:
+        c[c_loc, x] = TMAX
+        c[c_loc+1, x] = TMAX
+    elif value == 1:
+        c[c_loc, x] = ttime
+        c[c_loc+1, x] = TMAX
+    elif value == 2:
+        c[c_loc, x] = TMIN
+        c[c_loc+1, x] = ttime
+    else:
+        c[c_loc, x] = TMIN
+        c[c_loc+1, x] = TMAX
+    c[c_loc+2, x] = TMAX
 
-    wcap = 0
-    for sidx in range(sims):
-        for tidx in range(lcap):
-            t = state[lidx + tidx, sidx]
-            if tidx > wcap:
-                wcap = tidx
-            if t >= TMAX: break
 
-    sat[idx, 2] = wcap + 1
+_wave_eval_gpu = cuda.jit(_wave_eval, device=True)
 
 
 @cuda.jit()
-def reassign_kernel(state, sat, ppi_offset, ppo_offset, cdata, ppi_time):
-    vector, y = cuda.grid(2)
-    if vector >= state.shape[-1]: return
-    if ppo_offset + y >= len(sat): return
-
-    ppo, _, _ = sat[ppo_offset + y]
-    ppi, ppi_cap, _ = sat[ppi_offset + y]
-    if ppo < 0: return
-    if ppi < 0: return
-
-    ppo_val = int(cdata[y, vector, 1])
-    ppi_val = int(0)
-    for tidx in range(ppi_cap):
-        t = state[ppi + tidx, vector]
-        if t >= TMAX: break
-        ppi_val ^= 1
-
-    # make new waveform at PPI
-    toggle = 0
-    if ppi_val:
-        state[ppi + toggle, vector] = TMIN
-        toggle += 1
-    if ppi_val != ppo_val:
-        state[ppi + toggle, vector] = ppi_time
-        toggle += 1
-    state[ppi + toggle, vector] = TMAX
+def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed):
+    x, y = cuda.grid(2)
+    sim = sim_start + x
+    op_idx = op_start + y
+    if sim >= sim_stop: return
+    if op_idx >= op_stop: return
+
+    op = ops[op_idx]
+    a_loc = op[6]
+    a_wr = op[7]
+    a_wf = op[8]
+
+    nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed)
+
+    # accumulate WSA into abuf
+    if a_loc >= 0:
+        cuda.atomic.add(abuf, (a_loc, sim), nrise*a_wr + nfall*a_wf)
 
 
 @cuda.jit()
-def capture_kernel(state, sat, ppo_offset, cdata, time, s_sqrt2, seed):
+def wave_capture_gpu(c, s, c_locs, c_caps, ppo_offset, time, s_sqrt2, seed):
     x, y = cuda.grid(2)
-    if ppo_offset + y >= len(sat): return
-    line, tdim, _ = sat[ppo_offset + y]
+    if ppo_offset + y >= len(c_locs): return
+    line = c_locs[ppo_offset + y]
+    tdim = c_caps[ppo_offset + y]
     if line < 0: return
-    if x >= state.shape[-1]: return
+    if x >= c.shape[-1]: return
     vector = x
     m = 0.5
     acc = 0.0
@@ -795,7 +460,7 @@ def capture_kernel(state, sat, ppo_offset, cdata, time, s_sqrt2, seed):
     val = int(0)
     final = int(0)
     for tidx in range(tdim):
-        t = state[line + tidx, vector]
+        t = c[line + tidx, vector]
         if t >= TMAX:
             if t == TMAX_OVL:
                 ovl = 1
@@ -826,136 +491,25 @@ def capture_kernel(state, sat, ppo_offset, cdata, time, s_sqrt2, seed):
     else:
         acc = val
 
-    cdata[y, vector, 0] = acc
-    cdata[y, vector, 1] = val
-    cdata[y, vector, 2] = final
-    cdata[y, vector, 3] = (val != final)
-    cdata[y, vector, 4] = eat
-    cdata[y, vector, 5] = lst
-    cdata[y, vector, 6] = ovl
-
-
-@cuda.jit()
-def assign_kernel(state, sat, ppi_offset, intf_len, tdata, time):
-    x, y = cuda.grid(2)
-    if y >= intf_len: return
-    line = sat[ppi_offset + y, 0]
-    if line < 0: return
-    sdim = state.shape[-1]
-    if x >= sdim: return
-    vector = x
-    a0 = tdata[y, 0, vector // 8]
-    a1 = tdata[y, 1, vector // 8]
-    a2 = tdata[y, 2, vector // 8]
-    m = np.uint8(1 << (7 - (vector % 8)))
-    toggle = 0
-    if a1 & m:
-        state[line + toggle, x] = TMIN
-        toggle += 1
-    if (a2 & m) and ((a0 & m) != (a1 & m)):
-        state[line + toggle, x] = time
-        toggle += 1
-    state[line + toggle, x] = TMAX
-
-
-@cuda.jit(device=True)
-def rand_gauss_dev(seed, sd):
-    clamp = 0.5
-    if sd <= 0.0:
-        return 1.0
-    while True:
-        x = -6.0
-        for _ in range(12):
-            seed = int(0xDEECE66D) * seed + 0xB
-            x += float((seed >> 8) & 0xffffff) / float(1 << 24)
-        x *= sd
-        if abs(x) <= clamp:
-            break
-    return x + 1.0
+    s[3, y, vector] = (c[line, vector] <= TMIN)
+    s[4, y, vector] = eat
+    s[5, y, vector] = lst
+    s[6, y, vector] = final
+    s[7, y, vector] = acc
+    s[8, y, vector] = val
+    s[9, y, vector] = 0  # TODO
+    s[10, y, vector] = ovl
 
 
 @cuda.jit()
-def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sdata, sd, seed):
+def ppo_to_ppi_gpu(s, c_locs, time, ppi_offset, ppo_offset):
     x, y = cuda.grid(2)
-    st_idx = st_start + x
-    op_idx = op_start + y
-    if st_idx >= st_stop: return
-    if op_idx >= op_stop: return
-    lut = ops[op_idx, 0]
-    z_idx = ops[op_idx, 1]
-    a_idx = ops[op_idx, 2]
-    b_idx = ops[op_idx, 3]
-    overflows = int(0)
-    sdata = sdata[st_idx]
-
-    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)
+    if y >= s.shape[1]: return
+    if x >= s.shape[2]: return
 
-    a_mem = sat[a_idx, 0]
-    b_mem = sat[b_idx, 0]
-    z_mem, z_cap, _ = sat[z_idx]
-
-    a_cur = int(0)
-    b_cur = int(0)
-    z_cur = lut & 1
-    if z_cur == 1:
-        state[z_mem, st_idx] = TMIN
-
-    a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss_dev(_seed ^ a_mem ^ z_cur, sd) * sdata[0]
-    if int(sdata[1]) == a_idx: a += sdata[2+z_cur]
-    b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss_dev(_seed ^ b_mem ^ z_cur, sd) * sdata[0]
-    if int(sdata[1]) == b_idx: b += sdata[2+z_cur]
-    
-    previous_t = TMIN
-
-    current_t = min(a, b)
-    inputs = int(0)
+    if c_locs[ppi_offset + y] < 0: return
+    if c_locs[ppo_offset + y] < 0: return
 
-    while current_t < TMAX:
-        z_val = z_cur & 1
-        if b < a:
-            b_cur += 1
-            b = state[b_mem + b_cur, st_idx]
-            b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss_dev(_seed ^ b_mem ^ z_val ^ 1, sd) * sdata[0]
-            thresh = line_times[b_idx, 1, z_val] * rand_gauss_dev(_seed ^ b_mem ^ z_val, sd) * sdata[0]
-            if int(sdata[1]) == b_idx:
-                b += sdata[2+(z_val^1)]
-                thresh += sdata[2+z_val]
-            inputs ^= 2
-            next_t = b
-        else:
-            a_cur += 1
-            a = state[a_mem + a_cur, st_idx]
-            a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss_dev(_seed ^ a_mem ^ z_val ^ 1, sd) * sdata[0]
-            thresh = line_times[a_idx, 1, z_val] * rand_gauss_dev(_seed ^ a_mem ^ z_val, sd) * sdata[0]
-            if int(sdata[1]) == a_idx:
-                a += sdata[2+(z_val^1)]
-                thresh += sdata[2+z_val]
-            inputs ^= 1
-            next_t = a
-
-        if (z_cur & 1) != ((lut >> inputs) & 1):
-            # we generate a toggle in z_mem, if:
-            #   ( it is the first toggle in z_mem OR
-            #   following toggle is earlier OR
-            #   pulse is wide enough ) AND enough space in z_mem.
-            if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh:
-                if z_cur < (z_cap - 1):
-                    state[z_mem + z_cur, st_idx] = current_t
-                    previous_t = current_t
-                    z_cur += 1
-                else:
-                    overflows += 1
-                    previous_t = state[z_mem + z_cur - 1, st_idx]
-                    z_cur -= 1
-            else:
-                z_cur -= 1
-                if z_cur > 0:
-                    previous_t = state[z_mem + z_cur - 1, st_idx]
-                else:
-                    previous_t = TMIN
-        current_t = min(a, b)
-
-    if overflows > 0:
-        state[z_mem + z_cur, st_idx] = TMAX_OVL
-    else:
-        state[z_mem + z_cur, st_idx] = a if a > b else b  # propagate overflow flags by storing biggest TMAX from input
+    s[0, y, x] = s[2, y, x]
+    s[1, y, x] = time
+    s[2, y, x] = s[8, y, x]
diff --git a/tests/b14.sdf.gz b/tests/b14.sdf.gz
deleted file mode 100644
index c8a2bef..0000000
Binary files a/tests/b14.sdf.gz and /dev/null differ
diff --git a/tests/b14.stuck.stil.gz b/tests/b14.stuck.stil.gz
deleted file mode 100644
index adcdfe2..0000000
Binary files a/tests/b14.stuck.stil.gz and /dev/null differ
diff --git a/tests/b14.transition.stil.gz b/tests/b14.transition.stil.gz
deleted file mode 100644
index 1b36e31..0000000
Binary files a/tests/b14.transition.stil.gz and /dev/null differ
diff --git a/tests/b14.v.gz b/tests/b14.v.gz
deleted file mode 100644
index c86b205..0000000
Binary files a/tests/b14.v.gz and /dev/null differ
diff --git a/tests/b15_2ig.sa_nf.stil.gz b/tests/b15_2ig.sa_nf.stil.gz
new file mode 100644
index 0000000..b0159f3
Binary files /dev/null and b/tests/b15_2ig.sa_nf.stil.gz differ
diff --git a/tests/b15_2ig.sdf.gz b/tests/b15_2ig.sdf.gz
new file mode 100644
index 0000000..52da6cf
Binary files /dev/null and b/tests/b15_2ig.sdf.gz differ
diff --git a/tests/b15_2ig.tf_nf.stil.gz b/tests/b15_2ig.tf_nf.stil.gz
new file mode 100644
index 0000000..bc3eced
Binary files /dev/null and b/tests/b15_2ig.tf_nf.stil.gz differ
diff --git a/tests/b15_2ig.v.gz b/tests/b15_2ig.v.gz
new file mode 100644
index 0000000..78d1a29
Binary files /dev/null and b/tests/b15_2ig.v.gz differ
diff --git a/tests/b15_4ig.sdf.gz b/tests/b15_4ig.sdf.gz
new file mode 100644
index 0000000..32db9bf
Binary files /dev/null and b/tests/b15_4ig.sdf.gz differ
diff --git a/tests/b15_4ig.v.gz b/tests/b15_4ig.v.gz
new file mode 100644
index 0000000..bc98cc1
Binary files /dev/null and b/tests/b15_4ig.v.gz differ
diff --git a/tests/conftest.py b/tests/conftest.py
index e82b2fc..c73bdc5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,8 +1,20 @@
 import pytest
 
 
-@pytest.fixture
+@pytest.fixture(scope='session')
 def mydir():
     import os
     from pathlib import Path
     return Path(os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))))
+
+@pytest.fixture(scope='session')
+def b15_2ig_circuit(mydir):
+    from kyupy import verilog
+    from kyupy.techlib import SAED32
+    return verilog.load(mydir / 'b15_2ig.v.gz', branchforks=True, tlib=SAED32)
+
+@pytest.fixture(scope='session')
+def b15_2ig_delays(mydir, b15_2ig_circuit):
+    from kyupy import sdf
+    from kyupy.techlib import SAED32
+    return sdf.load(mydir / 'b15_2ig.sdf.gz').iopaths(b15_2ig_circuit, tlib=SAED32)[1:2]
diff --git a/tests/rng_haltonBase2.synth_yosys.v b/tests/rng_haltonBase2.synth_yosys.v
new file mode 100644
index 0000000..dec16b1
--- /dev/null
+++ b/tests/rng_haltonBase2.synth_yosys.v
@@ -0,0 +1,335 @@
+/* Generated by Yosys 0.9 (git sha1 UNKNOWN, gcc 4.8.5 -fPIC -Os) */
+
+(* top =  1  *)
+(* src = "rng_haltonBase2.v:1" *)
+module rng1(clk, reset, o_output);
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  wire [11:0] _00_;
+  wire _01_;
+  wire _02_;
+  wire _03_;
+  wire _04_;
+  wire _05_;
+  wire _06_;
+  wire _07_;
+  wire _08_;
+  wire _09_;
+  wire _10_;
+  wire _11_;
+  wire _12_;
+  wire _13_;
+  wire _14_;
+  wire _15_;
+  wire _16_;
+  wire _17_;
+  wire _18_;
+  wire _19_;
+  wire _20_;
+  wire _21_;
+  wire _22_;
+  wire _23_;
+  wire _24_;
+  wire _25_;
+  wire _26_;
+  wire _27_;
+  wire _28_;
+  wire _29_;
+  wire _30_;
+  wire _31_;
+  wire _32_;
+  wire _33_;
+  wire _34_;
+  (* src = "rng_haltonBase2.v:2" *)
+  input clk;
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:12" *)
+  wire \halton.clk ;
+  (* init = 12'h000 *)
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:17" *)
+  wire [11:0] \halton.counter ;
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:14" *)
+  wire [11:0] \halton.o_output ;
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:13" *)
+  wire \halton.reset ;
+  (* src = "rng_haltonBase2.v:4" *)
+  output [11:0] o_output;
+  (* src = "rng_haltonBase2.v:3" *)
+  input reset;
+  AND2X1 _35_ (
+    .IN1(\halton.counter [1]),
+    .IN2(\halton.counter [0]),
+    .Q(_01_)
+  );
+  NOR2X0 _36_ (
+    .IN1(\halton.counter [1]),
+    .IN2(\halton.counter [0]),
+    .QN(_02_)
+  );
+  NOR3X0 _37_ (
+    .IN1(reset),
+    .IN2(_01_),
+    .IN3(_02_),
+    .QN(_00_[1])
+  );
+  AND2X1 _38_ (
+    .IN1(\halton.counter [2]),
+    .IN2(_01_),
+    .Q(_03_)
+  );
+  NOR2X0 _39_ (
+    .IN1(\halton.counter [2]),
+    .IN2(_01_),
+    .QN(_04_)
+  );
+  NOR3X0 _40_ (
+    .IN1(reset),
+    .IN2(_03_),
+    .IN3(_04_),
+    .QN(_00_[2])
+  );
+  AND4X1 _41_ (
+    .IN1(\halton.counter [1]),
+    .IN2(\halton.counter [0]),
+    .IN3(\halton.counter [2]),
+    .IN4(\halton.counter [3]),
+    .Q(_05_)
+  );
+  NOR2X0 _42_ (
+    .IN1(\halton.counter [3]),
+    .IN2(_03_),
+    .QN(_06_)
+  );
+  NOR3X0 _43_ (
+    .IN1(reset),
+    .IN2(_05_),
+    .IN3(_06_),
+    .QN(_00_[3])
+  );
+  AND2X1 _44_ (
+    .IN1(\halton.counter [4]),
+    .IN2(_05_),
+    .Q(_07_)
+  );
+  NOR2X0 _45_ (
+    .IN1(\halton.counter [4]),
+    .IN2(_05_),
+    .QN(_08_)
+  );
+  NOR3X0 _46_ (
+    .IN1(reset),
+    .IN2(_07_),
+    .IN3(_08_),
+    .QN(_00_[4])
+  );
+  AND2X1 _47_ (
+    .IN1(\halton.counter [5]),
+    .IN2(_07_),
+    .Q(_09_)
+  );
+  NOR2X0 _48_ (
+    .IN1(\halton.counter [5]),
+    .IN2(_07_),
+    .QN(_10_)
+  );
+  NOR3X0 _49_ (
+    .IN1(reset),
+    .IN2(_09_),
+    .IN3(_10_),
+    .QN(_00_[5])
+  );
+  AND4X1 _50_ (
+    .IN1(\halton.counter [4]),
+    .IN2(\halton.counter [5]),
+    .IN3(\halton.counter [6]),
+    .IN4(_05_),
+    .Q(_11_)
+  );
+  NOR2X0 _51_ (
+    .IN1(\halton.counter [6]),
+    .IN2(_09_),
+    .QN(_12_)
+  );
+  NOR3X0 _52_ (
+    .IN1(reset),
+    .IN2(_11_),
+    .IN3(_12_),
+    .QN(_00_[6])
+  );
+  AND2X1 _53_ (
+    .IN1(\halton.counter [7]),
+    .IN2(_11_),
+    .Q(_13_)
+  );
+  NOR2X0 _54_ (
+    .IN1(\halton.counter [7]),
+    .IN2(_11_),
+    .QN(_14_)
+  );
+  NOR3X0 _55_ (
+    .IN1(reset),
+    .IN2(_13_),
+    .IN3(_14_),
+    .QN(_00_[7])
+  );
+  AND3X1 _56_ (
+    .IN1(\halton.counter [7]),
+    .IN2(\halton.counter [8]),
+    .IN3(_11_),
+    .Q(_15_)
+  );
+  NOR2X0 _57_ (
+    .IN1(\halton.counter [8]),
+    .IN2(_13_),
+    .QN(_16_)
+  );
+  NOR3X0 _58_ (
+    .IN1(reset),
+    .IN2(_15_),
+    .IN3(_16_),
+    .QN(_00_[8])
+  );
+  AND4X1 _59_ (
+    .IN1(\halton.counter [7]),
+    .IN2(\halton.counter [8]),
+    .IN3(\halton.counter [9]),
+    .IN4(_11_),
+    .Q(_17_)
+  );
+  NOR2X0 _60_ (
+    .IN1(\halton.counter [9]),
+    .IN2(_15_),
+    .QN(_18_)
+  );
+  NOR3X0 _61_ (
+    .IN1(reset),
+    .IN2(_17_),
+    .IN3(_18_),
+    .QN(_00_[9])
+  );
+  AND2X1 _62_ (
+    .IN1(\halton.counter [10]),
+    .IN2(_17_),
+    .Q(_19_)
+  );
+  NOR2X0 _63_ (
+    .IN1(\halton.counter [10]),
+    .IN2(_17_),
+    .QN(_20_)
+  );
+  NOR3X0 _64_ (
+    .IN1(reset),
+    .IN2(_19_),
+    .IN3(_20_),
+    .QN(_00_[10])
+  );
+  AND3X1 _65_ (
+    .IN1(\halton.counter [10]),
+    .IN2(\halton.counter [11]),
+    .IN3(_17_),
+    .Q(_21_)
+  );
+  AOI21X1 _66_ (
+    .IN1(\halton.counter [10]),
+    .IN2(_17_),
+    .IN3(\halton.counter [11]),
+    .QN(_22_)
+  );
+  NOR3X0 _67_ (
+    .IN1(reset),
+    .IN2(_21_),
+    .IN3(_22_),
+    .QN(_00_[11])
+  );
+  NOR2X0 _68_ (
+    .IN1(reset),
+    .IN2(\halton.counter [0]),
+    .QN(_00_[0])
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _69_ (
+    .CLK(clk),
+    .D(_00_[0]),
+    .Q(\halton.counter [0]),
+    .QN(_23_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _70_ (
+    .CLK(clk),
+    .D(_00_[1]),
+    .Q(\halton.counter [1]),
+    .QN(_24_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _71_ (
+    .CLK(clk),
+    .D(_00_[2]),
+    .Q(\halton.counter [2]),
+    .QN(_25_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _72_ (
+    .CLK(clk),
+    .D(_00_[3]),
+    .Q(\halton.counter [3]),
+    .QN(_26_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _73_ (
+    .CLK(clk),
+    .D(_00_[4]),
+    .Q(\halton.counter [4]),
+    .QN(_27_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _74_ (
+    .CLK(clk),
+    .D(_00_[5]),
+    .Q(\halton.counter [5]),
+    .QN(_28_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _75_ (
+    .CLK(clk),
+    .D(_00_[6]),
+    .Q(\halton.counter [6]),
+    .QN(_29_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _76_ (
+    .CLK(clk),
+    .D(_00_[7]),
+    .Q(\halton.counter [7]),
+    .QN(_30_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _77_ (
+    .CLK(clk),
+    .D(_00_[8]),
+    .Q(\halton.counter [8]),
+    .QN(_31_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _78_ (
+    .CLK(clk),
+    .D(_00_[9]),
+    .Q(\halton.counter [9]),
+    .QN(_32_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _79_ (
+    .CLK(clk),
+    .D(_00_[10]),
+    .Q(\halton.counter [10]),
+    .QN(_33_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _80_ (
+    .CLK(clk),
+    .D(_00_[11]),
+    .Q(\halton.counter [11]),
+    .QN(_34_)
+  );
+  assign \halton.clk  = clk;
+  assign \halton.o_output  = { \halton.counter [0], \halton.counter [1], \halton.counter [2], \halton.counter [3], \halton.counter [4], \halton.counter [5], \halton.counter [6], \halton.counter [7], \halton.counter [8], \halton.counter [9], \halton.counter [10], \halton.counter [11] };
+  assign \halton.reset  = reset;
+  assign o_output = { \halton.counter [0], \halton.counter [1], \halton.counter [2], \halton.counter [3], \halton.counter [4], \halton.counter [5], \halton.counter [6], \halton.counter [7], \halton.counter [8], \halton.counter [9], \halton.counter [10], \halton.counter [11] };
+endmodule
diff --git a/tests/test_bench.py b/tests/test_bench.py
index 44ddf7c..8b9a533 100644
--- a/tests/test_bench.py
+++ b/tests/test_bench.py
@@ -12,4 +12,4 @@ def test_b01(mydir):
 def test_simple():
     c = bench.parse('input(a, b) output(z) z=and(a,b)')
     assert len(c.nodes) == 4
-    assert len(c.interface) == 3
+    assert len(c.io_nodes) == 3
diff --git a/tests/test_circuit.py b/tests/test_circuit.py
index 446ba90..5aa3074 100644
--- a/tests/test_circuit.py
+++ b/tests/test_circuit.py
@@ -1,7 +1,8 @@
 import pickle
 
 from kyupy.circuit import Circuit, Node, Line
-from kyupy import verilog
+from kyupy import verilog, bench
+from kyupy.techlib import SAED32
 
 def test_lines():
     c = Circuit()
@@ -43,7 +44,7 @@ def test_lines():
     assert c.lines[0].index == 0
     assert c.lines[1].index == 1
 
-    assert n1.outs[2] is None
+    assert len(n1.outs) == 2
     assert n2.ins[1] is None
     assert n2.ins[2] == line2
 
@@ -57,9 +58,9 @@ def test_circuit():
     assert 'in1' in c.cells
     assert 'and1' not in c.cells
 
-    c.interface[0] = in1
-    c.interface[1] = in2
-    c.interface[2] = out1
+    c.io_nodes[0] = in1
+    c.io_nodes[1] = in2
+    c.io_nodes[2] = out1
 
     and1 = Node(c, 'and1', kind='and')
     Line(c, in1, and1)
@@ -104,9 +105,29 @@ def test_circuit():
 
 
 def test_pickle(mydir):
-    c = verilog.load(mydir / 'b14.v.gz')
+    c = verilog.load(mydir / 'b15_4ig.v.gz', tlib=SAED32)
     assert c is not None
     cs = pickle.dumps(c)
     assert cs is not None
     c2 = pickle.loads(cs)
     assert c == c2
+
+
+def test_substitute():
+    c = bench.parse('input(i1, i2, i3, i4, i5) output(o1) aoi=AOI221(i1, i2, i3, i4, i5) o1=not(aoi)')
+    assert len(c.cells) == 2
+    assert len(c.io_nodes) == 6
+    aoi221_impl = bench.parse('input(in1, in2, in3, in4, in5) output(q) a1=and(in1, in2) a2=and(in3, in4) q=or(a1, a2, in5)')
+    assert len(aoi221_impl.cells) == 3
+    assert len(aoi221_impl.io_nodes) == 6
+    c.substitute(c.cells['aoi'], aoi221_impl)
+    assert len(c.cells) == 4
+    assert len(c.io_nodes) == 6
+
+
+def test_resolve(mydir):
+    c = verilog.load(mydir / 'b15_4ig.v.gz', tlib=SAED32)
+    s_names = [n.name for n in c.s_nodes]
+    c.resolve_tlib_cells(SAED32)
+    s_names_prim = [n.name for n in c.s_nodes]
+    assert s_names == s_names_prim, 'resolve_tlib_cells does not preserve names or order of s_nodes'
diff --git a/tests/test_logic.py b/tests/test_logic.py
index 27b61ae..b87f1fc 100644
--- a/tests/test_logic.py
+++ b/tests/test_logic.py
@@ -1,252 +1,75 @@
+import numpy as np
 import kyupy.logic as lg
+from kyupy.logic import mvarray, bparray, bp_to_mv, mv_to_bp
 
 
-def test_mvarray():
-
-    # instantiation with shape
-
-    ary = lg.MVArray(4)
-    assert ary.length == 1
-    assert len(ary) == 1
-    assert ary.width == 4
-
-    ary = lg.MVArray((3, 2))
-    assert ary.length == 2
-    assert len(ary) == 2
-    assert ary.width == 3
-
-    # instantiation with single vector
-
-    ary = lg.MVArray([1, 0, 1])
-    assert ary.length == 1
-    assert ary.width == 3
-    assert str(ary) == "['101']"
-    assert ary[0] == '101'
-
-    ary = lg.MVArray("10X-")
-    assert ary.length == 1
-    assert ary.width == 4
-    assert str(ary) == "['10X-']"
-    assert ary[0] == '10X-'
-
-    ary = lg.MVArray("1")
-    assert ary.length == 1
-    assert ary.width == 1
-
-    ary = lg.MVArray(["1"])
-    assert ary.length == 1
-    assert ary.width == 1
-
-    # instantiation with multiple vectors
-
-    ary = lg.MVArray([[0, 0], [0, 1], [1, 0], [1, 1]])
-    assert ary.length == 4
-    assert ary.width == 2
-
-    ary = lg.MVArray(["000", "001", "110", "---"])
-    assert ary.length == 4
-    assert ary.width == 3
-    assert str(ary) == "['000', '001', '110', '---']"
-    assert ary[2] == '110'
-
-    # casting to 2-valued logic
-
-    ary = lg.MVArray([0, 1, 2, None], m=2)
-    assert ary.data[0] == lg.ZERO
-    assert ary.data[1] == lg.ONE
-    assert ary.data[2] == lg.ZERO
-    assert ary.data[3] == lg.ZERO
-
-    ary = lg.MVArray("0-X1PRFN", m=2)
-    assert ary.data[0] == lg.ZERO
-    assert ary.data[1] == lg.ZERO
-    assert ary.data[2] == lg.ZERO
-    assert ary.data[3] == lg.ONE
-    assert ary.data[4] == lg.ZERO
-    assert ary.data[5] == lg.ONE
-    assert ary.data[6] == lg.ZERO
-    assert ary.data[7] == lg.ONE
-
-    # casting to 4-valued logic
-
-    ary = lg.MVArray([0, 1, 2, None, 'F'], m=4)
-    assert ary.data[0] == lg.ZERO
-    assert ary.data[1] == lg.ONE
-    assert ary.data[2] == lg.UNKNOWN
-    assert ary.data[3] == lg.UNASSIGNED
-    assert ary.data[4] == lg.ZERO
-
-    ary = lg.MVArray("0-X1PRFN", m=4)
-    assert ary.data[0] == lg.ZERO
-    assert ary.data[1] == lg.UNASSIGNED
-    assert ary.data[2] == lg.UNKNOWN
-    assert ary.data[3] == lg.ONE
-    assert ary.data[4] == lg.ZERO
-    assert ary.data[5] == lg.ONE
-    assert ary.data[6] == lg.ZERO
-    assert ary.data[7] == lg.ONE
-
-    # casting to 8-valued logic
-
-    ary = lg.MVArray([0, 1, 2, None, 'F'], m=8)
-    assert ary.data[0] == lg.ZERO
-    assert ary.data[1] == lg.ONE
-    assert ary.data[2] == lg.UNKNOWN
-    assert ary.data[3] == lg.UNASSIGNED
-    assert ary.data[4] == lg.FALL
-
-    ary = lg.MVArray("0-X1PRFN", m=8)
-    assert ary.data[0] == lg.ZERO
-    assert ary.data[1] == lg.UNASSIGNED
-    assert ary.data[2] == lg.UNKNOWN
-    assert ary.data[3] == lg.ONE
-    assert ary.data[4] == lg.PPULSE
-    assert ary.data[5] == lg.RISE
-    assert ary.data[6] == lg.FALL
-    assert ary.data[7] == lg.NPULSE
-
-    # copy constructor and casting
-
-    ary8 = lg.MVArray(ary, m=8)
-    assert ary8.length == 1
-    assert ary8.width == 8
-    assert ary8.data[7] == lg.NPULSE
-
-    ary4 = lg.MVArray(ary, m=4)
-    assert ary4.data[1] == lg.UNASSIGNED
-    assert ary4.data[7] == lg.ONE
-
-    ary2 = lg.MVArray(ary, m=2)
-    assert ary2.data[1] == lg.ZERO
-    assert ary2.data[7] == lg.ONE
-
-
-def test_mv_operations():
-    x1_2v = lg.MVArray("0011", m=2)
-    x2_2v = lg.MVArray("0101", m=2)
-    x1_4v = lg.MVArray("0000XXXX----1111", m=4)
-    x2_4v = lg.MVArray("0X-10X-10X-10X-1", m=4)
-    x1_8v = lg.MVArray("00000000XXXXXXXX--------11111111PPPPPPPPRRRRRRRRFFFFFFFFNNNNNNNN", m=8)
-    x2_8v = lg.MVArray("0X-1PRFN0X-1PRFN0X-1PRFN0X-1PRFN0X-1PRFN0X-1PRFN0X-1PRFN0X-1PRFN", m=8)
-
-    assert lg.mv_not(x1_2v)[0] == '1100'
-    assert lg.mv_not(x1_4v)[0] == '1111XXXXXXXX0000'
-    assert lg.mv_not(x1_8v)[0] == '11111111XXXXXXXXXXXXXXXX00000000NNNNNNNNFFFFFFFFRRRRRRRRPPPPPPPP'
-
-    assert lg.mv_or(x1_2v, x2_2v)[0] == '0111'
-    assert lg.mv_or(x1_4v, x2_4v)[0] == '0XX1XXX1XXX11111'
-    assert lg.mv_or(x1_8v, x2_8v)[0] == '0XX1PRFNXXX1XXXXXXX1XXXX11111111PXX1PRFNRXX1RRNNFXX1FNFNNXX1NNNN'
-
-    assert lg.mv_and(x1_2v, x2_2v)[0] == '0001'
-    assert lg.mv_and(x1_4v, x2_4v)[0] == '00000XXX0XXX0XX1'
-    assert lg.mv_and(x1_8v, x2_8v)[0] == '000000000XXXXXXX0XXXXXXX0XX1PRFN0XXPPPPP0XXRPRPR0XXFPPFF0XXNPRFN'
-
-    assert lg.mv_xor(x1_2v, x2_2v)[0] == '0110'
-    assert lg.mv_xor(x1_4v, x2_4v)[0] == '0XX1XXXXXXXX1XX0'
-    assert lg.mv_xor(x1_8v, x2_8v)[0] == '0XX1PRFNXXXXXXXXXXXXXXXX1XX0NFRPPXXNPRFNRXXFRPNFFXXRFNPRNXXPNFRP'
-
-    x30_2v = lg.MVArray("0000", m=2)
-    x31_2v = lg.MVArray("1111", m=2)
-    x30_4v = lg.MVArray("0000000000000000", m=4)
-    x31_4v = lg.MVArray("1111111111111111", m=4)
-    x30_8v = lg.MVArray("0000000000000000000000000000000000000000000000000000000000000000", m=8)
-    x31_8v = lg.MVArray("1111111111111111111111111111111111111111111111111111111111111111", m=8)
-
-    assert lg.mv_latch(x1_2v, x2_2v, x30_2v)[0] == '0001'
-    assert lg.mv_latch(x1_2v, x2_2v, x31_2v)[0] == '1011'
-    assert lg.mv_latch(x1_4v, x2_4v, x30_4v)[0] == '0XX00XXX0XXX0XX1'
-    assert lg.mv_latch(x1_4v, x2_4v, x31_4v)[0] == '1XX01XXX1XXX1XX1'
-    assert lg.mv_latch(x1_8v, x2_8v, x30_8v)[0] == '0XX000000XXXXXXX0XXXXXXX0XX10R110XX000000XXR0R0R0XXF001F0XX10R11'
-    assert lg.mv_latch(x1_8v, x2_8v, x31_8v)[0] == '1XX01F001XXXXXXX1XXXXXXX1XX111111XX01F001XXR110R1XXF1F1F1XX11111'
+def assert_equal_shape_and_contents(actual, desired):
+    desired = np.array(desired, dtype=np.uint8)
+    assert actual.shape == desired.shape
+    np.testing.assert_allclose(actual, desired)
+
+
+def test_mvarray_single_vector():
+    assert_equal_shape_and_contents(mvarray(1, 0, 1), [lg.ONE, lg.ZERO, lg.ONE])
+    assert_equal_shape_and_contents(mvarray([1, 0, 1]), [lg.ONE, lg.ZERO, lg.ONE])
+    assert_equal_shape_and_contents(mvarray('10X-RFPN'), [lg.ONE, lg.ZERO, lg.UNKNOWN, lg.UNASSIGNED, lg.RISE, lg.FALL, lg.PPULSE, lg.NPULSE])
+    assert_equal_shape_and_contents(mvarray(['1']), [lg.ONE])
+    assert_equal_shape_and_contents(mvarray('1'), [lg.ONE])
+
+
+def test_mvarray_multi_vector():
+    assert_equal_shape_and_contents(mvarray([0, 0], [0, 1], [1, 0], [1, 1]), [[lg.ZERO, lg.ZERO, lg.ONE, lg.ONE], [lg.ZERO, lg.ONE, lg.ZERO, lg.ONE]])
+    assert_equal_shape_and_contents(mvarray('10X', '--1'), [[lg.ONE, lg.UNASSIGNED], [lg.ZERO, lg.UNASSIGNED], [lg.UNKNOWN, lg.ONE]])
+
+
+def test_mv_ops():
+    x1_8v = mvarray('00000000XXXXXXXX--------11111111PPPPPPPPRRRRRRRRFFFFFFFFNNNNNNNN')
+    x2_8v = mvarray('0X-1PRFN'*8)
+
+    assert_equal_shape_and_contents(lg.mv_not(x1_8v), mvarray('11111111XXXXXXXXXXXXXXXX00000000NNNNNNNNFFFFFFFFRRRRRRRRPPPPPPPP'))
+    assert_equal_shape_and_contents(lg.mv_or(x1_8v, x2_8v), mvarray('0XX1PRFNXXX1XXXXXXX1XXXX11111111PXX1PRFNRXX1RRNNFXX1FNFNNXX1NNNN'))
+    assert_equal_shape_and_contents(lg.mv_and(x1_8v, x2_8v), mvarray('000000000XXXXXXX0XXXXXXX0XX1PRFN0XXPPPPP0XXRPRPR0XXFPPFF0XXNPRFN'))
+    assert_equal_shape_and_contents(lg.mv_xor(x1_8v, x2_8v), mvarray('0XX1PRFNXXXXXXXXXXXXXXXX1XX0NFRPPXXNPRFNRXXFRPNFFXXRFNPRNXXPNFRP'))
+
+    # TODO
+    #assert_equal_shape_and_contents(lg.mv_transition(x1_8v, x2_8v), mvarray('0XXR PRFNXXXXXXXXXXXXXXXX1XX0NFRPPXXNPRFNRXXFRPNFFXXRFNPRNXXPNFRP'))
+
+    x30_8v = mvarray('0000000000000000000000000000000000000000000000000000000000000000')
+    x31_8v = mvarray('1111111111111111111111111111111111111111111111111111111111111111')
+
+    assert_equal_shape_and_contents(lg.mv_latch(x1_8v, x2_8v, x30_8v), mvarray('0XX000000XXXXXXX0XXXXXXX0XX10R110XX000000XXR0R0R0XXF001F0XX10R11'))
+    assert_equal_shape_and_contents(lg.mv_latch(x1_8v, x2_8v, x31_8v), mvarray('1XX01F001XXXXXXX1XXXXXXX1XX111111XX01F001XXR110R1XXF1F1F1XX11111'))
 
 
 def test_bparray():
 
-    ary = lg.BPArray(4)
-    assert ary.length == 1
-    assert len(ary) == 1
-    assert ary.width == 4
-
-    ary = lg.BPArray((3, 2))
-    assert ary.length == 2
-    assert len(ary) == 2
-    assert ary.width == 3
-
-    assert lg.MVArray(lg.BPArray("01", m=2))[0] == '01'
-    assert lg.MVArray(lg.BPArray("0X-1", m=4))[0] == '0X-1'
-    assert lg.MVArray(lg.BPArray("0X-1PRFN", m=8))[0] == '0X-1PRFN'
-
-    x1_2v = lg.BPArray("0011", m=2)
-    x2_2v = lg.BPArray("0101", m=2)
-    x1_4v = lg.BPArray("0000XXXX----1111", m=4)
-    x2_4v = lg.BPArray("0X-10X-10X-10X-1", m=4)
-    x1_8v = lg.BPArray("00000000XXXXXXXX--------11111111PPPPPPPPRRRRRRRRFFFFFFFFNNNNNNNN", m=8)
-    x2_8v = lg.BPArray("0X-1PRFN0X-1PRFN0X-1PRFN0X-1PRFN0X-1PRFN0X-1PRFN0X-1PRFN0X-1PRFN", m=8)
-
-    out_2v = lg.BPArray((4, 1), m=2)
-    out_4v = lg.BPArray((16, 1), m=4)
-    out_8v = lg.BPArray((64, 1), m=8)
-
-    lg.bp_buf(out_2v.data, x1_2v.data)
-    lg.bp_buf(out_4v.data, x1_4v.data)
-    lg.bp_buf(out_8v.data, x1_8v.data)
-
-    assert lg.MVArray(out_2v)[0] == '0011'
-    assert lg.MVArray(out_4v)[0] == '0000XXXXXXXX1111'
-    assert lg.MVArray(out_8v)[0] == '00000000XXXXXXXXXXXXXXXX11111111PPPPPPPPRRRRRRRRFFFFFFFFNNNNNNNN'
-
-    lg.bp_not(out_2v.data, x1_2v.data)
-    lg.bp_not(out_4v.data, x1_4v.data)
-    lg.bp_not(out_8v.data, x1_8v.data)
-
-    assert lg.MVArray(out_2v)[0] == '1100'
-    assert lg.MVArray(out_4v)[0] == '1111XXXXXXXX0000'
-    assert lg.MVArray(out_8v)[0] == '11111111XXXXXXXXXXXXXXXX00000000NNNNNNNNFFFFFFFFRRRRRRRRPPPPPPPP'
-
-    lg.bp_or(out_2v.data, x1_2v.data, x2_2v.data)
-    lg.bp_or(out_4v.data, x1_4v.data, x2_4v.data)
-    lg.bp_or(out_8v.data, x1_8v.data, x2_8v.data)
-
-    assert lg.MVArray(out_2v)[0] == '0111'
-    assert lg.MVArray(out_4v)[0] == '0XX1XXX1XXX11111'
-    assert lg.MVArray(out_8v)[0] == '0XX1PRFNXXX1XXXXXXX1XXXX11111111PXX1PRFNRXX1RRNNFXX1FNFNNXX1NNNN'
-
-    lg.bp_and(out_2v.data, x1_2v.data, x2_2v.data)
-    lg.bp_and(out_4v.data, x1_4v.data, x2_4v.data)
-    lg.bp_and(out_8v.data, x1_8v.data, x2_8v.data)
-
-    assert lg.MVArray(out_2v)[0] == '0001'
-    assert lg.MVArray(out_4v)[0] == '00000XXX0XXX0XX1'
-    assert lg.MVArray(out_8v)[0] == '000000000XXXXXXX0XXXXXXX0XX1PRFN0XXPPPPP0XXRPRPR0XXFPPFF0XXNPRFN'
-
-    lg.bp_xor(out_2v.data, x1_2v.data, x2_2v.data)
-    lg.bp_xor(out_4v.data, x1_4v.data, x2_4v.data)
-    lg.bp_xor(out_8v.data, x1_8v.data, x2_8v.data)
-
-    assert lg.MVArray(out_2v)[0] == '0110'
-    assert lg.MVArray(out_4v)[0] == '0XX1XXXXXXXX1XX0'
-    assert lg.MVArray(out_8v)[0] == '0XX1PRFNXXXXXXXXXXXXXXXX1XX0NFRPPXXNPRFNRXXFRPNFFXXRFNPRNXXPNFRP'
-
-    x30_2v = lg.BPArray("0000", m=2)
-    x30_4v = lg.BPArray("0000000000000000", m=4)
-    x30_8v = lg.BPArray("0000000000000000000000000000000000000000000000000000000000000000", m=8)
-
-    lg.bp_latch(out_2v.data, x1_2v.data, x2_2v.data, x30_2v.data)
-    lg.bp_latch(out_4v.data, x1_4v.data, x2_4v.data, x30_4v.data)
-    lg.bp_latch(out_8v.data, x1_8v.data, x2_8v.data, x30_8v.data)
-
-    assert lg.MVArray(out_2v)[0] == '0001'
-    assert lg.MVArray(out_4v)[0] == '0XX00XXX0XXX0XX1'
-    assert lg.MVArray(out_8v)[0] == '0XX000000XXXXXXX0XXXXXXX0XX10R110XX000000XXR0R0R0XXF001F0XX10R11'
-
-    x31_2v = lg.BPArray("1111", m=2)
-    x31_4v = lg.BPArray("1111111111111111", m=4)
-    x31_8v = lg.BPArray("1111111111111111111111111111111111111111111111111111111111111111", m=8)
-
-    lg.bp_latch(out_2v.data, x1_2v.data, x2_2v.data, x31_2v.data)
-    lg.bp_latch(out_4v.data, x1_4v.data, x2_4v.data, x31_4v.data)
-    lg.bp_latch(out_8v.data, x1_8v.data, x2_8v.data, x31_8v.data)
-
-    assert lg.MVArray(out_2v)[0] == '1011'
-    assert lg.MVArray(out_4v)[0] == '1XX01XXX1XXX1XX1'
-    assert lg.MVArray(out_8v)[0] == '1XX01F001XXXXXXX1XXXXXXX1XX111111XX01F001XXR110R1XXF1F1F1XX11111'
+    bpa = bparray('0X-1PRFN')
+    assert bpa.shape == (8, 3, 1)
+
+    bpa = bparray('0X-1PRFN-')
+    assert bpa.shape == (9, 3, 1)
+
+    bpa = bparray('000', '001', '010', '011', '100', '101', '110', '111')
+    assert bpa.shape == (3, 3, 1)
+
+    bpa = bparray('000', '001', '010', '011', '100', '101', '110', '111', 'RFX')
+    assert bpa.shape == (3, 3, 2)
+
+    assert_equal_shape_and_contents(bp_to_mv(bparray('0X-1PRFN'))[:,0], mvarray('0X-1PRFN'))
+    assert_equal_shape_and_contents(bparray('0X-1PRFN'), mv_to_bp(mvarray('0X-1PRFN')))
+
+    x1_8v = bparray('00000000XXXXXXXX--------11111111PPPPPPPPRRRRRRRRFFFFFFFFNNNNNNNN')
+    x2_8v = bparray('0X-1PRFN'*8)
+
+    out_8v = np.empty((64, 3, 1), dtype=np.uint8)
+
+    assert_equal_shape_and_contents(bp_to_mv(lg.bp8v_buf(out_8v, x1_8v))[:,0], mvarray('00000000XXXXXXXXXXXXXXXX11111111PPPPPPPPRRRRRRRRFFFFFFFFNNNNNNNN'))
+    assert_equal_shape_and_contents(bp_to_mv(lg.bp8v_or(out_8v, x1_8v, x2_8v))[:,0], mvarray('0XX1PRFNXXX1XXXXXXX1XXXX11111111PXX1PRFNRXX1RRNNFXX1FNFNNXX1NNNN'))
+    assert_equal_shape_and_contents(bp_to_mv(lg.bp8v_and(out_8v, x1_8v, x2_8v))[:,0], mvarray('000000000XXXXXXX0XXXXXXX0XX1PRFN0XXPPPPP0XXRPRPR0XXFPPFF0XXNPRFN'))
+    assert_equal_shape_and_contents(bp_to_mv(lg.bp8v_xor(out_8v, x1_8v, x2_8v))[:,0], mvarray('0XX1PRFNXXXXXXXXXXXXXXXX1XX0NFRPPXXNPRFNRXXFRPNFFXXRFNPRNXXPNFRP'))
+
+    x30_8v = bparray('0000000000000000000000000000000000000000000000000000000000000000')
+    x31_8v = bparray('1111111111111111111111111111111111111111111111111111111111111111')
+
+    assert_equal_shape_and_contents(bp_to_mv(lg.bp8v_latch(out_8v, x1_8v, x2_8v, x30_8v))[:,0], mvarray('0XX000000XXXXXXX0XXXXXXX0XX10R110XX000000XXR0R0R0XXF001F0XX10R11'))
+    assert_equal_shape_and_contents(bp_to_mv(lg.bp8v_latch(out_8v, x1_8v, x2_8v, x31_8v))[:,0], mvarray('1XX01F001XXXXXXX1XXXXXXX1XX111111XX01F001XXR110R1XXF1F1F1XX11111'))
diff --git a/tests/test_logic_sim.py b/tests/test_logic_sim.py
index b581cb6..5849310 100644
--- a/tests/test_logic_sim.py
+++ b/tests/test_logic_sim.py
@@ -1,135 +1,175 @@
+import numpy as np
+
 from kyupy.logic_sim import LogicSim
-from kyupy import bench
-from kyupy.logic import MVArray, BPArray
+from kyupy import bench, logic, sim
+from kyupy.logic import mvarray, bparray, bp_to_mv, mv_to_bp
 
+def assert_equal_shape_and_contents(actual, desired):
+    desired = np.array(desired, dtype=np.uint8)
+    assert actual.shape == desired.shape
+    np.testing.assert_allclose(actual, desired)
 
 def test_2v():
-    c = bench.parse('input(x, y) output(a, o, n) a=and(x,y) o=or(x,y) n=not(x)')
-    s = LogicSim(c, 4, m=2)
-    assert len(s.interface) == 5
-    mva = MVArray(['00000', '01000', '10000', '11000'], m=2)
-    bpa = BPArray(mva)
-    s.assign(bpa)
-    s.propagate()
-    s.capture(bpa)
-    mva = MVArray(bpa)
-    assert mva[0] == '00001'
-    assert mva[1] == '01011'
-    assert mva[2] == '10010'
-    assert mva[3] == '11110'
+    c = bench.parse(f'''
+        input(i3, i2, i1, i0)
+        output({",".join([f"o{i:02d}" for i in range(33)])})
+        o00=BUF1(i0)
+        o01=INV1(i0)
+        o02=AND2(i0,i1)
+        o03=AND3(i0,i1,i2)
+        o04=AND4(i0,i1,i2,i3)
+        o05=NAND2(i0,i1)
+        o06=NAND3(i0,i1,i2)
+        o07=NAND4(i0,i1,i2,i3)
+        o08=OR2(i0,i1)
+        o09=OR3(i0,i1,i2)
+        o10=OR4(i0,i1,i2,i3)
+        o11=NOR2(i0,i1)
+        o12=NOR3(i0,i1,i2)
+        o13=NOR4(i0,i1,i2,i3)
+        o14=XOR2(i0,i1)
+        o15=XOR3(i0,i1,i2)
+        o16=XOR4(i0,i1,i2,i3)
+        o17=XNOR2(i0,i1)
+        o18=XNOR3(i0,i1,i2)
+        o19=XNOR4(i0,i1,i2,i3)
+        o20=AO21(i0,i1,i2)
+        o21=OA21(i0,i1,i2)
+        o22=AO22(i0,i1,i2,i3)
+        o23=OA22(i0,i1,i2,i3)
+        o24=AOI21(i0,i1,i2)
+        o25=OAI21(i0,i1,i2)
+        o26=AOI22(i0,i1,i2,i3)
+        o27=OAI22(i0,i1,i2,i3)
+        o28=AO211(i0,i1,i2,i3)
+        o29=OA211(i0,i1,i2,i3)
+        o30=AOI211(i0,i1,i2,i3)
+        o31=OAI211(i0,i1,i2,i3)
+        o32=MUX21(i0,i1,i2)
+    ''')
+    s = LogicSim(c, 16, m=2)
+    bpa = logic.bparray([f'{i:04b}'+('-'*(s.s_len-4)) for i in range(16)])
+    s.s[0] = bpa
+    s.s_to_c()
+    s.c_prop()
+    s.c_to_s()
+    mva = logic.bp_to_mv(s.s[1])
+    for res, exp in zip(logic.packbits(mva[4:], dtype=np.uint32), [
+            sim.BUF1, sim.INV1,
+            sim.AND2, sim.AND3, sim.AND4,
+            sim.NAND2, sim.NAND3, sim.NAND4,
+            sim.OR2, sim.OR3, sim.OR4,
+            sim.NOR2, sim.NOR3, sim.NOR4,
+            sim.XOR2, sim.XOR3, sim.XOR4,
+            sim.XNOR2, sim.XNOR3, sim.XNOR4,
+            sim.AO21, sim.OA21,
+            sim.AO22, sim.OA22,
+            sim.AOI21, sim.OAI21,
+            sim.AOI22, sim.OAI22,
+            sim.AO211, sim.OA211,
+            sim.AOI211, sim.OAI211,
+            sim.MUX21
+        ]):
+        assert res == exp, f'Mismatch for SimPrim {sim.names[exp]} res={bin(res)} exp={bin(exp)}'
 
 
 def test_4v():
     c = bench.parse('input(x, y) output(a, o, n) a=and(x,y) o=or(x,y) n=not(x)')
-    s = LogicSim(c, 16, m=4)
-    assert len(s.interface) == 5
-    mva = MVArray(['00000', '01000', '0-000', '0X000',
-                   '10000', '11000', '1-000', '1X000',
-                   '-0000', '-1000', '--000', '-X000',
-                   'X0000', 'X1000', 'X-000', 'XX000'], m=4)
-    bpa = BPArray(mva)
-    s.assign(bpa)
-    s.propagate()
-    s.capture(bpa)
-    mva = MVArray(bpa)
-    assert mva[0] == '00001'
-    assert mva[1] == '01011'
-    assert mva[2] == '0-0X1'
-    assert mva[3] == '0X0X1'
-    assert mva[4] == '10010'
-    assert mva[5] == '11110'
-    assert mva[6] == '1-X10'
-    assert mva[7] == '1XX10'
-    assert mva[8] == '-00XX'
-    assert mva[9] == '-1X1X'
-    assert mva[10] == '--XXX'
-    assert mva[11] == '-XXXX'
-    assert mva[12] == 'X00XX'
-    assert mva[13] == 'X1X1X'
-    assert mva[14] == 'X-XXX'
-    assert mva[15] == 'XXXXX'
+    s = LogicSim(c, 16, m=8)  # FIXME: m=4
+    assert s.s_len == 5
+    bpa = bparray(
+        '00---', '01---', '0----', '0X---',
+        '10---', '11---', '1----', '1X---',
+        '-0---', '-1---', '-----', '-X---',
+        'X0---', 'X1---', 'X----', 'XX---')
+    s.s[0] = bpa
+    s.s_to_c()
+    s.c_prop()
+    s.c_to_s()
+    mva = bp_to_mv(s.s[1])
+    assert_equal_shape_and_contents(mva, mvarray(
+        '--001', '--011', '--0X1', '--0X1',
+        '--010', '--110', '--X10', '--X10',
+        '--0XX', '--X1X', '--XXX', '--XXX',
+        '--0XX', '--X1X', '--XXX', '--XXX'))
 
 
 def test_8v():
     c = bench.parse('input(x, y) output(a, o, n, xo) a=and(x,y) o=or(x,y) n=not(x) xo=xor(x,y)')
     s = LogicSim(c, 64, m=8)
-    assert len(s.interface) == 6
-    mva = MVArray(['000010', '010111', '0-0X1X', '0X0X1X', '0R0R1R', '0F0F1F', '0P0P1P', '0N0N1N',
-                   '100101', '111100', '1-X10X', '1XX10X', '1RR10F', '1FF10R', '1PP10N', '1NN10P',
-                   '-00XXX', '-1X1XX', '--XXXX', '-XXXXX', '-RXXXX', '-FXXXX', '-PXXXX', '-NXXXX',
-                   'X00XXX', 'X1X1XX', 'X-XXXX', 'XXXXXX', 'XRXXXX', 'XFXXXX', 'XPXXXX', 'XNXXXX',
-                   'R00RFR', 'R1R1FF', 'R-XXFX', 'RXXXFX', 'RRRRFP', 'RFPNFN', 'RPPRFR', 'RNRNFF',
-                   'F00FRF', 'F1F1RR', 'F-XXRX', 'FXXXRX', 'FRPNRN', 'FFFFRP', 'FPPFRF', 'FNFNRR',
-                   'P00PNP', 'P1P1NN', 'P-XXNX', 'PXXXNX', 'PRPRNR', 'PFPFNF', 'PPPPNP', 'PNPNNN',
-                   'N00NPN', 'N1N1PP', 'N-XXPX', 'NXXXPX', 'NRRNPF', 'NFFNPR', 'NPPNPN', 'NNNNPP'], m=8)
-    bpa = BPArray(mva)
-    s.assign(bpa)
-    s.propagate()
-    resp_bp = BPArray(bpa)
-    s.capture(resp_bp)
-    resp = MVArray(resp_bp)
-
-    for i in range(64):
-        assert resp[i] == mva[i]
+    assert s.s_len == 6
+    mva = mvarray(
+        '000010', '010111', '0-0X1X', '0X0X1X', '0R0R1R', '0F0F1F', '0P0P1P', '0N0N1N',
+        '100101', '111100', '1-X10X', '1XX10X', '1RR10F', '1FF10R', '1PP10N', '1NN10P',
+        '-00XXX', '-1X1XX', '--XXXX', '-XXXXX', '-RXXXX', '-FXXXX', '-PXXXX', '-NXXXX',
+        'X00XXX', 'X1X1XX', 'X-XXXX', 'XXXXXX', 'XRXXXX', 'XFXXXX', 'XPXXXX', 'XNXXXX',
+        'R00RFR', 'R1R1FF', 'R-XXFX', 'RXXXFX', 'RRRRFP', 'RFPNFN', 'RPPRFR', 'RNRNFF',
+        'F00FRF', 'F1F1RR', 'F-XXRX', 'FXXXRX', 'FRPNRN', 'FFFFRP', 'FPPFRF', 'FNFNRR',
+        'P00PNP', 'P1P1NN', 'P-XXNX', 'PXXXNX', 'PRPRNR', 'PFPFNF', 'PPPPNP', 'PNPNNN',
+        'N00NPN', 'N1N1PP', 'N-XXPX', 'NXXXPX', 'NRRNPF', 'NFFNPR', 'NPPNPN', 'NNNNPP')
+    tests = np.copy(mva)
+    tests[2:] = logic.UNASSIGNED
+    bpa = mv_to_bp(tests)
+    s.s[0] = bpa
+    s.s_to_c()
+    s.c_prop()
+    s.c_to_s()
+    resp = bp_to_mv(s.s[1])
+
+    exp_resp = np.copy(mva)
+    exp_resp[:2] = logic.UNASSIGNED
+    np.testing.assert_allclose(resp, exp_resp)
 
 
 def test_loop():
     c = bench.parse('q=dff(d) d=not(q)')
     s = LogicSim(c, 4, m=8)
-    assert len(s.interface) == 1
-    mva = MVArray([['0'], ['1'], ['R'], ['F']], m=8)
+    assert s.s_len == 1
+    mva = mvarray([['0'], ['1'], ['R'], ['F']])
 
-    s.assign(BPArray(mva))
-    s.propagate()
-    resp_bp = BPArray((len(s.interface), s.sims))
-    s.capture(resp_bp)
-    resp = MVArray(resp_bp)
+    # TODO
+    # s.assign(BPArray(mva))
+    # s.propagate()
+    # resp_bp = BPArray((len(s.interface), s.sims))
+    # s.capture(resp_bp)
+    # resp = MVArray(resp_bp)
 
-    assert resp[0] == '1'
-    assert resp[1] == '0'
-    assert resp[2] == 'F'
-    assert resp[3] == 'R'
+    # assert resp[0] == '1'
+    # assert resp[1] == '0'
+    # assert resp[2] == 'F'
+    # assert resp[3] == 'R'
 
-    resp_bp = s.cycle(resp_bp)
-    resp = MVArray(resp_bp)
+    # resp_bp = s.cycle(resp_bp)
+    # resp = MVArray(resp_bp)
 
-    assert resp[0] == '0'
-    assert resp[1] == '1'
-    assert resp[2] == 'R'
-    assert resp[3] == 'F'
+    # assert resp[0] == '0'
+    # assert resp[1] == '1'
+    # assert resp[2] == 'R'
+    # assert resp[3] == 'F'
 
 
 def test_latch():
     c = bench.parse('input(d, t) output(q) q=latch(d, t)')
     s = LogicSim(c, 8, m=8)
-    assert len(s.interface) == 4
-    mva = MVArray(['00-0', '00-1', '01-0', '01-1', '10-0', '10-1', '11-0', '11-1'], m=8)
-    exp = MVArray(['0000', '0011', '0100', '0100', '1000', '1011', '1111', '1111'], m=8)
+    assert s.s_len == 4
+    mva = mvarray('00-0', '00-1', '01-0', '01-1', '10-0', '10-1', '11-0', '11-1')
+    exp = mvarray('0000', '0011', '0100', '0100', '1000', '1011', '1111', '1111')
 
-    resp = MVArray(s.cycle(BPArray(mva)))
+    # TODO
+    # resp = MVArray(s.cycle(BPArray(mva)))
 
-    for i in range(len(mva)):
-        assert resp[i] == exp[i]
+    # for i in range(len(mva)):
+    #     assert resp[i] == exp[i]
 
 
 def test_b01(mydir):
     c = bench.load(mydir / 'b01.bench')
 
-    # 2-valued
-    s = LogicSim(c, 8, m=2)
-    assert len(s.interface) == 9
-    mva = MVArray((len(s.interface), 8), m=2)
-    # mva.randomize()
-    bpa = BPArray(mva)
-    s.assign(bpa)
-    s.propagate()
-    s.capture(bpa)
-
     # 8-valued
     s = LogicSim(c, 8, m=8)
-    mva = MVArray((len(s.interface), 8), m=8)
-    # mva.randomize()
-    bpa = BPArray(mva)
-    s.assign(bpa)
-    s.propagate()
-    s.capture(bpa)
+    mva = np.zeros((s.s_len, 8), dtype=np.uint8)
+    s.s[0] = mv_to_bp(mva)
+    s.s_to_c()
+    s.c_prop()
+    s.c_to_s()
+    bp_to_mv(s.s[1])
diff --git a/tests/test_sdf.py b/tests/test_sdf.py
index b09469e..e94285b 100644
--- a/tests/test_sdf.py
+++ b/tests/test_sdf.py
@@ -1,5 +1,8 @@
-from kyupy import sdf, verilog
+import numpy as np
 
+from kyupy import sdf, verilog, bench
+from kyupy.wave_sim import WaveSim, TMAX, TMIN
+from kyupy.techlib import SAED32, SAED90
 
 def test_parse():
     test = '''
@@ -16,71 +19,70 @@ def test_parse():
     (TEMPERATURE 25.00:25.00:25.00)
     (TIMESCALE 1ns)
     (CELL
-      (CELLTYPE "b14")
-      (INSTANCE)
-      (DELAY
-        (ABSOLUTE
-        (INTERCONNECT U621/ZN U19246/IN1 (0.000:0.000:0.000))
-        (INTERCONNECT U13292/QN U19246/IN2 (0.001:0.001:0.001))
-        (INTERCONNECT U15050/QN U19247/IN1 (0.000:0.000:0.000))
-        (INTERCONNECT U13293/QN U19247/IN2 (0.000:0.000:0.000) (0.000:0.000:0.000))
+        (CELLTYPE "b14")
+        (INSTANCE)
+        (DELAY
+            (ABSOLUTE
+                (INTERCONNECT U621/ZN U19246/IN1 (0.000:0.000:0.000))
+                (INTERCONNECT U13292/QN U19246/IN2 (0.001:0.001:0.001))
+                (INTERCONNECT U15050/QN U19247/IN1 (0.000:0.000:0.000))
+                (INTERCONNECT U13293/QN U19247/IN2 (0.000:0.000:0.000) (0.000:0.000:0.000))
+            )
         )
-      )
     )
     (CELL
-      (CELLTYPE "INVX2")
-      (INSTANCE U78)
-      (DELAY
-        (ABSOLUTE
-        (IOPATH INP ZN (0.201:0.227:0.227) (0.250:0.271:0.271))
+        (CELLTYPE "INVX2")
+        (INSTANCE U78)
+        (DELAY
+            (ABSOLUTE
+                (IOPATH INP ZN (0.201:0.227:0.227) (0.250:0.271:0.271))
+            )
         )
-      )
     )
     (CELL
-      (CELLTYPE "SDFFARX1")
-      (INSTANCE reg3_reg_1_0)
-      (DELAY
-        (ABSOLUTE
-        (IOPATH (posedge CLK) Q (0.707:0.710:0.710) (0.737:0.740:0.740))
-        (IOPATH (negedge RSTB) Q () (0.909:0.948:0.948))
-        (IOPATH (posedge CLK) QN (0.585:0.589:0.589) (0.545:0.550:0.550))
-        (IOPATH (negedge RSTB) QN (1.546:1.593:1.593) ())
+        (CELLTYPE "SDFFARX1")
+        (INSTANCE reg3_reg_1_0)
+        (DELAY
+            (ABSOLUTE
+                (IOPATH (posedge CLK) Q (0.707:0.710:0.710) (0.737:0.740:0.740))
+                (IOPATH (negedge RSTB) Q () (0.909:0.948:0.948))
+                (IOPATH (posedge CLK) QN (0.585:0.589:0.589) (0.545:0.550:0.550))
+                (IOPATH (negedge RSTB) QN (1.546:1.593:1.593) ())
+            )
         )
-      )
-      (TIMINGCHECK
-        (WIDTH (posedge CLK) (0.284:0.284:0.284))
-        (WIDTH (negedge CLK) (0.642:0.642:0.642))
-        (SETUP (posedge D) (posedge CLK) (0.544:0.553:0.553))
-        (SETUP (negedge D) (posedge CLK) (0.620:0.643:0.643))
-        (HOLD (posedge D) (posedge CLK) (-0.321:-0.331:-0.331))
-        (HOLD (negedge D) (posedge CLK) (-0.196:-0.219:-0.219))
-        (RECOVERY (posedge RSTB) (posedge CLK) (-1.390:-1.455:-1.455))
-        (HOLD (posedge RSTB) (posedge CLK) (1.448:1.509:1.509))
-        (SETUP (posedge SE) (posedge CLK) (0.662:0.670:0.670))
-        (SETUP (negedge SE) (posedge CLK) (0.698:0.702:0.702))
-        (HOLD (posedge SE) (posedge CLK) (-0.435:-0.444:-0.444))
-        (HOLD (negedge SE) (posedge CLK) (-0.291:-0.295:-0.295))
-        (SETUP (posedge SI) (posedge CLK) (0.544:0.544:0.544))
-        (SETUP (negedge SI) (posedge CLK) (0.634:0.688:0.688))
-        (HOLD (posedge SI) (posedge CLK) (-0.317:-0.318:-0.318))
-        (HOLD (negedge SI) (posedge CLK) (-0.198:-0.247:-0.247))
-        (WIDTH (negedge RSTB) (0.345:0.345:0.345))
+        (TIMINGCHECK
+            (WIDTH (posedge CLK) (0.284:0.284:0.284))
+            (WIDTH (negedge CLK) (0.642:0.642:0.642))
+            (SETUP (posedge D) (posedge CLK) (0.544:0.553:0.553))
+            (SETUP (negedge D) (posedge CLK) (0.620:0.643:0.643))
+            (HOLD (posedge D) (posedge CLK) (-0.321:-0.331:-0.331))
+            (HOLD (negedge D) (posedge CLK) (-0.196:-0.219:-0.219))
+            (RECOVERY (posedge RSTB) (posedge CLK) (-1.390:-1.455:-1.455))
+            (HOLD (posedge RSTB) (posedge CLK) (1.448:1.509:1.509))
+            (SETUP (posedge SE) (posedge CLK) (0.662:0.670:0.670))
+            (SETUP (negedge SE) (posedge CLK) (0.698:0.702:0.702))
+            (HOLD (posedge SE) (posedge CLK) (-0.435:-0.444:-0.444))
+            (HOLD (negedge SE) (posedge CLK) (-0.291:-0.295:-0.295))
+            (SETUP (posedge SI) (posedge CLK) (0.544:0.544:0.544))
+            (SETUP (negedge SI) (posedge CLK) (0.634:0.688:0.688))
+            (HOLD (posedge SI) (posedge CLK) (-0.317:-0.318:-0.318))
+            (HOLD (negedge SI) (posedge CLK) (-0.198:-0.247:-0.247))
+            (WIDTH (negedge RSTB) (0.345:0.345:0.345))
     )))
     '''
     df = sdf.parse(test)
     assert df.name == 'test'
-    # print(f'DelayFile(name={df.name}, interconnects={len(df.interconnects)}, iopaths={len(df.iopaths)})')
 
 
-def test_b14(mydir):
-    df = sdf.load(mydir / 'b14.sdf.gz')
-    assert df.name == 'b14'
+def test_b15(mydir):
+    df = sdf.load(mydir / 'b15_2ig.sdf.gz')
+    assert df.name == 'b15'
 
 
 def test_gates(mydir):
-    c = verilog.load(mydir / 'gates.v')
+    c = verilog.load(mydir / 'gates.v', tlib=SAED90)
     df = sdf.load(mydir / 'gates.sdf')
-    lt = df.annotation(c, dataset=1)
+    lt = df.iopaths(c, tlib=SAED90)[1]
     nand_a = c.cells['nandgate'].ins[0]
     nand_b = c.cells['nandgate'].ins[1]
     and_a = c.cells['andgate'].ins[0]
@@ -97,3 +99,133 @@ def test_gates(mydir):
 
     assert lt[and_b, 0, 0] == 0.375
     assert lt[and_b, 0, 1] == 0.370
+
+
+def test_nand_xor():
+    c = bench.parse("""
+        input(A1,A2)
+        output(lt_1237_U91,lt_1237_U92)
+        lt_1237_U91 = NAND2X0_RVT(A1,A2)
+        lt_1237_U92 = XOR2X1_RVT(A1,A2)
+        """)
+    df = sdf.parse("""
+        (DELAYFILE
+            (CELL
+                (CELLTYPE "NAND2X0_RVT")
+                (INSTANCE lt_1237_U91)
+                (DELAY
+                    (ABSOLUTE
+                        (IOPATH A1 Y (0.018:0.022:0.021) (0.017:0.019:0.019))
+                        (IOPATH A2 Y (0.021:0.024:0.024) (0.018:0.021:0.021))
+                    )
+                )
+            )
+            (CELL
+                (CELLTYPE "XOR2X1_RVT")
+                (INSTANCE lt_1237_U92)
+                (DELAY
+                    (ABSOLUTE
+                        (IOPATH (posedge A1) Y (0.035:0.038:0.038) (0.037:0.062:0.062))
+                        (IOPATH (negedge A1) Y (0.035:0.061:0.061) (0.036:0.040:0.040))
+                        (IOPATH (posedge A2) Y (0.042:0.043:0.043) (0.051:0.064:0.064))
+                        (IOPATH (negedge A2) Y (0.041:0.066:0.066) (0.051:0.053:0.053))
+                    )
+                )
+            )
+        )
+        """)
+    d = df.iopaths(c, tlib=SAED32)[1]
+    c.resolve_tlib_cells(SAED32)
+    sim = WaveSim(c, delays=d, sims=16)
+
+    # input A1
+    sim.s[0,0] = [0,1,0,1] * 4  # initial values  0101010101010101
+    sim.s[1,0] = 0.0            # transition time
+    sim.s[2,0] = [0,0,1,1] * 4  # final values    0011001100110011
+
+    # input A2
+    sim.s[0,1] = ([0]*4 + [1]*4)*2  # initial values  0000111100001111
+    sim.s[1,1] = 0.0                # transition time
+    sim.s[2,1] = [0]*8 + [1]*8      # final values    0000000011111111
+
+    # A1:   0FR10FR10FR10FR1
+    # A2:   0000FFFFRRRR1111
+    # nand: 11111RNR1NFF1RF0
+    # xor:  0FR1FPPRRNPF1RF0
+
+    sim.s_to_c()
+    sim.c_prop()
+    sim.c_to_s()
+
+    eat = sim.s[4,2:]
+    lst = sim.s[5,2:]
+
+    # NAND-gate output
+    assert np.allclose(eat[0], [
+        TMAX, TMAX, TMAX, TMAX, TMAX,
+        0.022,  # FF -> rising Y: min(0.022, 0.024)
+        TMAX,   # RF: pulse filtered
+        0.024,  # falling A2 -> rising Y
+        TMAX,
+        TMAX,   # FR: pulse filtered
+        0.021,  # RR -> falling Y: max(0.019, 0.021)
+        0.021,  # rising A2 -> falling Y
+        TMAX,
+        0.022,  # falling A1 -> rising Y
+        0.019,  # rising A1 -> falling Y
+        TMAX
+    ])
+
+    assert np.allclose(lst[0], [
+        TMIN, TMIN, TMIN, TMIN, TMIN,
+        0.022,  # FF -> rising Y: min(0.022, 0.024)
+        TMIN,   # RF: pulse filtered
+        0.024,  # falling A2 -> rising Y
+        TMIN,
+        TMIN,   # FR: pulse filtered
+        0.021,  # RR -> falling Y: max(0.019, 0.021)
+        0.021,  # rising A2 -> falling Y
+        TMIN,
+        0.022,  # falling A1 -> rising Y
+        0.019,  # rising A1 -> falling Y
+        TMIN
+    ])
+
+    #XOR-gate output
+    assert np.allclose(eat[1], [
+        TMAX,
+        0.040,  # A1:F -> Y:F
+        0.038,  # A1:R -> Y:R
+        TMAX,
+        0.053,  # A2:F -> Y:F
+        TMAX,   # P filtered
+        TMAX,   # P filtered
+        0.066,  # A2:F -> Y:R
+        0.043,  # A2:R -> Y:R
+        TMAX,   # N filtered
+        TMAX,   # P filtered
+        0.064,  # A2:R -> Y:F
+        TMAX,
+        0.061,  # A1:F -> Y:R
+        0.062,  # A1:R -> Y:F
+        TMAX,
+    ])
+
+    assert np.allclose(lst[1], [
+        TMIN,
+        0.040,  # A1:F -> Y:F
+        0.038,  # A1:R -> Y:R
+        TMIN,
+        0.053,  # A2:F -> Y:F
+        TMIN,   # P filtered
+        TMIN,   # P filtered
+        0.066,  # A2:F -> Y:R
+        0.043,  # A2:R -> Y:R
+        TMIN,   # N filtered
+        TMIN,   # P filtered
+        0.064,  # A2:R -> Y:F
+        TMIN,
+        0.061,  # A1:F -> Y:R
+        0.062,  # A1:R -> Y:F
+        TMIN,
+    ])
\ No newline at end of file
diff --git a/tests/test_stil.py b/tests/test_stil.py
index 3bb0182..f179493 100644
--- a/tests/test_stil.py
+++ b/tests/test_stil.py
@@ -1,21 +1,21 @@
 from kyupy import stil, verilog
+from kyupy.techlib import SAED32
 
+def test_b15(mydir):
+    b15 = verilog.load(mydir / 'b15_2ig.v.gz', tlib=SAED32)
 
-def test_b14(mydir):
-    b14 = verilog.load(mydir / 'b14.v.gz')
-    
-    s = stil.load(mydir / 'b14.stuck.stil.gz')
+    s = stil.load(mydir / 'b15_2ig.sa_nf.stil.gz')
     assert len(s.signal_groups) == 10
     assert len(s.scan_chains) == 1
-    assert len(s.calls) == 2163
-    tests = s.tests(b14)
-    resp = s.responses(b14)
+    assert len(s.calls) == 1357
+    tests = s.tests(b15)
+    resp = s.responses(b15)
     assert len(tests) > 0
     assert len(resp) > 0
-    
-    s2 = stil.load(mydir / 'b14.transition.stil.gz')
-    tests = s2.tests_loc(b14)
-    resp = s2.responses(b14)
+
+    s2 = stil.load(mydir / 'b15_2ig.tf_nf.stil.gz')
+    tests = s2.tests_loc(b15)
+    resp = s2.responses(b15)
     assert len(tests) > 0
     assert len(resp) > 0
 
diff --git a/tests/test_verilog.py b/tests/test_verilog.py
index 366032a..87bbe73 100644
--- a/tests/test_verilog.py
+++ b/tests/test_verilog.py
@@ -1,8 +1,45 @@
 from kyupy import verilog
-
+from kyupy.techlib import SAED90, SAED32
 
 def test_b01(mydir):
     with open(mydir / 'b01.v', 'r') as f:
-        modules = verilog.parse(f.read())
-    assert modules is not None
-    assert verilog.load(mydir / 'b01.v') is not None
+        c = verilog.parse(f.read(), tlib=SAED90)
+    assert c is not None
+    assert verilog.load(mydir / 'b01.v', tlib=SAED90) is not None
+
+    assert len(c.nodes) == 139
+    assert len(c.lines) == 203
+    stats = c.stats
+    assert stats['input'] == 6
+    assert stats['output'] == 3
+    assert stats['__seq__'] == 5
+
+
+def test_b15(mydir):
+    c = verilog.load(mydir / 'b15_4ig.v.gz', tlib=SAED32)
+    assert len(c.nodes) == 12067
+    assert len(c.lines) == 20731
+    stats = c.stats
+    assert stats['input'] == 40
+    assert stats['output'] == 71
+    assert stats['__seq__'] == 417
+
+
+def test_gates(mydir):
+    c = verilog.load(mydir / 'gates.v', tlib=SAED90)
+    assert len(c.nodes) == 10
+    assert len(c.lines) == 10
+    stats = c.stats
+    assert stats['input'] == 2
+    assert stats['output'] == 2
+    assert stats['__seq__'] == 0
+
+
+def test_halton2(mydir):
+    c = verilog.load(mydir / 'rng_haltonBase2.synth_yosys.v', tlib=SAED90)
+    assert len(c.nodes) == 146
+    assert len(c.lines) == 210
+    stats = c.stats
+    assert stats['input'] == 2
+    assert stats['output'] == 12
+    assert stats['__seq__'] == 12
\ No newline at end of file
diff --git a/tests/test_wave_sim.py b/tests/test_wave_sim.py
index 724a415..9a09b32 100644
--- a/tests/test_wave_sim.py
+++ b/tests/test_wave_sim.py
@@ -1,150 +1,168 @@
 import numpy as np
 
-from kyupy.wave_sim import WaveSim, WaveSimCuda, wave_eval, TMIN, TMAX
+from kyupy.wave_sim import WaveSim, WaveSimCuda, wave_eval_cpu, TMIN, TMAX
 from kyupy.logic_sim import LogicSim
-from kyupy import verilog, sdf, logic
-from kyupy.logic import MVArray, BPArray
+from kyupy import logic, bench, sim
+from kyupy.logic import mvarray
 
+def test_nand_delays():
+    op = (sim.NAND4, 4, 0, 1, 2, 3, -1, 0, 0)
+    #op = (0b0111, 4, 0, 1)
+    c = np.full((5*16, 1), TMAX)  # 5 waveforms of capacity 16
+    c_locs = np.zeros((5,), dtype='int')
+    c_caps = np.zeros((5,), dtype='int')
+
+    for i in range(5): c_locs[i], c_caps[i] = i*16, 16  # 1:1 mapping
 
-def test_wave_eval():
     # SDF specifies IOPATH delays with respect to output polarity
     # SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition
-    line_times = np.zeros((3, 2, 2))
-    line_times[0, 0, 0] = 0.1  # A -> Z rise delay
-    line_times[0, 0, 1] = 0.2  # A -> Z fall delay
-    line_times[0, 1, 0] = 0.1  # A -> Z negative pulse limit (terminate in rising Z)
-    line_times[0, 1, 1] = 0.2  # A -> Z positive pulse limit
-    line_times[1, 0, 0] = 0.3  # as above for B -> Z
-    line_times[1, 0, 1] = 0.4
-    line_times[1, 1, 0] = 0.3
-    line_times[1, 1, 1] = 0.4
-
-    state = np.zeros((3*16, 1)) + TMAX  # 3 waveforms of capacity 16
-    state[::16, 0] = 16  # first entry is capacity
-    a = state[0:16, 0]
-    b = state[16:32, 0]
-    z = state[32:, 0]
-    sat = np.zeros((3, 3), dtype='int')
-    sat[0] = 0, 16, 0
-    sat[1] = 16, 16, 0
-    sat[2] = 32, 16, 0
-
-    sdata = np.asarray([1, -1, 0, 0], dtype='float32')
-
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
-    assert z[0] == TMIN
-
-    a[0] = TMIN
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
-    assert z[0] == TMIN
-
-    b[0] = TMIN
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
-    assert z[0] == TMAX
-
-    a[0] = 1  # A _/^^^
-    b[0] = 2  # B __/^^
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
-    assert z[0] == TMIN  # ^^^\___ B -> Z fall delay
-    assert z[1] == 2.4
-    assert z[2] == TMAX
-
-    a[0] = TMIN  # A ^^^^^^
-    b[0] = TMIN  # B ^^^\__
-    b[1] = 2
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
-    assert z[0] == 2.3  # ___/^^^ B -> Z rise delay
-    assert z[1] == TMAX
-
-    # pos pulse of 0.35 at B -> 0.45 after delays
-    a[0] = TMIN  # A ^^^^^^^^
-    b[0] = TMIN
-    b[1] = 2     # B ^^\__/^^
-    b[2] = 2.35
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
-    assert z[0] == 2.3  # __/^^\__
-    assert z[1] == 2.75
-    assert z[2] == TMAX
-
-    # neg pulse of 0.45 at B -> 0.35 after delays
-    a[0] = TMIN  # A ^^^^^^^^
-    b[0] = 2  # B __/^^\__
-    b[1] = 2.45
-    b[2] = TMAX
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
-    assert z[0] == TMIN  # ^^\__/^^
-    assert z[1] == 2.4
-    assert z[2] == 2.75
-    assert z[3] == TMAX
-
-    # neg pulse of 0.35 at B -> 0.25 after delays (filtered)
-    a[0] = TMIN  # A ^^^^^^^^
-    b[0] = 2  # B __/^^\__
-    b[1] = 2.35
-    b[2] = TMAX
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
-    assert z[0] == TMIN  # ^^^^^^
-    assert z[1] == TMAX
-
-    # pos pulse of 0.25 at B -> 0.35 after delays (filtered)
-    a[0] = TMIN  # A ^^^^^^^^
-    b[0] = TMIN
-    b[1] = 2  # B ^^\__/^^
-    b[2] = 2.25
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
-    assert z[0] == TMAX  # ______
-
-
-def compare_to_logic_sim(wsim):
-    tests = MVArray((len(wsim.interface), wsim.sims))
+    delays = np.zeros((1, 5, 2, 2))
+    delays[0, 0, 0, 0] = 0.1  # A -> Z rise delay
+    delays[0, 0, 0, 1] = 0.2  # A -> Z fall delay
+    delays[0, 0, 1, 0] = 0.1  # A -> Z negative pulse limit (terminate in rising Z)
+    delays[0, 0, 1, 1] = 0.2  # A -> Z positive pulse limit
+    delays[0, 1, :, 0] = 0.3  # as above for B -> Z
+    delays[0, 1, :, 1] = 0.4
+    delays[0, 2, :, 0] = 0.5  # as above for C -> Z
+    delays[0, 2, :, 1] = 0.6
+    delays[0, 3, :, 0] = 0.7  # as above for D -> Z
+    delays[0, 3, :, 1] = 0.8
+
+    simctl_int = np.asarray([0], dtype=np.int32)
+
+    def wave_assert(inputs, output):
+        for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i
+        wave_eval_cpu(op, c, c_locs, c_caps, 0, delays, simctl_int)
+        for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v)
+
+    wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1
+    wave_assert([[TMIN,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(1,0,1,1) => 1
+    wave_assert([[TMIN,TMAX],[TMIN,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMAX])      # NAND(1,1,1,1) => 0
+
+    # Keep inputs C=1 and D=1.
+    wave_assert([[1,TMAX],[2,TMAX]], [TMIN,2.4,TMAX])              # _/⎺⎺⎺ NAND __/⎺⎺ => ⎺⎺⎺\___ (B->Z fall delay)
+    wave_assert([[TMIN,TMAX],[TMIN,2,TMAX]],  [2.3,TMAX])          # ⎺⎺⎺⎺⎺ NAND ⎺⎺\__ => ___/⎺⎺⎺ (B->Z rise delay)
+    wave_assert([[TMIN,TMAX],[TMIN,2,2.35,TMAX]], [2.3,2.75,TMAX]) # ⎺⎺⎺⎺⎺ NAND ⎺\_/⎺ => __/⎺⎺\_ (pos pulse, .35@B -> .45@Z)
+    wave_assert([[TMIN,TMAX],[TMIN,2,2.25,TMAX]], [TMAX])          # ⎺⎺⎺⎺⎺ NAND ⎺\_/⎺ => _______ (pos pulse, .25@B -> .35@Z, filtered)
+    wave_assert([[TMIN,TMAX],[2,2.45,TMAX]], [TMIN,2.4,2.75,TMAX]) # ⎺⎺⎺⎺⎺ NAND _/⎺\_ => ⎺⎺\_/⎺⎺ (neg pulse, .45@B -> .35@Z)
+    wave_assert([[TMIN,TMAX],[2,2.35,TMAX]], [TMIN,TMAX])          # ⎺⎺⎺⎺⎺ NAND _/⎺\_ => ⎺⎺⎺⎺⎺⎺⎺ (neg pulse, .35@B -> .25@Z, filtered)
+
+
+def test_tiny_circuit():
+    c = bench.parse('input(x, y) output(a, o, n) a=and(x,y) o=or(x,y) n=not(x)')
+    delays = np.full((1, len(c.lines), 2, 2), 1.0)  # unit delay for all lines
+    wsim = WaveSim(c, delays)
+    assert wsim.s.shape[1] == 5
+
+    # values for x
+    wsim.s[:3,0,0] = 0, 10, 0
+    wsim.s[:3,0,1] = 0, 20, 1
+    wsim.s[:3,0,2] = 1, 30, 0
+    wsim.s[:3,0,3] = 1, 40, 1
+
+    # values for y
+    wsim.s[:3,1,0] = 1, 50, 0
+    wsim.s[:3,1,1] = 1, 60, 0
+    wsim.s[:3,1,2] = 1, 70, 0
+    wsim.s[:3,1,3] = 0, 80, 1
+
+    wsim.s_to_c()
+
+    x_c_loc = wsim.c_locs[wsim.ppi_offset+0] # check x waveforms
+    np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 0], [TMAX, TMAX, TMAX])
+    np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 1], [20, TMAX, TMAX])
+    np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 2], [TMIN, 30, TMAX])
+    np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 3], [TMIN, TMAX, TMAX])
+
+    y_c_loc = wsim.c_locs[wsim.ppi_offset+1] # check y waveforms
+    np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 0], [TMIN, 50, TMAX])
+    np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 1], [TMIN, 60, TMAX])
+    np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 2], [TMIN, 70, TMAX])
+    np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 3], [80, TMAX, TMAX])
+
+    wsim.c_prop()
+
+    a_c_loc = wsim.c_locs[wsim.ppo_offset+2] # check a waveforms
+    np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 0], [TMAX, TMAX, TMAX])
+    np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 1], [21, 61, TMAX])
+    np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 2], [TMIN, 31, TMAX])
+    np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 3], [81, TMAX, TMAX])
+
+    o_c_loc = wsim.c_locs[wsim.ppo_offset+3] # check o waveforms
+    np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 0], [TMIN, 51, TMAX])
+    np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 1], [TMIN, TMAX, TMAX])
+    np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 2], [TMIN, 71, TMAX])
+    np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 3], [TMIN, TMAX, TMAX])
+
+    n_c_loc = wsim.c_locs[wsim.ppo_offset+4] # check n waveforms
+    np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 0], [TMIN, TMAX, TMAX])
+    np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 1], [TMIN, 21, TMAX])
+    np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 2], [31, TMAX, TMAX])
+    np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 3], [TMAX, TMAX, TMAX])
+
+    wsim.c_to_s()
+
+    # check a captures
+    np.testing.assert_allclose(wsim.s[3:7, 2, 0], [0, TMAX, TMIN, 0])
+    np.testing.assert_allclose(wsim.s[3:7, 2, 1], [0, 21, 61, 0])
+    np.testing.assert_allclose(wsim.s[3:7, 2, 2], [1, 31, 31, 0])
+    np.testing.assert_allclose(wsim.s[3:7, 2, 3], [0, 81, 81, 1])
+
+    # check o captures
+    np.testing.assert_allclose(wsim.s[3:7, 3, 0], [1, 51, 51, 0])
+    np.testing.assert_allclose(wsim.s[3:7, 3, 1], [1, TMAX, TMIN, 1])
+    np.testing.assert_allclose(wsim.s[3:7, 3, 2], [1, 71, 71, 0])
+    np.testing.assert_allclose(wsim.s[3:7, 3, 3], [1, TMAX, TMIN, 1])
+
+    # check o captures
+    np.testing.assert_allclose(wsim.s[3:7, 4, 0], [1, TMAX, TMIN, 1])
+    np.testing.assert_allclose(wsim.s[3:7, 4, 1], [1, 21, 21, 0])
+    np.testing.assert_allclose(wsim.s[3:7, 4, 2], [0, 31, 31, 1])
+    np.testing.assert_allclose(wsim.s[3:7, 4, 3], [0, TMAX, TMIN, 0])
+
+
+def compare_to_logic_sim(wsim: WaveSim):
     choices = np.asarray([logic.ZERO, logic.ONE, logic.RISE, logic.FALL], dtype=np.uint8)
     rng = np.random.default_rng(10)
-    tests.data[...] = rng.choice(choices, tests.data.shape)
-    tests_bp = BPArray(tests)
-    wsim.assign(tests_bp)
-    wsim.propagate()
-    cdata = wsim.capture()
-
-    resp = MVArray(tests)
-
-    for iidx, inode in enumerate(wsim.interface):
-        if len(inode.ins) > 0:
-            for vidx in range(wsim.sims):
-                resp.data[iidx, vidx] = logic.ZERO if cdata[iidx, vidx, 0] < 0.5 else logic.ONE
-                # resp.set_value(vidx, iidx, 0 if cdata[iidx, vidx, 0] < 0.5 else 1)
-
-    lsim = LogicSim(wsim.circuit, len(tests_bp))
-    lsim.assign(tests_bp)
-    lsim.propagate()
-    exp_bp = BPArray(tests_bp)
-    lsim.capture(exp_bp)
-    exp = MVArray(exp_bp)
-
-    for i in range(8):
-        exp_str = exp[i].replace('R', '1').replace('F', '0').replace('P', '0').replace('N', '1')
-        res_str = resp[i].replace('R', '1').replace('F', '0').replace('P', '0').replace('N', '1')
-        assert res_str == exp_str
-
-
-def test_b14(mydir):
-    c = verilog.load(mydir / 'b14.v.gz', branchforks=True)
-    df = sdf.load(mydir / 'b14.sdf.gz')
-    lt = df.annotation(c)
-    wsim = WaveSim(c, lt, 8)
-    compare_to_logic_sim(wsim)
-
-
-def test_b14_strip_forks(mydir):
-    c = verilog.load(mydir / 'b14.v.gz', branchforks=True)
-    df = sdf.load(mydir / 'b14.sdf.gz')
-    lt = df.annotation(c)
-    wsim = WaveSim(c, lt, 8, strip_forks=True)
-    compare_to_logic_sim(wsim)
-
-
-def test_b14_cuda(mydir):
-    c = verilog.load(mydir / 'b14.v.gz', branchforks=True)
-    df = sdf.load(mydir / 'b14.sdf.gz')
-    lt = df.annotation(c)
-    wsim = WaveSimCuda(c, lt, 8)
-    compare_to_logic_sim(wsim)
+    tests = rng.choice(choices, (wsim.s_len, wsim.sims))
+
+    wsim.s[0] = (tests & 2) >> 1
+    wsim.s[3] = (tests & 2) >> 1
+    wsim.s[1] = 0.0
+    wsim.s[2] = tests & 1
+    wsim.s[6] = tests & 1
+
+    wsim.s_to_c()
+    wsim.c_prop()
+    wsim.c_to_s()
+
+    resp = np.array(wsim.s[6], dtype=np.uint8) | (np.array(wsim.s[3], dtype=np.uint8)<<1)
+    resp |= ((resp ^ (resp >> 1)) & 1) << 2  # transitions
+    resp[wsim.pi_s_locs] = logic.UNASSIGNED
+
+    lsim = LogicSim(wsim.circuit, tests.shape[-1])
+    lsim.s[0] = logic.mv_to_bp(tests)
+    lsim.s_to_c()
+    lsim.c_prop()
+    lsim.c_to_s()
+    exp = logic.bp_to_mv(lsim.s[1])
+
+    resp[resp == logic.PPULSE] = logic.ZERO
+    resp[resp == logic.NPULSE] = logic.ONE
+
+    exp[exp == logic.PPULSE] = logic.ZERO
+    exp[exp == logic.NPULSE] = logic.ONE
+
+    np.testing.assert_allclose(resp, exp)
+
+
+def test_b15(b15_2ig_circuit, b15_2ig_delays):
+    compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8))
+
+
+def test_b15_strip_forks(b15_2ig_circuit, b15_2ig_delays):
+    compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))
+
+
+def test_b15_cuda(b15_2ig_circuit, b15_2ig_delays):
+    compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))