diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c563798 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +**/__pycache__ +**/.ipynb_checkpoints +**/.pytest_cache +**/.DS_Store +**/*.pyc diff --git a/README.md b/README.md new file mode 100644 index 0000000..ccf43e7 --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ +KyuPy - Processing VLSI Circuits With Ease +========================================== + +KyuPy is a python package for high-performance processing and analysis of +non-hierarchical VLSI designs. Its purpose is to provide a rapid prototyping +platform to aid and accelerate research in the fields of VLSI test, diagnosis +and reliability. KyuPy is freely available under the MIT license. + +Main Features +------------- + +* Partial [lark](https://github.com/lark-parser/lark)-parsers for common files used with synthesized designs: bench, gate-level verilog, standard delay format (SDF), standard test interface language (STIL) +* Bit-parallel gate-level 2-, 4-, and 8-valued logic simulation +* GPU-accelerated high-throughput gate-level timing simulation +* High-performance through the use of [numpy](https://numpy.org) and [numba](https://numba.pydata.org) + + +Getting Started +--------------- + +KyuPy requires python 3.6+ and the following packages: +* [lark-parser](https://pypi.org/project/lark-parser) +* [numpy](https://pypi.org/project/numpy) +* [numba](https://pypi.org/project/numba) (required only for GPU/CUDA support) + +GPU/CUDA support may [require some additional setup](https://numba.pydata.org/numba-doc/latest/cuda/index.html). If CUDA or numba is not available, the package will automatically fall back to pure python execution. + +This repository contains tests that can be run with: +``` +pytest +``` + +Usage examples to get familiar with the API can be found in the Jupyter Notebook [UsageExamples.ipynb](UsageExamples.ipynb). 
diff --git a/UsageExamples.ipynb b/UsageExamples.ipynb new file mode 100644 index 0000000..75fbe06 --- /dev/null +++ b/UsageExamples.ipynb @@ -0,0 +1,645 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Loading and Exploring Gate-Level Circuits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Example of parsing the bench data format to make simple gate-level circuits." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0000000.334 W Cuda unavailable. Falling back to pure python\n" + ] + } + ], + "source": [ + "from kyupy import bench\n", + "\n", + "# parse a file\n", + "b01 = bench.parse('tests/b01.bench')\n", + "\n", + "# ... or specify the circuit as string \n", + "mycircuit = bench.parse('input(a,b) output(o1,o2,o3) x=buf(a) o1=not(x) o2=buf(x) o3=buf(x)')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Circuits are objects of the class `Circuit`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b01" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mycircuit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Circuits are containers for two types of elements: nodes and lines.\n", + "* A `Node` is a named entity in a circuit (e.g. 
a gate, a standard cell, a named signal, or a fan-out point) that has connections to other nodes.\n", + "* A `Line` is a directional 1:1 connection between two Nodes.\n", + "\n", + "Use the `dump()` method to get a string representation of all nodes and their connections." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None(0,1,2,3,4)\n", + "0:__fork__\"a\" >1\n", + "1:__fork__\"b\" \n", + "2:__fork__\"o1\" <2 \n", + "3:__fork__\"o2\" <4 \n", + "4:__fork__\"o3\" <6 \n", + "5:buf\"x\" <1 >0\n", + "6:__fork__\"x\" <0 >3 >5 >7\n", + "7:not\"o1\" <3 >2\n", + "8:buf\"o2\" <5 >4\n", + "9:buf\"o3\" <7 >6\n" + ] + } + ], + "source": [ + "print(mycircuit.dump())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first line of the dump starts with the circuit name (\"None\" for `mycircuit`), followed by the node-IDs of all the ports (inputs and outputs) of the circuit.\n", + "\n", + "Each of the following lines describes one node.\n", + "Each node in the circuit has a unique ID, a type, a name, and line-connections. This information is given on each line in that order.\n", + "\n", + "A line in the circuit has a unique ID, a driver node and a receiver node. The connections in the dump show the direction (\">\" for output, \"<\" for input) and the line-ID. 
For example in `mycircuit`: Node-0 has one output connected to Line-1, and this Line-1 is connected to the input of Node-5.\n", + "\n", + "The `interface` is the list of nodes forming the ports (inputs and outputs):" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0:__fork__\"a\" >1,\n", + " 1:__fork__\"b\" ,\n", + " 2:__fork__\"o1\" <2 ,\n", + " 3:__fork__\"o2\" <4 ,\n", + " 4:__fork__\"o3\" <6 ]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mycircuit.interface" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nodes\n", + "\n", + "There are two types of nodes: __forks__ and __cells__.\n", + "\n", + "Forks have the special type `__fork__` while cells can be of various types (`buf`, `not`, `and`, `nor`, etc.).\n", + "Forks are used to label signals with names and to connect one cell to multiple other cells (fan-out).\n", + "The names among all forks and among all cells within a circuit are unique.\n", + "Thus, a fork and a cell are allowed to share the same name.\n", + "\n", + "Nodes in circuits can be accessed by ID or by name." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "7:not\"o1\" <3 >2" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mycircuit.nodes[7]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6:__fork__\"x\" <0 >3 >5 >7" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mycircuit.forks['x']" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5:buf\"x\" <1 >0" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mycircuit.cells['x']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nodes have an `index` (the node ID), a `kind` (the type), a `name`, as well as `ins` (input pins) and `outs` (output pins)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(6, '__fork__', 'x', [0], [3, 5, 7])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n = mycircuit.nodes[6]\n", + "n.index, n.kind, n.name, n.ins, n.outs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The inputs and outputs of a node are lists containing `Line` objects." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "kyupy.circuit.Line" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(n.ins[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lines\n", + "\n", + "A line is a directional connection between one driving node (`driver`) and one reading node (`reader`).\n", + "\n", + "A line also knows which node pins it is connected to: `driver_pin`, `reader_pin`." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 6:__fork__\"x\" <0 >3 >5 >7, 8:buf\"o2\" <5 >4, 1, 0)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l = mycircuit.nodes[6].outs[1]\n", + "l.index, l.driver, l.reader, l.driver_pin, l.reader_pin" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic Analysis Examples\n", + "### Cell type statistics" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "defaultdict(<class 'int'>, {'DFF': 5, 'AND': 1, 'NAND': 28, 'OR': 1, 'NOT': 10})\n" + ] + } + ], + "source": [ + "from collections import defaultdict\n", + "\n", + "counts = defaultdict(int)\n", + "\n", + "for n in b01.cells.values():\n", + " counts[n.kind] += 1\n", + "\n", + "print(counts)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tracing a scan chain" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from kyupy import verilog\n", + "\n", + "b14 = verilog.parse('tests/b14.v.gz')\n", + "b14" + ] + 
}, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "chain length 229\n", + "['Scan_Out', 'u04_opt1329', 'u04_opt1328', 'wr_reg', 'u04_opt11', 'state_reg_0_0', 'reg3_reg_28_0', 'reg3_reg_27_0', 'reg3_reg_26_0', 'reg3_reg_25_0', 'reg3_reg_24_0', 'u04_opt1123', 'reg3_reg_23_0', 'reg3_reg_22_0', 'reg3_reg_21_0', 'u04_opt1118', 'reg3_reg_20_0', 'reg3_reg_19_0', 'reg3_reg_18_0', 'reg3_reg_17_0', 'reg3_reg_16_0', 'reg3_reg_15_0', 'reg3_reg_14_0', 'reg3_reg_13_0', 'reg3_reg_12_0', 'reg3_reg_11_0', 'reg3_reg_10_0', 'reg3_reg_9_0', 'reg3_reg_8_0', 'reg3_reg_7_0', 'reg3_reg_6_0', 'reg3_reg_5_0', 'reg3_reg_4_0', 'reg3_reg_3_0', 'reg3_reg_2_0', 'reg3_reg_1_0', 'reg3_reg_0_0', 'reg2_reg_31_0', 'reg2_reg_30_0', 'reg2_reg_29_0', 'reg2_reg_28_0', 'reg2_reg_27_0', 'reg2_reg_26_0', 'reg2_reg_25_0', 'reg2_reg_24_0', 'reg2_reg_23_0', 'reg2_reg_22_0', 'reg2_reg_21_0', 'reg2_reg_20_0', 'reg2_reg_19_0', 'reg2_reg_18_0', 'reg2_reg_17_0', 'reg2_reg_16_0', 'reg2_reg_15_0', 'reg2_reg_14_0', 'reg2_reg_13_0', 'reg2_reg_12_0', 'reg2_reg_11_0', 'reg2_reg_10_0', 'reg2_reg_9_0', 'reg2_reg_8_0', 'reg2_reg_7_0', 'reg2_reg_6_0', 'reg2_reg_5_0', 'reg2_reg_4_0', 'reg2_reg_3_0', 'reg2_reg_2_0', 'reg2_reg_1_0', 'reg2_reg_0_0', 'reg1_reg_31_0', 'reg1_reg_30_0', 'reg1_reg_29_0', 'reg1_reg_28_0', 'reg1_reg_27_0', 'reg1_reg_26_0', 'reg1_reg_25_0', 'reg1_reg_24_0', 'reg1_reg_23_0', 'reg1_reg_22_0', 'reg1_reg_21_0', 'reg1_reg_20_0', 'reg1_reg_19_0', 'reg1_reg_18_0', 'reg1_reg_17_0', 'reg1_reg_16_0', 'reg1_reg_15_0', 'reg1_reg_14_0', 'reg1_reg_13_0', 'reg1_reg_12_0', 'reg1_reg_11_0', 'reg1_reg_10_0', 'reg1_reg_9_0', 'reg1_reg_8_0', 'reg1_reg_7_0', 'reg1_reg_6_0', 'reg1_reg_5_0', 'reg1_reg_4_0', 'reg1_reg_3_0', 'reg1_reg_2_0', 'reg1_reg_1_0', 'reg1_reg_0_0', 'reg0_reg_31_0', 'reg0_reg_30_0', 'reg0_reg_29_0', 'reg0_reg_28_0', 'reg0_reg_27_0', 'reg0_reg_26_0', 'reg0_reg_25_0', 'reg0_reg_24_0', 'reg0_reg_23_0', 
'reg0_reg_22_0', 'reg0_reg_21_0', 'reg0_reg_20_0', 'reg0_reg_19_0', 'reg0_reg_18_0', 'reg0_reg_17_0', 'reg0_reg_16_0', 'reg0_reg_15_0', 'reg0_reg_14_0', 'reg0_reg_13_0', 'reg0_reg_12_0', 'reg0_reg_11_0', 'reg0_reg_10_0', 'reg0_reg_9_0', 'reg0_reg_8_0', 'reg0_reg_7_0', 'reg0_reg_6_0', 'reg0_reg_5_0', 'reg0_reg_4_0', 'reg0_reg_3_0', 'reg0_reg_2_0', 'reg0_reg_1_0', 'reg0_reg_0_0', 'rd_reg', 'datao_reg_31_0', 'datao_reg_30_0', 'datao_reg_29_0', 'datao_reg_28_0', 'datao_reg_27_0', 'datao_reg_26_0', 'datao_reg_25_0', 'datao_reg_24_0', 'datao_reg_23_0', 'datao_reg_22_0', 'datao_reg_21_0', 'datao_reg_20_0', 'datao_reg_19_0', 'datao_reg_18_0', 'datao_reg_17_0', 'datao_reg_16_0', 'datao_reg_15_0', 'datao_reg_14_0', 'datao_reg_13_0', 'datao_reg_12_0', 'datao_reg_11_0', 'datao_reg_10_0', 'datao_reg_9_0', 'datao_reg_8_0', 'datao_reg_7_0', 'datao_reg_6_0', 'datao_reg_5_0', 'datao_reg_4_0', 'datao_reg_3_0', 'datao_reg_2_0', 'datao_reg_1_0', 'datao_reg_0_0', 'd_reg_1_0', 'd_reg_0_0', 'addr_reg_19_0', 'addr_reg_18_0', 'addr_reg_17_0', 'addr_reg_16_0', 'addr_reg_15_0', 'addr_reg_14_0', 'addr_reg_13_0', 'addr_reg_12_0', 'addr_reg_11_0', 'addr_reg_10_0', 'addr_reg_9_0', 'addr_reg_8_0', 'addr_reg_7_0', 'addr_reg_6_0', 'addr_reg_5_0', 'addr_reg_4_0', 'addr_reg_3_0', 'addr_reg_2_0', 'addr_reg_1_0', 'addr_reg_0_0', 'u04_opt1491', 'u04_opt1492', 'u04_opt1364', 'u04_opt1411', 'IR_reg_31_0', 'IR_reg_30_0', 'IR_reg_29_0', 'u04_opt1427', 'IR_reg_28_0', 'IR_reg_27_0', 'IR_reg_26_0', 'IR_reg_25_0', 'IR_reg_24_0', 'IR_reg_23_0', 'IR_reg_22_0', 'IR_reg_21_0', 'IR_reg_20_0', 'IR_reg_19_0', 'IR_reg_18_0', 'IR_reg_17_0', 'IR_reg_16_0', 'IR_reg_15_0', 'IR_reg_14_0', 'IR_reg_13_0', 'IR_reg_12_0', 'IR_reg_11_0', 'IR_reg_10_0', 'IR_reg_9_0', 'IR_reg_8_0', 'IR_reg_7_0', 'IR_reg_6_0', 'IR_reg_5_0', 'IR_reg_4_0', 'IR_reg_3_0', 'IR_reg_2_0', 'u04_opt1347', 'IR_reg_1_0', 'U14573', 'IR_reg_0_0', 'B_reg', 'Scan_In']\n" + ] + } + ], + "source": [ + "chain = []\n", + "cell = b14.cells['Scan_Out']\n", + 
"chain.append(cell)\n", + "while len(cell.ins) > 0:\n", + " cell = cell.ins[2 if 'SDFF' in cell.kind else 0].driver\n", + " if '__fork__' not in cell.kind:\n", + " chain.append(cell)\n", + " \n", + "print('chain length', len(chain))\n", + "print([c.name for c in chain])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Loading SDFs and STILs" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from kyupy import verilog, sdf\n", + "from kyupy.saed import pin_index\n", + "from kyupy import stil\n", + "\n", + "b14 = verilog.parse('tests/b14.v.gz')\n", + "df = sdf.parse('tests/b14.sdf.gz')\n", + "lt = df.annotation(b14, pin_index, interconnect=False)\n", + "s = stil.parse('tests/b14.stil.gz')\n", + "t = s.tests8v(b14)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[[0., 0.],\n", + " [0., 0.]],\n", + "\n", + " [[0., 0.],\n", + " [0., 0.]],\n", + "\n", + " [[0., 0.],\n", + " [0., 0.]],\n", + "\n", + " ...,\n", + "\n", + " [[0., 0.],\n", + " [0., 0.]],\n", + "\n", + " [[0., 0.],\n", + " [0., 0.]],\n", + "\n", + " [[0., 0.],\n", + " [0., 0.]]])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lt" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'00-RFRF01F10FFRFF1FR1F1RR010F0F1RRR-------F------------------------------------------------11110110011100110111111110111000010000001111010111001111110110010101100100001000101001101010010011010000001111110111101110110001011010100011010001111010011101001000011111011101111101010111001100100011111100000101110'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 32 Parallel 
Time Simulations with Waveform Capacity 16\n", + "\n", + "This code will fall back to pure python if no CUDA card is available. This will be quite slow.\n", + "\n", + "Instantiate simulator:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from kyupy.wave_sim_cuda import WaveSimCuda, TMAX\n", + "import numpy as np\n", + "\n", + "wsim = WaveSimCuda(b14, lt, sims=32, wavecaps=16)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Main Simulation Loop" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "nvectors = 32 #len(t)\n", + "r = np.zeros((len(wsim.interface), nvectors, 1))\n", + "\n", + "for offset in range(0, nvectors, wsim.sims):\n", + " wsim.assign(t, offset=offset)\n", + " wsim.propagate(sims=nvectors-offset)\n", + " cdata = wsim.capture(time=TMAX, offset=offset)\n", + " r = cdata[...,0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Output some captured data" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(306, 32, 6)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cdata.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " ...,\n", + " [1., 1., 1., ..., 1., 1., 1.],\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 1., 1., 1.]], dtype=float32)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check for CUDA Support\n", + "\n", + "Try this code to check if CUDA is set up 
correctly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from numba import cuda\n", + "\n", + "cuda.detect()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/kyupy/__init__.py b/kyupy/__init__.py new file mode 100644 index 0000000..087659c --- /dev/null +++ b/kyupy/__init__.py @@ -0,0 +1,106 @@ +"""This package provides tools for high-performance processing and validation +of non-hierarchical VLSI circuits to aid rapid prototyping of research code +in the fields of VLSI test, diagnosis and reliability. 
+""" + +import time +import importlib.util + + +class Log: + def __init__(self): + self.start = time.perf_counter() + self.logfile = None + + def log(self, level, message): + t = time.perf_counter() - self.start + if self.logfile is None: + print(f'{t:011.3f} {level} {message}') + else: + self.logfile.write(f'{t:011.3f} {level} {message}\n') + self.logfile.flush() + + def info(self, message): self.log('-', message) + + def warn(self, message): self.log('W', message) + + def error(self, message): self.log('E', message) + + +log = Log() + + +class MockNumba: + @staticmethod + def njit(func): + def inner(*args, **kwargs): + return func(*args, **kwargs) + return inner + + +class MockCuda: + + def __init__(self): + self.x = 0 + self.y = 0 + + def jit(self, device=False): + outer = self + + def make_launcher(func): + class Launcher(object): + def __init__(self, funcc): + self.func = funcc + + def __call__(self, *args, **kwargs): + # print(f'device func call {self.func.__name__}') + return self.func(*args, **kwargs) + + def __getitem__(self, item): + grid_dim, block_dim = item + # print(f'kernel call {self.func.__name__} grid_dim:{grid_dim} block_dim:{block_dim}') + + def inner(*args, **kwargs): + for grid_x in range(grid_dim[0]): + for grid_y in range(grid_dim[1]): + for block_x in range(block_dim[0]): + for block_y in range(block_dim[1]): + outer.x = grid_x * block_dim[0] + block_x + outer.y = grid_y * block_dim[1] + block_y + self.func(*args, **kwargs) + return inner + return Launcher(func) + + return make_launcher + + @staticmethod + def to_device(array, to=None): + if to is not None: + to[...] 
= array + return to + return array.copy() + + def synchronize(self): + pass + + def grid(self, dims): + return self.x, self.y + + +if importlib.util.find_spec('numba') is not None: + import numba + import numba.cuda + from numba.cuda.cudadrv.error import CudaSupportError + try: + list(numba.cuda.gpus) + from numba import cuda + except CudaSupportError: + log.warn('Cuda unavailable. Falling back to pure python') + cuda = MockCuda() +else: + numba = MockNumba() + cuda = MockCuda() + log.warn('Numba unavailable. Falling back to pure python') + + + diff --git a/kyupy/bench.py b/kyupy/bench.py new file mode 100644 index 0000000..cf0662d --- /dev/null +++ b/kyupy/bench.py @@ -0,0 +1,43 @@ +from lark import Lark, Transformer +from .circuit import Circuit, Node, Line + + +class BenchTransformer(Transformer): + + def __init__(self, name): + super().__init__() + self.c = Circuit(name) + + def start(self, _): return self.c + + def parameters(self, args): return [self.c.get_or_add_fork(name) for name in args] + + def interface(self, args): self.c.interface.extend(args[0]) + + def assignment(self, args): + name, cell_type, drivers = args + cell = Node(self.c, str(name), str(cell_type)) + Line(self.c, cell, self.c.get_or_add_fork(str(name))) + [Line(self.c, d, cell) for d in drivers] + + +def parse(bench): + grammar = r""" + start: (statement)* + statement: input | output | assignment + input: ("INPUT" | "input") parameters -> interface + output: ("OUTPUT" | "output") parameters -> interface + assignment: NAME "=" NAME parameters + parameters: "(" [ NAME ( "," NAME )* ] ")" + NAME: /[-_a-z0-9]+/i + %ignore ( /\r?\n/ | "#" /[^\n]*/ | /[\t\f ]/ )+ + """ + name = None + if '(' not in str(bench): # No parentheses?: Assuming it is a file name. 
+ name = str(bench).replace('.bench', '') + with open(bench, 'r') as f: + text = f.read() + else: + text = bench + return Lark(grammar, parser="lalr", transformer=BenchTransformer(name)).parse(text) + diff --git a/kyupy/bittools.py b/kyupy/bittools.py new file mode 100644 index 0000000..df4c033 --- /dev/null +++ b/kyupy/bittools.py @@ -0,0 +1,23 @@ +import numpy as np +import importlib.util +if importlib.util.find_spec('numba') is not None: + import numba +else: + from . import numba + print('Numba unavailable. Falling back to pure python') + + +_pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)]) + + +def popcount(a): + return np.sum(_pop_count_lut[a]) + + +_bit_in_lut = np.array([2 ** x for x in range(7, -1, -1)], dtype='uint8') + + +@numba.njit +def bit_in(a, pos): + return a[pos >> 3] & _bit_in_lut[pos & 7] + diff --git a/kyupy/circuit.py b/kyupy/circuit.py new file mode 100644 index 0000000..bad9a67 --- /dev/null +++ b/kyupy/circuit.py @@ -0,0 +1,236 @@ +from collections import deque + + +class GrowingList(list): + def __setitem__(self, index, value): + if index >= len(self): + self.extend([None] * (index + 1 - len(self))) + super().__setitem__(index, value) + + +class IndexList(list): + def __delitem__(self, index): + if index == len(self) - 1: + super().__delitem__(index) + else: + replacement = self.pop() + replacement.index = index + super().__setitem__(index, replacement) + + +class Node: + """A Node is a named entity in a circuit (e.g. a gate, a standard cell, + a named signal, or a fan-out point) that has connections to other nodes. + Each node contains: + + * `self.index`: a circuit-unique integer index. + * `self.kind`: a type describing its function (e.g. 'AND', 'NOR'). + The type '__fork__' is special. It signifies a named signal + or a fan-out in the circuit. Any other type is considered a physical cell. + * `self.name`: a name. Names must be unique among all forks and all cells + in the circuit. 
However, a fork (`self.kind=='__fork__'`) and a cell with + the same name may coexist. + * `self.ins`: a list of input connections (objects of class `Line`) + * `self.outs`: a list of output connections (objects of class `Line`). + """ + def __init__(self, circuit, name, kind='__fork__'): + if kind == '__fork__': + if name in circuit.forks: + raise ValueError(f'fork of name {name} already exists.') + circuit.forks[name] = self + else: + if name in circuit.cells: + raise ValueError(f'cell of name {name} already exists.') + circuit.cells[name] = self + self.index = len(circuit.nodes) + circuit.nodes.append(self) + self.circuit = circuit + self.name = name + self.kind = kind + self.ins = GrowingList() + self.outs = GrowingList() + + def __repr__(self): + ins = ' '.join([f'<{line.index}' if line is not None else '{line.index}' if line is not None else '>None' for line in self.outs]) + return f'{self.index}:{self.kind}"{self.name}" {ins} {outs}' + + def remove(self): + if self.circuit is not None: + del self.circuit.nodes[self.index] + if self.kind == '__fork__': + del self.circuit.forks[self.name] + else: + del self.circuit.cells[self.name] + self.circuit = None + + +class Line: + """A Line is a directional 1:1 connection between two Nodes. It always + connects an output of a node (called `driver`) to an input of a node + (called `reader`) and has a circuit-unique index (`self.index`). + + Furthermore, `self.driver_pin` and `self.reader_pin` are the + integer indices of the connected pins of the nodes. They always correspond + to the positions of the line in the connection lists of the nodes: + + * `self.driver.outs[self.driver_pin] == self` + * `self.reader.ins[self.reader_pin] == self` + + A Line always connects a single driver to a single reader. If a signal fans out to + multiple readers, a '__fork__' Node needs to be added. 
+ """ + def __init__(self, circuit, driver, reader): + self.index = len(circuit.lines) + circuit.lines.append(self) + if type(driver) is Node: + self.driver = driver + self.driver_pin = len(driver.outs) + for pin, line in enumerate(driver.outs): + if line is None: + self.driver_pin = pin + break + else: + self.driver, self.driver_pin = driver + if type(reader) is Node: + self.reader = reader + self.reader_pin = len(reader.ins) + for pin, line in enumerate(reader.ins): + if line is None: + self.reader_pin = pin + break + else: + self.reader, self.reader_pin = reader + self.driver.outs[self.driver_pin] = self + self.reader.ins[self.reader_pin] = self + + def remove(self): + circuit = None + if self.driver is not None: + self.driver.outs[self.driver_pin] = None + circuit = self.driver.circuit + if self.reader is not None: + self.reader.ins[self.reader_pin] = None + circuit = self.reader.circuit + if circuit is not None: + del circuit.lines[self.index] + self.driver = None + self.reader = None + + def __repr__(self): + return f'{self.index}' + + def __lt__(self, other): + return self.index < other.index + + +class Circuit: + """A Circuit is a container for interconnected nodes and lines. + + All contained lines have unique indices, so have all contained nodes. + These indices can be used to store additional data about nodes or lines + by allocating an array `my_data` of length `len(self.nodes)` and then + accessing it by `my_data[n.index]`. The indices may change iff lines or + nodes are removed from the circuit. + + Nodes come in two flavors (cells and forks, see `Node`). The names of + these nodes are kept unique within these two flavors. 
+ """ + def __init__(self, name=None): + self.name = name + self.nodes = IndexList() + self.lines = IndexList() + self.interface = GrowingList() + self.cells = {} + self.forks = {} + + def get_or_add_fork(self, name): + return self.forks[name] if name in self.forks else Node(self, name) + + def copy(self): + c = Circuit(self.name) + for node in self.nodes: + Node(c, node.name, node.kind) + for line in self.lines: + d = c.forks[line.driver.name] if line.driver.kind == '__fork__' else c.cells[line.driver.name] + r = c.forks[line.reader.name] if line.reader.kind == '__fork__' else c.cells[line.reader.name] + Line(c, (d, line.driver_pin), (r, line.reader_pin)) + for node in self.interface: + if node.kind == '__fork__': + n = c.forks[node.name] + else: + n = c.cells[node.name] + c.interface.append(n) + return c + + def dump(self): + header = f'{self.name}({",".join([str(n.index) for n in self.interface])})\n' + return header + '\n'.join([str(n) for n in self.nodes]) + + def __repr__(self): + name = f" '{self.name}'" if self.name else '' + return f'' + + def topological_order(self): + visit_count = [0] * len(self.nodes) + queue = deque(n for n in self.nodes if len(n.ins) == 0 or 'DFF' in n.kind) + while len(queue) > 0: + n = queue.popleft() + for line in n.outs: + if line is None: continue + succ = line.reader + visit_count[succ.index] += 1 + if visit_count[succ.index] == len(succ.ins) and 'DFF' not in succ.kind: + queue.append(succ) + yield n + + def topological_line_order(self): + for n in self.topological_order(): + for line in n.outs: + if line is not None: + yield line + + def reversed_topological_order(self): + visit_count = [0] * len(self.nodes) + queue = deque(n for n in self.nodes if len(n.outs) == 0 or 'DFF' in n.kind) + while len(queue) > 0: + n = queue.popleft() + for line in n.ins: + pred = line.driver + visit_count[pred.index] += 1 + if visit_count[pred.index] == len(pred.outs) and 'DFF' not in pred.kind: + queue.append(pred) + yield n + + def fanin(self, 
origin_nodes): + marks = [False] * len(self.nodes) + for n in origin_nodes: + marks[n.index] = True + for n in self.reversed_topological_order(): + if not marks[n.index]: + for line in n.outs: + if line is not None: + marks[n.index] |= marks[line.reader.index] + if marks[n.index]: + yield n + + def fanout_free_regions(self): + for stem in self.reversed_topological_order(): + if len(stem.outs) == 1 and 'DFF' not in stem.kind: continue + region = [] + if 'DFF' in stem.kind: + n = stem.ins[0] + if len(n.driver.outs) == 1 and 'DFF' not in n.driver.kind: + queue = deque([n.driver]) + else: + queue = deque() + else: + queue = deque(n.driver for n in stem.ins + if len(n.driver.outs) == 1 and 'DFF' not in n.driver.kind) + while len(queue) > 0: + n = queue.popleft() + preds = [pred.driver for pred in n.ins + if len(pred.driver.outs) == 1 and 'DFF' not in pred.driver.kind] + queue.extend(preds) + region.append(n) + yield stem, region diff --git a/kyupy/logic_sim.py b/kyupy/logic_sim.py new file mode 100644 index 0000000..1ede8ee --- /dev/null +++ b/kyupy/logic_sim.py @@ -0,0 +1,418 @@ +import numpy as np +from . import packed_vectors + + +class LogicSim: + """A bit-parallel naive combinational logic simulator supporting 2, 4, or 8-valued logics. 
class LogicSim:
    """A bit-parallel naive combinational logic simulator supporting 2, 4, or 8-valued logics.

    The simulator keeps one packed ``uint8`` array per circuit line in
    ``self.state`` with shape ``(len(circuit.lines), vdim, nbytes)``, where
    ``nbytes = (nvectors - 1) // 8 + 1``. ``vdim`` selects the logic:
    1 (2-valued), 2 (4-valued) or 3 (8-valued).

    Node evaluation functions are looked up by name: a node of kind ``K`` is
    evaluated by the first attribute ``<prefix>_vd<vdim>`` (in ``dir()`` order)
    whose prefix is a prefix of the lower-cased kind.
    """

    def __init__(self, circuit, nvectors=1, vdim=1):
        """Allocate simulation state for `circuit`.

        :param circuit: object providing `nodes`, `lines`, `interface` and `topological_order()`.
        :param nvectors: number of vectors simulated in parallel (one bit each).
        :param vdim: number of bit-planes per line (1, 2 or 3).
        :raises ValueError: if a node kind has no matching evaluation function.
        """
        self.circuit = circuit
        self.nvectors = nvectors
        nbytes = (nvectors - 1) // 8 + 1
        self.interface = list(circuit.interface) + [n for n in circuit.nodes if 'dff' in n.kind.lower()]
        self.state = np.zeros((len(circuit.lines), vdim, nbytes), dtype='uint8')
        # -1 never equals a valid epoch (0..127), so no node is scheduled initially.
        self.state_epoch = np.zeros(len(circuit.nodes), dtype='int8') - 1
        self.tmp = np.zeros((5, vdim, nbytes), dtype='uint8')
        self.zero = np.zeros((vdim, nbytes), dtype='uint8')
        if vdim > 1:
            self.zero[1] = 255
        self.epoch = 0

        # Register aliases so that the prefix lookup below also finds
        # forks, interface nodes, buffers, inverters and 2-input xor cells.
        for d in (1, 2, 3):
            for alias in ('fork', 'input', 'output', 'nbuff'):
                setattr(self, f'{alias}_vd{d}', self.fork_vdx)
            setattr(self, f'const0_vd{d}', self.const0_vdx)
            setattr(self, f'inv_vd{d}', getattr(self, f'not_vd{d}'))
            setattr(self, f'xor2_vd{d}', getattr(self, f'xor_vd{d}'))

        suffix = f'_vd{vdim}'
        known_fct = [(f[:-len(suffix)], getattr(self, f)) for f in dir(self) if f.endswith(suffix)]
        self.node_fct = []
        for n in circuit.nodes:
            t = n.kind.lower().replace('__fork__', 'fork')
            t = t.replace('__const0__', 'const0')
            t = t.replace('__const1__', 'const1')
            t = t.replace('tieh', 'const1')
            fct = next((f for prefix, f in known_fct if t.startswith(prefix)), None)
            if fct is None:
                raise ValueError(f'Unknown node kind {n.kind}')
            self.node_fct.append(fct)

    def _output_buffers(self, node):
        """Return one writable buffer per output pin; unconnected pins write into scratch ``tmp[3]``."""
        return [self.state[line.index] if line else self.tmp[3] for line in node.outs]

    def _mark_readers(self, node):
        """Schedule the readers of all connected output lines of `node` for the current epoch."""
        for line in node.outs:
            if line:
                self.state_epoch[line.reader.index] = self.epoch

    def assign(self, stimuli):
        """Apply `stimuli` to the interface nodes and (re-)assign all constant nodes.

        `stimuli` is either a ``PackedVectors`` (its ``bits`` are used) or an
        iterable of per-node arrays; entries are paired with ``self.interface``
        in order. Interface nodes without output pins are skipped.
        """
        if isinstance(stimuli, packed_vectors.PackedVectors):
            stimuli = stimuli.bits
        for stim, node in zip(stimuli, self.interface):
            if len(node.outs) == 0:
                continue
            self.node_fct[node.index]([stim], self._output_buffers(node))
            self._mark_readers(node)
        for n in self.circuit.nodes:
            if n.kind in ('__const1__', '__const0__'):
                self.node_fct[n.index]([], self._output_buffers(n))
                self._mark_readers(n)

    def capture(self, responses):
        """Copy the state of the first input line of every interface node into `responses`.

        `responses` is either a ``PackedVectors`` (its ``bits`` are written) or
        an iterable of writable arrays paired with ``self.interface`` in order.
        Interface nodes without input pins are skipped.
        """
        if isinstance(responses, packed_vectors.PackedVectors):
            responses = responses.bits
        for resp, node in zip(responses, self.interface):
            if len(node.ins) == 0:
                continue
            resp[...] = self.state[node.ins[0].index]

    def propagate(self):
        """Evaluate all nodes scheduled for the current epoch in topological order.

        Unconnected inputs read ``self.zero``; unconnected outputs are written
        to a scratch buffer and do not schedule any reader. Afterwards the
        epoch counter advances modulo 128.
        """
        for node in self.circuit.topological_order():
            if self.state_epoch[node.index] != self.epoch:
                continue
            inputs = [self.state[line.index] if line else self.zero for line in node.ins]
            self.node_fct[node.index](inputs, self._output_buffers(node))
            # Unconnected output pins (None) have no reader to schedule.
            self._mark_readers(node)
        self.epoch = (self.epoch + 1) % 128

    @staticmethod
    def fork_vdx(inputs, outputs):
        """Copy the first input to every output (any vdim)."""
        for o in outputs:
            o[...] = inputs[0]

    def const0_vdx(self, _, outputs):
        """Set every output to the logic-0 pattern (any vdim)."""
        for o in outputs:
            o[...] = self.zero

    # 2-valued simulation

    @staticmethod
    def not_vd1(inputs, outputs):
        outputs[0][0] = ~inputs[0][0]

    def const1_vd1(self, _, outputs):
        # Every output gets logic 1 (inverted zero pattern), not only the first one.
        for o in outputs:
            o[...] = self.zero
            self.not_vd1([o], [o])

    @staticmethod
    def and_vd1(inputs, outputs):
        o = outputs[0]
        o[0] = inputs[0][0]
        for i in inputs[1:]:
            o[0] &= i[0]

    @staticmethod
    def or_vd1(inputs, outputs):
        o = outputs[0]
        o[0] = inputs[0][0]
        for i in inputs[1:]:
            o[0] |= i[0]

    @staticmethod
    def xor_vd1(inputs, outputs):
        o = outputs[0]
        o[0] = inputs[0][0]
        for i in inputs[1:]:
            o[0] ^= i[0]

    @staticmethod
    def sdff_vd1(inputs, outputs):
        outputs[0][0] = inputs[0][0]
        if len(outputs) > 1:
            outputs[1][0] = ~inputs[0][0]

    @staticmethod
    def dff_vd1(inputs, outputs):
        outputs[0][0] = inputs[0][0]
        if len(outputs) > 1:
            outputs[1][0] = ~inputs[0][0]

    def nand_vd1(self, inputs, outputs):
        self.and_vd1(inputs, outputs)
        self.not_vd1(outputs, outputs)

    def nor_vd1(self, inputs, outputs):
        self.or_vd1(inputs, outputs)
        self.not_vd1(outputs, outputs)

    def xnor_vd1(self, inputs, outputs):
        self.xor_vd1(inputs, outputs)
        self.not_vd1(outputs, outputs)

    # 4-valued simulation
    # sym [0] [1] (value, care)
    # 0    0   1
    # 1    1   1
    # -    0   0
    # X    1   0

    @staticmethod
    def not_vd2(inputs, outputs):
        # 4-valued not:
        # i: 0 1 - X
        # o: 1 0 X X
        # o0 1 0 1 1
        # o1 1 1 0 0

        outputs[0][0] = ~inputs[0][0] | ~inputs[0][1]  # value = 0 or DC
        outputs[0][1] = inputs[0][1]  # care = C

    def and_vd2(self, inputs, outputs):
        # 4-valued:   o[0]:     o[1]:
        #   0 1 - X   0 1 - X   0 1 - X
        # 0 0 0 0 0   0 0 0 0   1 1 1 1
        # 1 0 1 X X   0 1 1 1   1 1 0 0
        # - 0 X X X   0 1 1 1   1 0 0 0
        # X 0 X X X   0 1 1 1   1 0 0 0

        i = inputs[0]
        any0 = self.tmp[0]
        anyd = self.tmp[1]
        any0[0] = ~i[0] & i[1]
        anyd[0] = ~i[1]
        for i in inputs[1:]:
            any0[0] |= ~i[0] & i[1]
            anyd[0] |= ~i[1]
        o = outputs[0]
        o[0] = ~any0[0]  # value = no0
        o[1] = any0[0] | ~anyd[0]  # care = any0 or noDC

    def or_vd2(self, inputs, outputs):
        # 4-valued:   o[0]:     o[1]:
        #   0 1 - X   0 1 - X   0 1 - X
        # 0 0 1 X X   0 1 1 1   1 1 0 0
        # 1 1 1 1 1   1 1 1 1   1 1 1 1
        # - X 1 X X   1 1 1 1   0 1 0 0
        # X X 1 X X   1 1 1 1   0 1 0 0

        i = inputs[0]
        any1 = self.tmp[0]
        anyd = self.tmp[1]
        any1[0] = i[0] & i[1]
        anyd[0] = ~i[1]
        for i in inputs[1:]:
            any1[0] |= i[0] & i[1]
            anyd[0] |= ~i[1]
        o = outputs[0]
        o[0] = any1[0] | anyd[0]  # value = any1 or anyDC
        o[1] = any1[0] | ~anyd[0]  # care = any1 or noDC

    def xor_vd2(self, inputs, outputs):
        # 4-valued:   o[0]:     o[1]:
        #   0 1 - X   0 1 - X   0 1 - X
        # 0 0 1 X X   0 1 1 1   1 1 0 0
        # 1 1 0 X X   1 0 1 1   1 1 0 0
        # - X X X X   1 1 1 1   0 0 0 0
        # X X X X X   1 1 1 1   0 0 0 0

        i = inputs[0]
        odd1 = self.tmp[0]
        anyd = self.tmp[1]
        odd1[0] = i[0] & i[1]
        anyd[0] = ~i[1]
        for i in inputs[1:]:
            odd1[0] ^= i[0] & i[1]
            anyd[0] |= ~i[1]
        o = outputs[0]
        o[0] = odd1[0] | anyd[0]  # value = odd1 or anyDC
        o[1] = ~anyd[0]  # care = noDC

    def sdff_vd2(self, inputs, outputs):
        self.dff_vd2(inputs, outputs)
        if len(outputs) > 1:
            outputs[1][0] = ~inputs[0][0] | ~inputs[0][1]  # value = 0 or DC
            outputs[1][1] = inputs[0][1]  # care = C

    @staticmethod
    def dff_vd2(inputs, outputs):
        outputs[0][0] = inputs[0][0] | ~inputs[0][1]  # value = 1 or DC
        outputs[0][1] = inputs[0][1]  # care = C

    def nand_vd2(self, inputs, outputs):
        self.and_vd2(inputs, outputs)
        self.not_vd2(outputs, outputs)

    def nor_vd2(self, inputs, outputs):
        self.or_vd2(inputs, outputs)
        self.not_vd2(outputs, outputs)

    def xnor_vd2(self, inputs, outputs):
        self.xor_vd2(inputs, outputs)
        self.not_vd2(outputs, outputs)

    def const1_vd2(self, _, outputs):
        # Every output gets logic 1 (inverted zero pattern), not only the first one.
        for o in outputs:
            o[...] = self.zero
            self.not_vd2([o], [o])

    # 8-valued simulation
    # sym [0] [1] [2] (initial value, ~final value, toggles present?)
    # 0    0   1   0
    # 1    1   0   0
    # -    0   0   0
    # X    1   1   0
    # R    0   0   1   _/"
    # F    1   1   1   "\_
    # P    0   1   1   _/\_
    # N    1   0   1   "\/"

    def not_vd3(self, inputs, outputs):
        # 8-valued not:
        # i: 0 1 - X R F P N
        # i0 0 1 0 1 0 1 0 1
        # i1 1 0 0 1 0 1 1 0
        # i2 0 0 0 0 1 1 1 1
        # o: 1 0 X X F R N P
        # o0 1 0 1 1 1 0 1 0
        # o1 0 1 1 1 1 0 0 1
        # o2 0 0 0 0 1 1 1 1
        i = inputs[0]
        dc = self.tmp[0]
        # dc is computed before any output is written, so inputs may alias outputs.
        dc[0] = ~(i[0] ^ i[1]) & ~i[2]
        outputs[0][0] = ~i[0] | dc[0]  # init.v = ~i0 or DC
        outputs[0][1] = ~i[1] | dc[0]  # ~final.v = ~i1 or DC
        outputs[0][2] = i[2]  # toggles = i2

    def and_vd3(self, inputs, outputs):
        # 8-valued:           o[0]:             o[1]:             o[2]:
        #   0 1 - X R F P N   0 1 - X R F P N   0 1 - X R F P N   0 1 - X R F P N
        # 0 0 0 0 0 0 0 0 0   0 0 0 0 0 0 0 0   1 1 1 1 1 1 1 1   0 0 0 0 0 0 0 0
        # 1 0 1 X X R F P N   0 1 1 1 0 1 0 1   1 0 1 1 0 1 1 0   0 0 0 0 1 1 1 1
        # - 0 X X X X X X X   0 1 1 1 1 1 1 1   1 1 1 1 1 1 1 1   0 0 0 0 0 0 0 0
        # X 0 X X X X X X X   0 1 1 1 1 1 1 1   1 1 1 1 1 1 1 1   0 0 0 0 0 0 0 0
        # R 0 R X X R R P R   0 0 1 1 0 0 0 0   1 0 1 1 0 0 1 0   0 1 0 0 1 1 1 1
        # F 0 F X X R F P F   0 1 1 1 0 1 0 1   1 1 1 1 0 1 1 1   0 1 0 0 1 1 1 1
        # P 0 P X X P P P P   0 0 1 1 0 0 0 0   1 1 1 1 1 1 1 1   0 1 0 0 1 1 1 1
        # N 0 N X X R F P N   0 1 1 1 0 1 0 1   1 0 1 1 0 1 1 0   0 1 0 0 1 1 1 1
        i = inputs[0]
        anyi0 = self.tmp[0]
        anyf0 = self.tmp[1]
        anyd = self.tmp[2]
        any0 = self.tmp[3]
        any_t = self.tmp[4]
        anyd[0] = ~(i[0] ^ i[1]) & ~i[2]
        anyi0[0] = ~i[0] & ~anyd[0]
        anyf0[0] = i[1] & ~anyd[0]
        any_t[0] = i[2]
        any0[0] = anyi0[0] & anyf0[0] & ~i[2]
        for i in inputs[1:]:
            dc = ~(i[0] ^ i[1]) & ~i[2]
            anyd[0] |= dc
            anyi0[0] |= ~i[0] & ~dc
            anyf0[0] |= i[1] & ~dc
            any_t[0] |= i[2]
            any0[0] |= ~i[0] & ~dc & i[1] & ~i[2]
        o = outputs[0]
        o[0] = (~anyi0[0] | anyd[0]) & ~any0[0]  # initial = no_i0 or DC
        o[1] = anyf0[0] | anyd[0]  # ~final = ~no_f0 or DC
        o[2] = any_t[0] & ~(anyd[0] | any0[0])  # toggle = anyT and noDC and no0

    def or_vd3(self, inputs, outputs):
        # 8-valued:           o[0]:             o[1]:             o[2]:
        #   0 1 - X R F P N   0 1 - X R F P N   0 1 - X R F P N   0 1 - X R F P N
        # 0 0 1 X X R F P N   0 1 1 1 0 1 0 1   1 0 1 1 0 1 1 0   0 0 0 0 1 1 1 1
        # 1 1 1 1 1 1 1 1 1   1 1 1 1 1 1 1 1   0 0 0 0 0 0 0 0   0 0 0 0 0 0 0 0
        # - X 1 X X X X X X   1 1 1 1 1 1 1 1   1 0 1 1 1 1 1 1   0 0 0 0 0 0 0 0
        # X X 1 X X X X X X   1 1 1 1 1 1 1 1   1 0 1 1 1 1 1 1   0 0 0 0 0 0 0 0
        # R R 1 X X R N R R   0 1 1 1 0 1 0 0   0 0 1 1 0 0 0 0   1 0 0 0 1 1 1 1
        # F F 1 X X N F F F   1 1 1 1 1 1 1 1   1 0 1 1 0 1 1 1   1 0 0 0 1 1 1 1
        # P P 1 X X R F P N   0 1 1 1 0 1 0 1   1 0 1 1 0 1 1 0   1 0 0 0 1 1 1 1
        # N N 1 X X R F N N   1 1 1 1 0 1 1 1   0 0 1 1 0 1 0 0   1 0 0 0 1 1 1 1
        i = inputs[0]
        anyi1 = self.tmp[0]
        anyf1 = self.tmp[1]
        anyd = self.tmp[2]
        any1 = self.tmp[3]
        any_t = self.tmp[4]
        anyd[0] = ~(i[0] ^ i[1]) & ~i[2]
        anyi1[0] = i[0] & ~anyd[0]
        anyf1[0] = ~i[1] & ~anyd[0]
        any_t[0] = i[2]
        any1[0] = (anyi1[0] & anyf1[0]) & ~i[2]
        for i in inputs[1:]:
            dc = ~(i[0] ^ i[1]) & ~i[2]
            anyd[0] |= dc
            anyi1[0] |= i[0] & ~dc
            anyf1[0] |= ~i[1] & ~dc
            any_t[0] |= i[2]
            any1[0] |= i[0] & ~dc & ~i[1] & ~i[2]
        o = outputs[0]
        o[0] = anyi1[0] | anyd[0]  # initial = i1 or DC
        o[1] = (~anyf1[0] | anyd[0]) & ~any1[0]  # ~final = f1 or DC
        o[2] = any_t[0] & ~(anyd[0] | any1[0])  # toggle = anyT and no(DC or 1)

    def xor_vd3(self, inputs, outputs):
        # 8-valued:           o[0]:             o[1]:             o[2]:
        #   0 1 - X R F P N   0 1 - X R F P N   0 1 - X R F P N   0 1 - X R F P N
        # 0 0 1 X X R F P N   0 1 1 1 0 1 0 1   1 0 1 1 0 1 1 0   0 0 0 0 1 1 1 1
        # 1 1 0 X X F R N P   1 0 1 1 1 0 1 0   0 1 1 1 1 0 0 1   0 0 0 0 1 1 1 1
        # - X X X X X X X X   1 1 1 1 1 1 1 1   1 1 1 1 1 1 1 1   0 0 0 0 0 0 0 0
        # X X X X X X X X X   1 1 1 1 1 1 1 1   1 1 1 1 1 1 1 1   0 0 0 0 0 0 0 0
        # R R F X X P N R F   0 1 1 1 0 1 0 1   0 1 1 1 1 0 0 1   1 1 0 0 1 1 1 1
        # F F R X X N P F R   1 0 1 1 1 0 1 0   1 0 1 1 0 1 1 0   1 1 0 0 1 1 1 1
        # P P N X X R F P N   0 1 1 1 0 1 0 1   1 0 1 1 0 1 1 0   1 1 0 0 1 1 1 1
        # N N P X X F R N P   1 0 1 1 1 0 1 0   0 1 1 1 1 0 0 1   1 1 0 0 1 1 1 1
        i = inputs[0]
        odd0 = self.tmp[0]
        odd1 = self.tmp[1]
        anyd = self.tmp[2]
        anyt = self.tmp[3]
        odd0[0] = i[0]
        odd1[0] = i[1]
        anyd[0] = ~(i[0] ^ i[1]) & ~i[2]
        anyt[0] = i[2]
        for i in inputs[1:]:
            odd0[0] ^= i[0]
            odd1[0] ^= i[1]
            anyd[0] |= ~(i[0] ^ i[1]) & ~i[2]
            anyt[0] |= i[2]
        o = outputs[0]
        o[0] = odd0[0] | anyd[0]
        o[1] = ~odd1[0] | anyd[0]
        o[2] = anyt[0] & ~anyd[0]

    def sdff_vd3(self, inputs, outputs):
        self.dff_vd3(inputs, outputs)
        if len(outputs) > 1:
            i = inputs[0]
            dc = self.tmp[0]
            dc[0] = ~(i[0] ^ i[1]) & ~i[2]
            outputs[1][0] = ~i[0] | dc[0]  # value = 1 or DC
            outputs[1][1] = ~i[1] | dc[0]  # value = 1 or DC
            outputs[1][2] = i[2]  # toggle = T

    def dff_vd3(self, inputs, outputs):
        i = inputs[0]
        dc = self.tmp[0]
        dc[0] = ~(i[0] ^ i[1]) & ~i[2]
        outputs[0][0] = i[0] | dc[0]  # value = 1 or DC
        outputs[0][1] = i[1] | dc[0]  # value = 1 or DC
        outputs[0][2] = i[2]  # toggle = T

    def nand_vd3(self, inputs, outputs):
        self.and_vd3(inputs, outputs)
        self.not_vd3(outputs, outputs)

    def nor_vd3(self, inputs, outputs):
        self.or_vd3(inputs, outputs)
        self.not_vd3(outputs, outputs)

    def xnor_vd3(self, inputs, outputs):
        self.xor_vd3(inputs, outputs)
        self.not_vd3(outputs, outputs)

    def const1_vd3(self, _, outputs):
        # Every output gets logic 1 (inverted zero pattern), not only the first one.
        for o in outputs:
            o[...] = self.zero
            self.not_vd3([o], [o])
class PackedVectors:
    """A bit-packed container of `nvectors` logic vectors, each `width` positions wide.

    Data is stored in ``self.bits`` with shape ``(width, vdim, nbytes)`` and
    dtype ``uint8``; vector ``v`` lives in bit ``7 - v % 8`` of byte ``v // 8``.
    ``vdim`` selects the encoding:

    * 1: 2-valued (``0``, ``1``)
    * 2: 4-valued (value, care): ``0``, ``1``, ``-``, ``X``
    * 3: 8-valued (initial, ~final, toggle): ``0 1 - X R F P N``
    """

    # Characters that change when a value is logically inverted.
    _INVERTED = {'1': '0', '0': '1', 'H': 'L', 'L': 'H', 'R': 'F', 'F': 'R'}

    def __init__(self, nvectors=8, width=1, vdim=1, from_cache=None):
        """Create zero-initialized storage, or wrap a copy of `from_cache`.

        When `from_cache` is given, `width` and `vdim` are taken from its
        shape; `nvectors` is always taken from the argument.
        """
        if from_cache is not None:
            self.bits = np.array(from_cache)
            self.width, self.vdim, _ = self.bits.shape
        else:
            self.bits = np.zeros((width, vdim, (nvectors - 1) // 8 + 1), dtype='uint8')
            self.vdim = vdim
            self.width = width
        self.nvectors = nvectors
        m1 = np.array([2 ** x for x in range(7, -1, -1)], dtype='uint8')
        m0 = ~m1
        # mask[bit] == (clear-mask, set-mask) for that bit position within a byte
        self.mask = np.stack((m0, m1), axis=1)

    @classmethod
    def from_pair(cls, init, final):
        """Combine initial and final vectors into 8-valued (vdim=3) transition vectors.

        Positions where either operand is not a care value end up with all
        three bit-planes cleared.
        """
        assert init.nvectors == final.nvectors
        assert len(init.bits) == len(final.bits)
        init_v = init.bits[:, 0]
        if init.vdim == 3:
            init_c = (init.bits[:, 0] ^ init.bits[:, 1]) | init.bits[:, 2]
        elif init.vdim == 2:
            init_c = init.bits[:, 1]
        else:
            init_c = ~np.zeros_like(init.bits[:, 0])
        final_v = final.bits[:, 0]
        if final.vdim == 3:
            final_c = (final.bits[:, 0] ^ final.bits[:, 1]) | final.bits[:, 2]
            final_v = ~final.bits[:, 1]
        elif final.vdim == 2:
            final_c = final.bits[:, 1]
        else:
            final_c = ~np.zeros_like(final.bits[:, 0])
        c = init_c & final_c
        p = cls(init.nvectors, len(init.bits), 3)
        p.bits[:, 0] = init_v & c
        p.bits[:, 1] = ~final_v & c
        p.bits[:, 2] = (init_v ^ final_v) & c
        return p

    def transition_vectors(self):
        """Return 8-valued vectors describing the transition between each pair of consecutive vectors."""
        transitions = {'00': '0', '11': '1', '01': 'R', '10': 'F', '--': '-'}
        a = PackedVectors(self.nvectors - 1, self.width, 3)
        for pos in range(self.width):
            for vidx in range(self.nvectors - 1):
                tr = self.get_value(vidx, pos) + self.get_value(vidx + 1, pos)
                a.set_value(vidx, pos, transitions.get(tr, 'X'))
        return a

    def __add__(self, other):
        """Return a new container holding the vectors of `self` followed by those of `other`."""
        a = PackedVectors(self.nvectors + other.nvectors, self.width, max(self.vdim, other.vdim))
        for i in range(self.nvectors):
            a[i] = self[i]
        for i in range(len(other)):
            a[self.nvectors + i] = other[i]
        return a

    def __len__(self):
        return self.nvectors

    def randomize(self, one_probability=0.5):
        """Fill all positions with random values; each bit is 1 with probability `one_probability`."""
        for data in self.bits:
            data[0] = np.packbits((np.random.rand(self.nvectors) < one_probability).astype(int))
            if self.vdim == 2:
                data[1] = 255
            elif self.vdim == 3:
                data[1] = ~np.packbits((np.random.rand(self.nvectors) < one_probability).astype(int))
                data[2] = data[0] ^ ~data[1]

    def copy(self, selection_mask=None):
        """Return a copy; with `selection_mask`, only vectors whose bit is set in the mask are copied."""
        if selection_mask is not None:
            cpy = PackedVectors(popcount(selection_mask), len(self.bits), self.vdim)
            cur = 0
            for vidx in range(self.nvectors):
                if bit_in(selection_mask, vidx):
                    cpy[cur] = self[vidx]
                    cur += 1
        else:
            cpy = PackedVectors(self.nvectors, len(self.bits), self.vdim)
            np.copyto(cpy.bits, self.bits)
        return cpy

    @property
    def care_bits(self):
        if self.vdim == 1:
            return self.bits[:, 0] | 255
        elif self.vdim == 2:
            return self.bits[:, 1]
        elif self.vdim == 3:
            return (self.bits[:, 0] ^ self.bits[:, 1]) | self.bits[:, 2]

    @property
    def initial_bits(self):
        return self.bits[:, 0]

    @property
    def value_bits(self):
        if self.vdim == 3:
            return ~self.bits[:, 1]
        else:
            return self.bits[:, 0]

    @property
    def toggle_bits(self):
        if self.vdim == 3:
            return self.bits[:, 2]
        else:
            return self.bits[:, 0] & 0

    def get_value(self, vector, position):
        """Return the value character stored for `vector` at `position`.

        :raises IndexError: if `vector` >= `nvectors`.
        """
        if vector >= self.nvectors:
            raise IndexError(f'vector out of range: {vector} >= {self.nvectors}')
        a = self.bits[position, :, vector // 8]
        m = self.mask[vector % 8]
        if self.vdim == 1:
            return '1' if a[0] & m[1] else '0'
        elif self.vdim == 2:
            if a[0] & m[1]:
                return '1' if a[1] & m[1] else 'X'
            else:
                return '0' if a[1] & m[1] else '-'
        elif self.vdim == 3:
            if a[2] & m[1]:
                if a[0] & m[1]:
                    return 'F' if a[1] & m[1] else 'N'
                else:
                    return 'P' if a[1] & m[1] else 'R'
            else:
                if a[0] & m[1]:
                    return 'X' if a[1] & m[1] else '1'
                else:
                    return '0' if a[1] & m[1] else '-'

    def get_values_for_position(self, position):
        return ''.join(self.get_value(x, position) for x in range(self.nvectors))

    def set_value(self, vector, position, v):
        """Store value `v` for `vector` at `position`.

        :raises IndexError: if `vector` >= `nvectors`.
        """
        if vector >= self.nvectors:
            raise IndexError(f'vector out of range: {vector} >= {self.nvectors}')
        a = self.bits[position, :, vector // 8]
        m = self.mask[vector % 8]
        if self.vdim == 1:
            self._set_value_vd1(a, m, v)
        elif self.vdim == 2:
            self._set_value_vd2(a, m, v)
        elif self.vdim == 3:
            self._set_value_vd3(a, m, v)

    def set_values(self, vector, v, mapping=None, inversions=None):
        """Store the values `v` into `vector`.

        ``v[i]`` is written to position ``mapping[i]`` (identity if no mapping
        is given). Where ``inversions[i]`` is true, the characters
        ``0/1``, ``H/L`` and ``R/F`` are swapped before storing.

        :raises IndexError: if `vector` >= `nvectors`.
        """
        if vector >= self.nvectors:
            raise IndexError(f'vector out of range: {vector} >= {self.nvectors}')
        if not mapping:
            mapping = list(range(len(v)))
        if inversions is None:
            inversions = [False] * len(v)
        for i, c in enumerate(v):
            if inversions[i]:
                c = self._INVERTED.get(c, c)
            self.set_value(vector, mapping[i], c)

    def set_values_for_position(self, position, values):
        for i, v in enumerate(values):
            self.set_value(i, position, v)

    def __setitem__(self, vector, value):
        for i, c in enumerate(value):
            self.set_value(vector, i, c)

    def __getitem__(self, vector):
        """Return the string of vector `vector`, or a new `PackedVectors` for a slice."""
        if isinstance(vector, slice):
            first = self.get_values_for_position(0)[vector]
            ret = PackedVectors(len(first), self.width, self.vdim)
            ret.set_values_for_position(0, first)
            for pos in range(1, self.width):
                ret.set_values_for_position(pos, self.get_values_for_position(pos)[vector])
            return ret
        return ''.join(self.get_value(vector, pos) for pos in range(len(self.bits)))

    @staticmethod
    def _set_value_vd1(a, m, v):
        if v in [True, 1, '1', 'H', 'h']:
            a[0] |= m[1]
        else:
            a[0] &= m[0]

    @staticmethod
    def _set_value_vd2(a, m, v):
        if v in [True, 1, '1', 'H', 'h']:
            a[0] |= m[1]
            a[1] |= m[1]
        elif v in [False, 0, '0', 'L', 'l']:
            a[0] &= m[0]
            a[1] |= m[1]
        elif v in ['X', 'x']:
            a[0] |= m[1]
            a[1] &= m[0]
        else:
            a[0] &= m[0]
            a[1] &= m[0]

    #    i fb act
    # a  0  1  2
    # -  0  0  0  None, '-'
    # 0  0  1  0  False, 0, '0', 'l', 'L'
    # 1  1  0  0  True, 1, '1', 'h', 'H'
    # X  1  1  0  'x', 'X'
    # /  0  0  1  '/', 'r', 'R'
    # ^  0  1  1  '^', 'p', 'P'
    # v  1  0  1  'v', 'n', 'N'
    # \  1  1  1  '\', 'f', 'F'
    @staticmethod
    def _set_value_vd3(a, m, v):
        if v in [False, 0, '0', 'L', 'l']:
            a[0] &= m[0]
            a[1] |= m[1]
            a[2] &= m[0]
        elif v in [True, 1, '1', 'H', 'h']:
            a[0] |= m[1]
            a[1] &= m[0]
            a[2] &= m[0]
        elif v in ['X', 'x']:
            a[0] |= m[1]
            a[1] |= m[1]
            a[2] &= m[0]
        elif v in ['/', 'r', 'R']:
            a[0] &= m[0]
            a[1] &= m[0]
            a[2] |= m[1]
        elif v in ['^', 'p', 'P']:
            a[0] &= m[0]
            a[1] |= m[1]
            a[2] |= m[1]
        elif v in ['v', 'n', 'N']:
            a[0] |= m[1]
            a[1] &= m[0]
            a[2] |= m[1]
        elif v in ['\\', 'f', 'F']:
            a[0] |= m[1]
            a[1] |= m[1]
            a[2] |= m[1]
        else:
            a[0] &= m[0]
            a[1] &= m[0]
            a[2] &= m[0]

    def __repr__(self):
        return f'PackedVectors(nvectors={self.nvectors}, width={self.width}, vdim={self.vdim})'

    def __str__(self):
        """Return one line per vector; long lines and long lists are abbreviated with ``...``."""
        lst = []
        for p in range(self.nvectors):
            lst.append(''.join(self.get_value(p, w) for w in range(len(self.bits))))
        if len(lst) == 0:
            return ''
        if len(lst[0]) > 64:
            lst = [s[:32] + '...' + s[-32:] for s in lst]
        if len(lst) <= 16:
            return '\n'.join(lst)
        else:
            return '\n'.join(lst[:8]) + '\n...\n' + '\n'.join(lst[-8:])

    def diff(self, other, out=None):
        """Return a bit mask of positions where both operands are care values and their values differ."""
        if out is None:
            out = np.zeros((self.width, self.bits.shape[-1]), dtype='uint8')
        out[...] = (self.value_bits ^ other.value_bits) & self.care_bits & other.care_bits
        return out
# Pin name -> pin index for all pins that do not depend on the cell type.
# Any pin not listed here maps to index 0.
_PIN_INDEX = {
    'A2': 1, 'IN2': 1, 'SE': 1, 'B': 1, 'CO': 1,
    'A3': 2, 'IN3': 2, 'SI': 2, 'CI': 2,
    'A4': 3, 'IN4': 3, 'CLK': 3,  # CLK for scan cells SDFF
    'A5': 4, 'IN5': 4, 'RSTB': 4,
    'A6': 5, 'IN6': 5, 'SETB': 5,
}


def pin_index(cell_type, pin):
    """Return the pin index of `pin` for a cell of type `cell_type`.

    Cell-type specific rules (MUX21 select, flip-flop QN/CLK) are checked
    first; all remaining pins are resolved by name and default to 0.
    """
    if cell_type.startswith('MUX21') and pin == 'S':
        return 2
    if cell_type.startswith(('SDFF', 'DFF')) and pin == 'QN':
        return 1
    if cell_type.startswith('DFF') and pin == 'CLK':
        return 1
    return _PIN_INDEX.get(pin, 0)


def pin_is_output(kind, pin):
    """Return True if `pin` is an output pin of a cell of the given `kind`."""
    if 'MUX' in kind and pin == 'S':
        return False  # 'S' is the select input of a multiplexer, not a sum output
    return pin in ['Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S']


def add_and_connect(circuit, name, kind, in1=None, in2=None, out=None):
    """Create a node and attach existing lines to its first pins.

    `in1` and `in2` become input pins 0 and 1, `out` becomes output pin 0.
    The attached lines are updated to reference the new node.
    """
    n = Node(circuit, name, kind)
    if in1 is not None:
        n.ins[0] = in1
        in1.reader = n
        in1.reader_pin = 0
    if in2 is not None:
        n.ins[1] = in2
        in2.reader = n
        in2.reader_pin = 1
    if out is not None:
        n.outs[0] = out
        out.driver = n
        out.driver_pin = 0
    return n


def split_complex_gates(circuit):
    """Replace complex cells in `circuit` by equivalent networks of simple gates.

    Handled cell families: AO21/AOI21/OA21/OAI21, OA22/AO22,
    AO221/AOI221/OA221, AO222/AOI222/OA222, NOR3, FADD and MUX21.
    Each matching node is removed and its input/output lines are re-attached
    to newly created nodes named ``<name>~<suffix>``.
    """
    # Iterate over a snapshot: the loop body removes nodes and creates new
    # ones, which presumably mutates circuit.nodes while we walk it.
    for n in list(circuit.nodes):
        name = n.name
        ins = n.ins
        outs = n.outs
        if n.kind.startswith('AO21X'):
            n.remove()
            n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None)
            n_or = add_and_connect(circuit, name+'~or', 'OR2', None, ins[2], outs[0])
            Line(circuit, n_and, n_or)
        elif n.kind.startswith('AOI21X'):
            n.remove()
            n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None)
            n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0])
            Line(circuit, n_and, n_nor)
        elif n.kind.startswith('OA21X'):
            n.remove()
            n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
            n_and = add_and_connect(circuit, name+'~and', 'AND2', None, ins[2], outs[0])
            Line(circuit, n_or, n_and)
        elif n.kind.startswith('OAI21'):
            n.remove()
            n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
            n_nand = add_and_connect(circuit, name+'~nand', 'NAND2', None, ins[2], outs[0])
            Line(circuit, n_or, n_nand)
        elif n.kind.startswith('OA22X'):
            n.remove()
            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
            n_and = add_and_connect(circuit, name+'~and', 'AND2', None, None, outs[0])
            Line(circuit, n_or0, n_and)
            Line(circuit, n_or1, n_and)
        elif n.kind.startswith('AO22X'):
            n.remove()
            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
            n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, outs[0])
            Line(circuit, n_and0, n_or)
            Line(circuit, n_and1, n_or)
        elif n.kind.startswith('AO221X'):
            n.remove()
            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, ins[4], outs[0])
            Line(circuit, n_and0, n_or0)
            Line(circuit, n_and1, n_or0)
            Line(circuit, n_or0, n_or1)
        elif n.kind.startswith('AOI221X'):
            n.remove()
            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
            n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, None)
            n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[4], outs[0])
            Line(circuit, n_and0, n_or)
            Line(circuit, n_and1, n_or)
            Line(circuit, n_or, n_nor)
        elif n.kind.startswith('OA221X'):
            n.remove()
            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, ins[4], outs[0])
            Line(circuit, n_or0, n_and0)
            Line(circuit, n_or1, n_and0)
            Line(circuit, n_and0, n_and1)
        elif n.kind.startswith('AO222X'):
            n.remove()
            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
            n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None)
            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, None, outs[0])
            Line(circuit, n_and0, n_or0)
            Line(circuit, n_and1, n_or0)
            Line(circuit, n_and2, n_or1)
            Line(circuit, n_or0, n_or1)
        elif n.kind.startswith('AOI222X'):
            n.remove()
            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
            n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None)
            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
            n_nor1 = add_and_connect(circuit, name+'~nor1', 'NOR2', None, None, outs[0])
            Line(circuit, n_and0, n_or0)
            Line(circuit, n_and1, n_or0)
            Line(circuit, n_and2, n_nor1)
            Line(circuit, n_or0, n_nor1)
        elif n.kind.startswith('OA222X'):
            n.remove()
            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
            n_or2 = add_and_connect(circuit, name+'~or2', 'OR2', ins[4], ins[5], None)
            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, None, outs[0])
            Line(circuit, n_or0, n_and0)
            Line(circuit, n_or1, n_and0)
            Line(circuit, n_or2, n_and1)
            Line(circuit, n_and0, n_and1)
        elif n.kind.startswith('NOR3X'):
            n.remove()
            n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
            n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0])
            Line(circuit, n_or, n_nor)
        elif n.kind.startswith('FADDX'):
            n.remove()
            # forks for fan-outs
            f_a = add_and_connect(circuit, name + '~fork0', '__fork__', ins[0])
            f_b = add_and_connect(circuit, name + '~fork1', '__fork__', ins[1])
            f_ci = add_and_connect(circuit, name + '~fork2', '__fork__', ins[2])
            f_ab = Node(circuit, name + '~fork3')
            # sum-block
            n_xor0 = Node(circuit, name + '~xor0', 'XOR2')
            Line(circuit, f_a, n_xor0)
            Line(circuit, f_b, n_xor0)
            Line(circuit, n_xor0, f_ab)
            if len(outs) > 0 and outs[0] is not None:
                n_xor1 = add_and_connect(circuit, name + '~xor1', 'XOR2', None, None, outs[0])
                Line(circuit, f_ab, n_xor1)
                Line(circuit, f_ci, n_xor1)
            # carry-block
            if len(outs) > 1 and outs[1] is not None:
                n_and0 = Node(circuit, name + '~and0', 'AND2')
                Line(circuit, f_ab, n_and0)
                Line(circuit, f_ci, n_and0)
                n_and1 = Node(circuit, name + '~and1', 'AND2')
                Line(circuit, f_a, n_and1)
                Line(circuit, f_b, n_and1)
                n_or = add_and_connect(circuit, name + '~or0', 'OR2', None, None, outs[1])
                Line(circuit, n_and0, n_or)
                Line(circuit, n_and1, n_or)
        elif n.kind.startswith('MUX21X'):
            n.remove()
            f_s = add_and_connect(circuit, name + '~fork0', '__fork__', ins[2])
            n_not = Node(circuit, name + '~not', 'INV')
            Line(circuit, f_s, n_not)
            n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', ins[0])
            n_and1 = add_and_connect(circuit, name + '~and1', 'AND2', ins[1])
            n_or0 = add_and_connect(circuit, name + '~or0', 'OR2', None, None, outs[0])
            Line(circuit, n_not, n_and0)
            Line(circuit, f_s, n_and1)
            Line(circuit, n_and0, n_or0)
            Line(circuit, n_and1, n_or0)
class DelayFile:
    """Timing data of a parsed SDF file.

    ``cells`` maps cell names to their list of IOPATH entries;
    ``interconnects`` holds the entries that were stored under the key
    ``None`` (or is ``None`` if there were none).
    """

    def __init__(self, name, cells):
        self.name = name
        if None in cells:
            self.interconnects = cells[None]
        else:
            self.interconnects = None
        self.cells = dict((n, l) for n, l in cells.items() if n)

    def __repr__(self):
        # interconnects may be None when the file declares no interconnect delays
        return '\n'.join(f'{n}: {l}' for n, l in self.cells.items()) + '\n' + \
            '\n'.join(str(i) for i in self.interconnects or ())

    def annotation(self, circuit, pin_index_f, dataset=1, interconnect=True, ffdelays=True):
        """
        Constructs a 3-dimensional array with timing data for each line in `circuit`.
        Dimension 1 of the returned array is the line index.
        Dimension 2 is the type of timing data: 0:`delay`, 1:`pulse rejection limit`.
        Dimension 3 is the polarity at the output of the reading node: 0:`rising`, 1:`falling`.

        The polarity for pulse rejection is determined by the latter transition of the pulse.
        E.g., timing[42,1,0] is the rejection limit of a negative pulse at the output of the reader of line 42.

        An IOPATH delay for a node is annotated to the line connected to the input pin specified in the IOPATH.

        Currently, only ABSOLUTE IOPATH and INTERCONNECT delays are supported.
        Pulse rejection limits are derived from absolute delays, explicit declarations (PATHPULSE etc.) are ignored.

        :param circuit: circuit to annotate; must provide `lines` and a `cells` mapping from name to node.
        :param pin_index_f: function ``(cell_kind, pin_name) -> pin index``.
        :param dataset: index into each delay triple, or a tuple of indices whose values are averaged.
        :type dataset: int or tuple
        :param interconnect: if False, INTERCONNECT delays are not annotated.
        :param ffdelays: if False, delays of cells whose kind starts with ``sdff`` are not annotated.
        :return: array of shape ``(len(circuit.lines), 2, 2)``.
        """
        def select_del(_delvals, idx):
            if isinstance(dataset, tuple):
                return sum(_delvals[idx][d] for d in dataset) / len(dataset)
            return _delvals[idx][dataset]

        def find_cell(name):
            # Try the name as-is, then without escapes, then with brackets replaced.
            if name not in circuit.cells:
                name = name.replace('\\', '')
            if name not in circuit.cells:
                name = name.replace('[', '_').replace(']', '_')
            if name not in circuit.cells:
                return None
            return circuit.cells[name]

        timing = np.zeros((len(circuit.lines), 2, 2))

        def add_delays(_line, _delvals):
            # Delay and pulse rejection limit (dimension 2) receive the same value.
            if _line is not None:
                timing[_line.index, :, 0] += select_del(_delvals, 0)
                timing[_line.index, :, 1] += select_del(_delvals, 1)

        for cn, iopaths in self.cells.items():
            for ipn, opn, *delvals in iopaths:
                delvals = [d if len(d) > 0 else [0, 0, 0] for d in delvals]
                if max(max(delvals)) == 0:
                    continue
                cell = find_cell(cn)
                if cell is None:
                    log.warn(f'Cell from SDF not found in circuit: {cn}')
                    continue
                ipin = pin_index_f(cell.kind, ipn)
                opin = pin_index_f(cell.kind, opn)
                kind = cell.kind.lower()

                ipn2 = ipn.replace('(posedge A1)', 'A1').replace('(negedge A1)', 'A1')\
                    .replace('(posedge A2)', 'A2').replace('(negedge A2)', 'A2')

                take_avg = False
                if kind.startswith('sdff'):
                    if not ipn.startswith('(posedge CLK'):
                        continue
                    if ffdelays and (len(cell.outs) > opin):
                        add_delays(cell.outs[opin], delvals)
                else:
                    if kind.startswith(('xor', 'xnor')):
                        ipin = pin_index_f(cell.kind, ipn2)
                        # A second (edge-specific) entry for the same pin is averaged with the first.
                        take_avg = timing[cell.ins[ipin].index].sum() > 0
                    add_delays(cell.ins[ipin], delvals)
                    if take_avg:
                        timing[cell.ins[ipin].index] /= 2

        if not interconnect or self.interconnects is None:
            return timing

        for n1, n2, *delvals in self.interconnects:
            delvals = [d if len(d) > 0 else [0, 0, 0] for d in delvals]
            if max(max(delvals)) == 0:
                continue
            if '/' in n1:
                i = n1.rfind('/')
                cn1 = n1[0:i]
                pn1 = n1[i+1:]
            else:
                cn1, pn1 = (n1, 'Z')
            if '/' in n2:
                i = n2.rfind('/')
                cn2 = n2[0:i]
                pn2 = n2[i+1:]
            else:
                cn2, pn2 = (n2, 'IN')
            c1 = find_cell(cn1)
            if c1 is None:
                log.warn(f'Cell from SDF not found in circuit: {cn1}')
                continue
            c2 = find_cell(cn2)
            if c2 is None:
                log.warn(f'Cell from SDF not found in circuit: {cn2}')
                continue
            p1, p2 = pin_index_f(c1.kind, pn1), pin_index_f(c2.kind, pn2)
            line = None
            f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver
            if f1 != f2:  # possible branchfork
                assert len(f2.ins) == 1
                line = f2.ins[0]
                assert f1.outs[f2.ins[0].driver_pin] == line
            elif len(f2.outs) == 1:  # no fanout?
                line = f2.ins[0]
            if line is not None:
                add_delays(line, delvals)
            else:
                log.warn(f'No branchfork for annotating interconnect delay {c1.name}/{p1}->{c2.name}/{p2}')
        return timing
def sanitize(args):
    """Normalize the children of an INTERCONNECT/IOPATH rule.

    The first two entries are converted to ``str``. If exactly one delay
    triple follows, it is duplicated so that the result always carries
    separate entries for both polarities. The input list is left untouched.
    """
    delays = list(args[2:])
    if len(args) == 3:
        delays.append(args[2])
    return [str(args[0]), str(args[1])] + delays


class SdfTransformer(Transformer):
    """Lark transformer that turns the SDF parse tree into a `DelayFile`."""

    @staticmethod
    def triple(args):
        # Each token ends in a one-character delimiter (':' or ')'); an empty field counts as 0.0.
        return [float(a.value[:-1]) if len(a.value) > 1 else 0.0 for a in args]

    @staticmethod
    def interconnect(args):
        return Interconnect(*sanitize(args))

    @staticmethod
    def iopath(args):
        return IOPath(*sanitize(args))

    @staticmethod
    def cell(args):
        # The first string child is the instance name (None if absent);
        # remaining tree children contribute their entries.
        name = next((a for a in args if isinstance(a, str)), None)
        entries = [e for a in args if hasattr(a, 'children') for e in a.children]
        return name, entries

    @staticmethod
    def start(args):
        name = next((a for a in args if isinstance(a, str)), None)
        cells = dict(t for t in args if isinstance(t, tuple))
        return DelayFile(name, cells)


def parse(sdf) -> DelayFile:
    """Parse SDF data and return a `DelayFile`.

    :param sdf: either the SDF text itself, or - if it contains no newline -
        the name of a file to read. File names ending in ``.gz`` are read
        through gzip.
    """
    grammar = r"""
    start: "(DELAYFILE" ( "(SDFVERSION" _NOB ")"
        | "(DESIGN" "\"" NAME "\"" ")"
        | "(DATE" _NOB ")"
        | "(VENDOR" _NOB ")"
        | "(PROGRAM" _NOB ")"
        | "(VERSION" _NOB ")"
        | "(DIVIDER" _NOB ")"
        | "(VOLTAGE" _NOB ")"
        | "(PROCESS" _NOB? ")"
        | "(TEMPERATURE" _NOB ")"
        | "(TIMESCALE" _NOB ")"
        | cell )* ")"
    cell: "(CELL" ( "(CELLTYPE" _NOB ")"
        | "(INSTANCE" ID? ")"
        | "(TIMINGCHECK" _ignore* ")"
        | delay )* ")"
    delay: "(DELAY" "(ABSOLUTE" (interconnect | iopath)* ")" ")"
    interconnect: "(INTERCONNECT" ID ID triple* ")"
    iopath: "(IOPATH" ID_OR_EDGE ID_OR_EDGE triple* ")"
    NAME: /[^"]+/
    ID_OR_EDGE: ( /[^() ]+/ | "(" /[^)]+/ ")" )
    ID: ( /[^"() ]+/ | "\"" /[^"]+/ "\"" )
    triple: "(" ( /[-.0-9]*:/ /[-.0-9]*:/ /[-.0-9]*\)/ | ")" )
    _ignore: "(" _NOB? _ignore* ")" _NOB?
    _NOB: /[^()]+/
    COMMENT: "//" /[^\n]*/
    %ignore ( /\r?\n/ | COMMENT )+
    %ignore /[\t\f ]+/
    """
    source = str(sdf)
    if '\n' not in source:  # One line?: Assuming it is a file name.
        opener = gzip.open if source.endswith('.gz') else open
        with opener(sdf, 'rt') as f:
            text = f.read()
    else:
        text = source
    return Lark(grammar, parser="lalr", transformer=SdfTransformer()).parse(text)
_NOB ")" + | "(PROCESS" _NOB? ")" + | "(TEMPERATURE" _NOB ")" + | "(TIMESCALE" _NOB ")" + | cell )* ")" + cell: "(CELL" ( "(CELLTYPE" _NOB ")" + | "(INSTANCE" ID? ")" + | "(TIMINGCHECK" _ignore* ")" + | delay )* ")" + delay: "(DELAY" "(ABSOLUTE" (interconnect | iopath)* ")" ")" + interconnect: "(INTERCONNECT" ID ID triple* ")" + iopath: "(IOPATH" ID_OR_EDGE ID_OR_EDGE triple* ")" + NAME: /[^"]+/ + ID_OR_EDGE: ( /[^() ]+/ | "(" /[^)]+/ ")" ) + ID: ( /[^"() ]+/ | "\"" /[^"]+/ "\"" ) + triple: "(" ( /[-.0-9]*:/ /[-.0-9]*:/ /[-.0-9]*\)/ | ")" ) + _ignore: "(" _NOB? _ignore* ")" _NOB? + _NOB: /[^()]+/ + COMMENT: "//" /[^\n]*/ + %ignore ( /\r?\n/ | COMMENT )+ + %ignore /[\t\f ]+/ + """ + if '\n' not in str(sdf): # One line?: Assuming it is a file name. + if str(sdf).endswith('.gz'): + with gzip.open(sdf, 'rt') as f: + text = f.read() + else: + with open(sdf, 'r') as f: + text = f.read() + else: + text = str(sdf) + return Lark(grammar, parser="lalr", transformer=SdfTransformer()).parse(text) diff --git a/kyupy/stil.py b/kyupy/stil.py new file mode 100644 index 0000000..a0081b4 --- /dev/null +++ b/kyupy/stil.py @@ -0,0 +1,249 @@ +from lark import Lark, Transformer +from collections import namedtuple +import re +import gzip +from .packed_vectors import PackedVectors +from .logic_sim import LogicSim + + +Call = namedtuple('Call', ['name', 'parameters']) +ScanPattern = namedtuple('ScanPattern', ['load', 'launch', 'capture', 'unload']) + + +class StilFile: + def __init__(self, version, signal_groups, scan_chains, calls): + self.version = version + self.signal_groups = signal_groups + self.scan_chains = scan_chains + self.si_ports = dict((v[0], k) for k, v in scan_chains.items()) + self.so_ports = dict((v[-1], k) for k, v in scan_chains.items()) + self.calls = calls + self.patterns = [] + launch = {} + capture = {} + load = {} + for call in self.calls: + if call.name == 'load_unload': + unload = {} + for so_port in self.so_ports: + if so_port in call.parameters: + 
unload[so_port] = call.parameters[so_port].replace('\n', '') + if len(capture) > 0: + self.patterns.append(ScanPattern(load, launch, capture, unload)) + capture = {} + launch = {} + load = {} + for si_port in self.si_ports: + if si_port in call.parameters: + load[si_port] = call.parameters[si_port].replace('\n', '') + if call.name.endswith('_launch') or call.name.endswith('_capture'): + if len(launch) == 0: + launch = dict((k, v.replace('\n', '')) for k, v in call.parameters.items()) + else: + capture = dict((k, v.replace('\n', '')) for k, v in call.parameters.items()) + + def _maps(self, c): + interface = list(c.interface) + [n for n in c.nodes if 'DFF' in n.kind] + intf_pos = dict([(n.name, i) for i, n in enumerate(interface)]) + pi_map = [intf_pos[n] for n in self.signal_groups['_pi']] + po_map = [intf_pos[n] for n in self.signal_groups['_po']] + scan_maps = {} + scan_inversions = {} + for chain_name, chain in self.scan_chains.items(): + scan_map = [] + scan_in_inversion = [] + scan_out_inversion = [] + inversion = False + for n in chain[1:-1]: + if n == '!': + inversion = not inversion + else: + scan_in_inversion.append(inversion) + scan_in_inversion = list(reversed(scan_in_inversion)) + inversion = False + for n in reversed(chain[1:-1]): + if n == '!': + inversion = not inversion + else: + scan_map.append(intf_pos[n]) + scan_out_inversion.append(inversion) + scan_maps[chain[0]] = scan_map + scan_maps[chain[-1]] = scan_map + scan_inversions[chain[0]] = scan_in_inversion + scan_inversions[chain[-1]] = scan_out_inversion + return interface, pi_map, po_map, scan_maps, scan_inversions + + def tests(self, c): + interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(c) + tests = PackedVectors(len(self.patterns), len(interface), 2) + for i, p in enumerate(self.patterns): + for si_port in self.si_ports.keys(): + tests.set_values(i, p.load[si_port], scan_maps[si_port], scan_inversions[si_port]) + tests.set_values(i, p.launch['_pi'], pi_map) + return tests + 
+ def tests8v(self, c): + interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(c) + init = PackedVectors(len(self.patterns), len(interface), 2) + for i, p in enumerate(self.patterns): + # init.set_values(i, '0' * len(interface)) + for si_port in self.si_ports.keys(): + init.set_values(i, p.load[si_port], scan_maps[si_port], scan_inversions[si_port]) + init.set_values(i, p.launch['_pi'], pi_map) + sim4v = LogicSim(c, len(init), 2) + sim4v.assign(init) + sim4v.propagate() + launch = init.copy() + sim4v.capture(launch) + for i, p in enumerate(self.patterns): + # if there was no launch clock, then init = launch + if ('P' not in p.launch['_pi']) or ('P' not in p.capture['_pi']): + for si_port in self.si_ports.keys(): + launch.set_values(i, p.load[si_port], scan_maps[si_port], scan_inversions[si_port]) + if 'P' in p.capture['_pi']: + launch.set_values(i, p.capture['_pi'], pi_map) + + return PackedVectors.from_pair(init, launch) + + def responses(self, c): + interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(c) + resp = PackedVectors(len(self.patterns), len(interface), 2) + for i, p in enumerate(self.patterns): + resp.set_values(i, p.capture['_po'], po_map) + for so_port in self.so_ports.keys(): + resp.set_values(i, p.unload[so_port], scan_maps[so_port], scan_inversions[so_port]) + return resp + + +class StilTransformer(Transformer): + def __init__(self): + super().__init__() + self._signal_groups = None + self._calls = None + self._scan_chains = None + + @staticmethod + def quoted(args): return args[0][1:-1] + + @staticmethod + def call(args): return Call(args[0], dict(args[1:])) + + @staticmethod + def call_parameter(args): return args[0], args[1].value + + @staticmethod + def signal_group(args): return args[0], args[1:] + + @staticmethod + def scan_chain(args): + scan_in = None + scan_cells = None + scan_out = None + for t in args[1:]: + if t.data == 'scan_in': + scan_in = t.children[0] + elif t.data == 'scan_out': + scan_out = 
t.children[0] + if t.data == 'scan_cells': + scan_cells = [n.replace('.SI', '') for n in t.children] + scan_cells = [re.sub(r'.*\.', '', s) if '.' in s else s for s in scan_cells] + return args[0], ([scan_in] + scan_cells + [scan_out]) + + def signal_groups(self, args): self._signal_groups = dict(args) + + def pattern(self, args): self._calls = [c for c in args if isinstance(c, Call)] + + def scan_structures(self, args): self._scan_chains = dict(args) + + def start(self, args): + return StilFile(float(args[0]), self._signal_groups, self._scan_chains, self._calls) + + +def parse(stil): + grammar = r""" + start: "STIL" FLOAT _ignore _block* + _block: signal_groups | scan_structures | pattern + | "Header" _ignore + | "Signals" _ignore + | "Timing" _ignore + | "PatternBurst" quoted _ignore + | "PatternExec" _ignore + | "Procedures" _ignore + | "MacroDefs" _ignore + + signal_groups: "SignalGroups" "{" signal_group* "}" + signal_group: quoted "=" "'" quoted ( "+" quoted)* "'" _ignore? ";"? + + scan_structures: "ScanStructures" "{" scan_chain* "}" + scan_chain: "ScanChain" quoted "{" ( scan_length + | scan_in | scan_out | scan_inversion | scan_cells | scan_master_clock )* "}" + scan_length: "ScanLength" /[0-9]+/ ";" + scan_in: "ScanIn" quoted ";" + scan_out: "ScanOut" quoted ";" + scan_inversion: "ScanInversion" /[0-9]+/ ";" + scan_cells: "ScanCells" (quoted | /!/)* ";" + scan_master_clock: "ScanMasterClock" quoted ";" + + pattern: "Pattern" quoted "{" ( label | w | c | macro | ann | call )* "}" + label: quoted ":" + w: "W" quoted ";" + c: "C" _ignore + macro: "Macro" quoted ";" + ann: "Ann" _ignore + call: "Call" quoted "{" call_parameter* "}" + call_parameter: quoted "=" /[^;]+/ ";" + + quoted: /"[^"]*"/ + FLOAT: /[-0-9.]+/ + _ignore: "{" _NOB? _ignore_inner* "}" + _ignore_inner: "{" _NOB? _ignore_inner* "}" _NOB? + _NOB: /[^{}]+/ + %ignore ( /\r?\n/ | "//" /[^\n]*/ | /[\t\f ]/ )+ + """ + if '\n' not in str(stil): # One line?: Assuming it is a file name. 
+ if str(stil).endswith('.gz'): + with gzip.open(stil, 'rt') as f: + text = f.read() + else: + with open(stil, 'r') as f: + text = f.read() + else: + text = str(stil) + return Lark(grammar, parser="lalr", transformer=StilTransformer()).parse(text) + + +def extract_scan_pattens(stil_calls): + pats = [] + pi = None + scan_in = None + for call in stil_calls: + if call.name == 'load_unload': + scan_out = call.parameters.get('Scan_Out') + if scan_out is not None: + scan_out = scan_out.replace('\n', '') + if pi: pats.append(ScanPattern(scan_in, pi, None, scan_out)) + scan_in = call.parameters.get('Scan_In') + if scan_in is not None: + scan_in = scan_in.replace('\n', '') + if call.name == 'allclock_capture': + pi = call.parameters['_pi'].replace('\n', '') + return pats + + +def match_patterns(stil_file, pats, interface): + intf_pos = dict([(n.name, i) for i, n in enumerate(interface)]) + pi_map = [intf_pos[n] for n in stil_file.signal_groups['_pi']] + scan_map = [intf_pos[re.sub(r'b..\.', '', n)] for n in reversed(stil_file.scan_chains['1'])] + # print(scan_map) + tests = PackedVectors(len(pats), len(interface), 2) + for i, p in enumerate(pats): + tests.set_values(i, p.scan_in, scan_map) + tests.set_values(i, p.pi, pi_map) + + resp = PackedVectors(len(pats), len(interface), 2) + for i, p in enumerate(pats): + resp.set_values(i, p.pi, pi_map) + resp.set_values(i, p.scan_out, scan_map) + + return tests, resp + diff --git a/kyupy/verilog.py b/kyupy/verilog.py new file mode 100644 index 0000000..2516a81 --- /dev/null +++ b/kyupy/verilog.py @@ -0,0 +1,161 @@ +from lark import Lark, Transformer +from collections import namedtuple +import gzip +from .circuit import Circuit, Node, Line +from .saed import pin_index, pin_is_output + +Instantiation = namedtuple('Instantiation', ['type', 'name', 'pins']) + + +class SignalDeclaration: + + def __init__(self, kind, tokens): + self.left = None + self.right = None + self.kind = kind + if len(tokens.children) == 1: + self.basename = 
tokens.children[0] + else: + self.basename = tokens.children[2] + self.left = int(tokens.children[0].value) + self.right = int(tokens.children[1].value) + + @property + def names(self): + if self.left is None: + return [self.basename] + if self.left <= self.right: + return [f'{self.basename}[{i}]' for i in range(self.left, self.right + 1)] + else: + return [f'{self.basename}[{i}]' for i in range(self.left, self.right - 1, -1)] + + def __repr__(self): + return f"{self.kind}:{self.basename}[{self.left}:{self.right}]" + + +class VerilogTransformer(Transformer): + def __init__(self, branchforks=False): + super().__init__() + self._signal_declarations = {} + self.branchforks = branchforks + + @staticmethod + def name(args): + s = args[0].value + if s[0] == '\\': + s = s[1:-1] + return s + + @staticmethod + def instantiation(args): + return Instantiation(args[0], args[1], + dict([(pin.children[0], pin.children[1]) for pin in args[2:]])) + + def input(self, args): + for sd in [SignalDeclaration('input', signal) for signal in args]: + self._signal_declarations[sd.basename] = sd + + def inout(self, args): + for sd in [SignalDeclaration('input', signal) for signal in args]: # just treat as input + self._signal_declarations[sd.basename] = sd + + def output(self, args): + for sd in [SignalDeclaration('output', signal) for signal in args]: + self._signal_declarations[sd.basename] = sd + + def wire(self, args): + for sd in [SignalDeclaration('wire', signal) for signal in args]: + self._signal_declarations[sd.basename] = sd + + def module(self, args): + c = Circuit(args[0]) + positions = {} + pos = 0 + for intf_sig in args[1].children: + for name in self._signal_declarations[intf_sig].names: + positions[name] = pos + pos += 1 + assignments = [] + for stmt in args[2:]: # pass 1: instantiate cells and driven signals + if type(stmt) is Instantiation: + n = Node(c, stmt.name, kind=stmt.type) + for p, s in stmt.pins.items(): + if pin_is_output(n.kind, p): + Line(c, (n, 
pin_index(stmt.type, p)), Node(c, s)) + elif stmt is not None and stmt.data == 'assign': + assignments.append((stmt.children[0], stmt.children[1])) + for sd in self._signal_declarations.values(): + if sd.kind == 'output' or sd.kind == 'input': + for name in sd.names: + n = Node(c, name, kind=sd.kind) + if name in positions: + c.interface[positions[name]] = n + if sd.kind == 'input': + Line(c, n, Node(c, name)) + for s1, s2 in assignments: # pass 1.5: process signal assignments + if s1 in c.forks: + assert s2 not in c.forks, 'assignment between two driven signals' + Line(c, c.forks[s1], Node(c, s2)) + elif s2 in c.forks: + assert s1 not in c.forks, 'assignment between two driven signals' + Line(c, c.forks[s2], Node(c, s1)) + for stmt in args[2:]: # pass 2: connect signals to readers + if type(stmt) is Instantiation: + for p, s in stmt.pins.items(): + n = c.cells[stmt.name] + if pin_is_output(n.kind, p): continue + if s.startswith("1'b"): + const = f'__const{s[3]}__' + if const not in c.cells: + Line(c, Node(c, const, const), Node(c, s)) + fork = c.forks[s] + if self.branchforks: + branchfork = Node(c, fork.name + "~" + n.name) + Line(c, fork, branchfork) + fork = branchfork + Line(c, fork, (n, pin_index(stmt.type, p))) + for sd in self._signal_declarations.values(): + if sd.kind == 'output': + for name in sd.names: + Line(c, c.forks[name], c.cells[name]) + return c + + @staticmethod + def start(args): + if len(args) == 1: + return args[0] + else: + return args + + +def parse(verilog, branchforks=False) -> Circuit: + grammar = """ + start: (module)* + module: "module" name parameters ";" (_statement)* "endmodule" + parameters: "(" [ name ( "," name )* ] ")" + _statement: input | output | inout | tri | wire | assign | instantiation + input: "input" signal ( "," signal )* ";" + output: "output" signal ( "," signal )* ";" + inout: "inout" signal ( "," signal )* ";" + tri: "tri" name ";" + wire: "wire" signal ( "," signal )* ";" + assign: "assign" name "=" name ";" + 
instantiation: name name "(" [ pin ( "," pin )* ] ")" ";" + pin: "." name "(" name ")" + signal: ( name | "[" /[0-9]+/ ":" /[0-9]+/ "]" name ) + + name: ( /[a-z_][a-z0-9_\\[\\]]*/i | /\\\\[^\\t \\r\\n]+[\\t \\r\\n](\\[[0-9]+\\])?/i | /1'b0/i | /1'b1/i ) + COMMENT: "//" /[^\\n]*/ + %ignore ( /\\r?\\n/ | COMMENT )+ + %ignore /[\\t \\f]+/ + """ + if '\n' not in str(verilog): # One line?: Assuming it is a file name. + if str(verilog).endswith('.gz'): + with gzip.open(verilog, 'rt') as f: + text = f.read() + else: + with open(verilog, 'r') as f: + text = f.read() + else: + text = str(verilog) + return Lark(grammar, parser="lalr", transformer=VerilogTransformer(branchforks)).parse(text) diff --git a/kyupy/wave_sim.py b/kyupy/wave_sim.py new file mode 100644 index 0000000..153d431 --- /dev/null +++ b/kyupy/wave_sim.py @@ -0,0 +1,401 @@ +import numpy as np +import math +from . import numba + + +TMAX = np.float32(2 ** 127) # almost np.PINF for 32-bit floating point values +TMIN = np.float32(-2 ** 127) # almost np.NINF for 32-bit floating point values + + +class WaveSim: + def __init__(self, circuit, timing, sims=8, wavecaps=16): + self.circuit = circuit + self.sims = sims + self.overflows = 0 + self.interface = list(circuit.interface) + [n for n in circuit.nodes if 'dff' in n.kind.lower()] + + self.lst_eat_valid = False + + self.cdata = np.zeros((len(self.interface), sims, 6), dtype='float32') + + if type(wavecaps) is int: + wavecaps = [wavecaps] * len(circuit.lines) + + intf_wavecap = 4 # sufficient for storing only 1 transition. + + # state allocation table. 
maps line and interface indices to self.state memory locations + + self.sat = np.zeros((len(circuit.lines) + 2 + 2 * len(self.interface), 2), dtype='int') + self.sat[:, 0] = -1 + filled = 0 + for lidx, cap in enumerate(wavecaps): + self.sat[lidx] = filled, cap + filled += cap + + self.zero_idx = len(circuit.lines) + self.sat[self.zero_idx] = filled, intf_wavecap + filled += intf_wavecap + self.tmp_idx = self.zero_idx + 1 + self.sat[self.tmp_idx] = filled, intf_wavecap + filled += intf_wavecap + + self.ppi_offset = self.tmp_idx + 1 + self.ppo_offset = self.ppi_offset + len(self.interface) + for i, n in enumerate(self.interface): + if len(n.outs) > 0: + self.sat[self.ppi_offset + i] = filled, intf_wavecap + filled += intf_wavecap + if len(n.ins) > 0: + self.sat[self.ppo_offset + i] = self.sat[n.ins[0].index] + + # pad timing + self.timing = np.zeros((len(self.sat), 2, 2)) + self.timing[:len(timing)] = timing + + # allocate self.state + self.state = np.zeros((filled, sims), dtype='float32') + TMAX + + # generate self.ops + ops = [] + interface_dict = dict([(n, i) for i, n in enumerate(self.interface)]) + for n in circuit.topological_order(): + if n in interface_dict: + inp_idx = self.ppi_offset + interface_dict[n] + if len(n.outs) > 0 and n.outs[0] is not None: + ops.append((0b1010, n.outs[0].index, inp_idx, self.zero_idx)) + if 'dff' in n.kind.lower(): + if len(n.outs) > 1 and n.outs[1] is not None: + ops.append((0b0101, n.outs[1].index, inp_idx, self.zero_idx)) + else: + for o_line in n.outs[1:]: + if o_line is not None: + ops.append((0b1010, o_line.index, inp_idx, self.zero_idx)) + else: + o0_idx = self.tmp_idx + i0_idx = self.zero_idx + i1_idx = self.zero_idx + if len(n.outs) > 0 and n.outs[0] is not None: + o0_idx = n.outs[0].index + else: + print(f'no outputs for {n}') + if len(n.ins) > 0 and n.ins[0] is not None: i0_idx = n.ins[0].index + if len(n.ins) > 1 and n.ins[1] is not None: i1_idx = n.ins[1].index + kind = n.kind.lower() + if kind == '__fork__': + for 
o_line in n.outs: + ops.append((0b1010, o_line.index, i0_idx, i1_idx)) + elif kind.startswith('nand'): + ops.append((0b0111, o0_idx, i0_idx, i1_idx)) + elif kind.startswith('nor'): + ops.append((0b0001, o0_idx, i0_idx, i1_idx)) + elif kind.startswith('and'): + ops.append((0b1000, o0_idx, i0_idx, i1_idx)) + elif kind.startswith('or'): + ops.append((0b1110, o0_idx, i0_idx, i1_idx)) + elif kind.startswith('xor'): + ops.append((0b0110, o0_idx, i0_idx, i1_idx)) + elif kind.startswith('xnor'): + ops.append((0b1001, o0_idx, i0_idx, i1_idx)) + elif kind.startswith('not') or kind.startswith('inv'): + ops.append((0b0101, o0_idx, i0_idx, i1_idx)) + elif kind.startswith('buf') or kind.startswith('nbuf'): + ops.append((0b1010, o0_idx, i0_idx, i1_idx)) + elif kind.startswith('__const1__') or kind.startswith('tieh'): + ops.append((0b0101, o0_idx, i0_idx, i1_idx)) + elif kind.startswith('__const0__') or kind.startswith('tiel'): + ops.append((0b1010, o0_idx, i0_idx, i1_idx)) + else: + print('unknown gate type', kind) + self.ops = np.asarray(ops, dtype='int32') + + # generate level data + levels = np.zeros(len(self.sat), dtype='int32') + level_starts = [0] + current_level = 1 + for i, op in enumerate(self.ops): + if levels[op[2]] >= current_level or levels[op[3]] >= current_level: + current_level += 1 + level_starts.append(i) + levels[op[1]] = current_level + self.level_starts = np.asarray(level_starts, dtype='int32') + self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32') + + m1 = np.array([2 ** x for x in range(7, -1, -1)], dtype='uint8') + m0 = ~m1 + self.mask = np.rollaxis(np.vstack((m0, m1)), 1) + + def get_line_delay(self, line, polarity): + return self.timing[line, 0, polarity] + + def set_line_delay(self, line, polarity, delay): + self.timing[line, 0, polarity] = delay + + def assign(self, vectors, time=0.0, offset=0): + nvectors = min(vectors.nvectors - offset, self.sims) + for i, node in enumerate(self.interface): + ppi_loc = 
self.sat[self.ppi_offset + i, 0] + if ppi_loc < 0: continue + for p in range(nvectors): + vector = p + offset + a = vectors.bits[i, :, vector // 8] + m = self.mask[vector % 8] + toggle = 0 + if a[0] & m[1]: + self.state[ppi_loc, p] = TMIN + toggle += 1 + if (len(a) > 2) and (a[2] & m[1]) and ((a[0] & m[1]) == (a[1] & m[1])): + self.state[ppi_loc + toggle, p] = time + toggle += 1 + self.state[ppi_loc + toggle, p] = TMAX + + def propagate(self, sims=None, sd=0.0, seed=1): + if sims is None: + sims = self.sims + else: + sims = min(sims, self.sims) + for op_start, op_stop in zip(self.level_starts, self.level_stops): + self.overflows += level_eval(self.ops, op_start, op_stop, self.state, self.sat, 0, sims, + self.timing, sd, seed) + self.lst_eat_valid = False + + def wave(self, line, vector): + if line < 0: + return [TMAX] + mem, wcap = self.sat[line] + if mem < 0: + return [TMAX] + return self.state[mem:mem + wcap, vector] + + def wave_ppi(self, i, vector): + return self.wave(self.ppi_offset + i, vector) + + def wave_ppo(self, o, vector): + return self.wave(self.ppo_offset + o, vector) + + def capture(self, time=TMAX, sd=0, seed=1, probabilities=None, offset=0): + for i, node in enumerate(self.interface): + if len(node.ins) == 0: continue + for p in range(self.sims): + self.cdata[i, p] = self.capture_wave(self.ppo_offset + i, p, time, sd, seed) + if probabilities is not None: + assert offset < probabilities.shape[1] + cap_dim = min(probabilities.shape[1] - offset, self.sims) + probabilities[:, offset:cap_dim + offset] = self.cdata[:, 0:cap_dim, 0] + self.lst_eat_valid = True + return self.cdata + + def reassign(self, time=0.0): + for i, node in enumerate(self.interface): + ppi_loc = self.sat[self.ppi_offset + i] + ppo_loc = self.sat[self.ppo_offset + i] + if ppi_loc < 0 or ppo_loc < 0: continue + for sidx in range(self.sims): + ival = self.val(self.ppi_offset + i, sidx, TMAX) > 0.5 + oval = self.cdata[i, sidx, 1] > 0.5 + toggle = 0 + if ival: + self.state[ppi_loc, 
sidx] = TMIN + toggle += 1 + if ival != oval: + self.state[ppi_loc + toggle, sidx] = time + toggle += 1 + self.state[ppi_loc + toggle, sidx] = TMAX + + def eat(self, line, vector): + eat = TMAX + for t in self.wave(line, vector): + if t >= TMAX: break + if t <= TMIN: continue + eat = min(eat, t) + return eat + + def lst(self, line, vector): + lst = TMIN + for t in self.wave(line, vector): + if t >= TMAX: break + if t <= TMIN: continue + lst = max(lst, t) + return lst + + def lst_ppo(self, o, vector): + if not self.lst_eat_valid: + self.capture() + return self.cdata[o, vector, 5] + + def toggles(self, line, vector): + tog = 0 + for t in self.wave(line, vector): + if t >= TMAX: break + if t <= TMIN: continue + tog += 1 + return tog + + def _vals(self, idx, vector, times, sd=0.0): + s_sqrt2 = sd * math.sqrt(2) + m = 0.5 + accs = [0.0] * len(times) + values = [0] * len(times) + for t in self.wave(idx, vector): + if t >= TMAX: break + for idx, time in enumerate(times): + if t < time: + values[idx] = values[idx] ^ 1 + m = -m + if t <= TMIN: continue + if s_sqrt2 > 0: + for idx, time in enumerate(times): + accs[idx] += m * (1 + math.erf((t - time) / s_sqrt2)) + if (m < 0) and (s_sqrt2 > 0): + for idx, time in enumerate(times): + accs[idx] += 1 + if s_sqrt2 == 0: + return values + else: + return accs + + def vals(self, line, vector, times, sd=0): + return self._vals(line, vector, times, sd) + + def val(self, line, vector, time=TMAX, sd=0): + return self.capture_wave(line, vector, time, sd)[0] + + def vals_ppo(self, o, vector, times, sd=0): + return self._vals(self.ppo_offset + o, vector, times, sd) + + def val_ppo(self, o, vector, time=TMAX, sd=0): + if not self.lst_eat_valid: + self.capture(time, sd) + return self.cdata[o, vector, 0] + + def capture_wave(self, line, vector, time=TMAX, sd=0.0, seed=1): + s_sqrt2 = sd * math.sqrt(2) + m = 0.5 + acc = 0.0 + eat = TMAX + lst = TMIN + tog = 0 + val = int(0) + final = int(0) + for t in self.wave(line, vector): + if t >= TMAX: 
break + m = -m + final ^= 1 + if t < time: + val ^= 1 + if t <= TMIN: continue + if s_sqrt2 > 0: + acc += m * (1 + math.erf((t - time) / s_sqrt2)) + eat = min(eat, t) + lst = max(lst, t) + tog += 1 + if s_sqrt2 > 0: + if m < 0: + acc += 1 + if acc >= 0.99: + val = 1 + elif acc > 0.01: + seed = (seed << 4) + (vector << 20) + (line-self.ppo_offset << 1) + seed = int(0xDEECE66D) * seed + 0xB + seed = int(0xDEECE66D) * seed + 0xB + rnd = float((seed >> 8) & 0xffffff) / float(1 << 24) + val = rnd < acc + else: + val = 0 + else: + acc = val + + return acc, val, final, (val != final), eat, lst + + +@numba.njit +def level_eval(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sd, seed): + overflows = 0 + for op_idx in range(op_start, op_stop): + op = ops[op_idx] + for st_idx in range(st_start, st_stop): + overflows += wave_eval(op, state, sat, st_idx, line_times, sd, seed) + return overflows + + +@numba.njit +def rand_gauss(seed, sd): + clamp = 0.5 + if sd <= 0.0: + return 1.0 + while True: + x = -6.0 + for i in range(12): + seed = int(0xDEECE66D) * seed + 0xB + x += float((seed >> 8) & 0xffffff) / float(1 << 24) + x *= sd + if abs(x) <= clamp: + break + return x + 1.0 + + +@numba.njit +def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0): + lut, z_idx, a_idx, b_idx = op + overflows = int(0) + + _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) + + a_mem = sat[a_idx, 0] + b_mem = sat[b_idx, 0] + z_mem, z_cap = sat[z_idx] + + a_cur = int(0) + b_cur = int(0) + z_cur = lut & 1 + if z_cur == 1: + state[z_mem, st_idx] = TMIN + + a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd) + b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd) + + previous_t = TMIN + + current_t = min(a, b) + inputs = int(0) + + while current_t < TMAX: + z_val = z_cur & 1 + if b < a: + b_cur += 1 + b = state[b_mem + b_cur, st_idx] + b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ 
b_mem ^ z_val ^ 1, sd) + thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd) + inputs ^= 2 + next_t = b + else: + a_cur += 1 + a = state[a_mem + a_cur, st_idx] + a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ a_mem ^ z_val ^ 1, sd) + thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd) + inputs ^= 1 + next_t = a + + if (z_cur & 1) != ((lut >> inputs) & 1): + # we generate a toggle in z_mem, if: + # ( it is the first toggle in z_mem OR + # following toggle is earlier OR + # pulse is wide enough ) AND enough space in z_mem. + if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh: + if z_cur < (z_cap - 1): + state[z_mem + z_cur, st_idx] = current_t + previous_t = current_t + z_cur += 1 + else: + overflows += 1 + previous_t = state[z_mem + z_cur - 1, st_idx] + z_cur -= 1 + else: + z_cur -= 1 + if z_cur > 0: + previous_t = state[z_mem + z_cur - 1, st_idx] + else: + previous_t = TMIN + current_t = min(a, b) + + state[z_mem + z_cur, st_idx] = TMAX + return overflows diff --git a/kyupy/wave_sim_cuda.py b/kyupy/wave_sim_cuda.py new file mode 100644 index 0000000..9e92901 --- /dev/null +++ b/kyupy/wave_sim_cuda.py @@ -0,0 +1,282 @@ +import numpy as np +import math +from .wave_sim import WaveSim +from . 
class WaveSimCuda(WaveSim):
    """Waveform simulator that runs the ``WaveSim`` data structures through CUDA kernels.

    The host arrays created by ``WaveSim.__init__`` (``state``, ``sat``,
    ``ops``, ``timing``, ``cdata``) are mirrored on the device as ``d_*``
    attributes; all kernels operate on these device copies.
    """

    def __init__(self, circuit, timing, sims=8, wavecaps=16):
        """Build the base simulator and upload its arrays to the device.

        Args:
            circuit: Circuit to simulate (forwarded to ``WaveSim``).
            timing: Line timing data (forwarded to ``WaveSim``).
            sims: Number of simulations (columns of ``state``).
            wavecaps: Waveform capacities (forwarded to ``WaveSim``).
        """
        super().__init__(circuit, timing, sims, wavecaps)

        # Bit-packed stimuli: 3 bit planes per interface position, 8 simulations per byte.
        self.tdata = np.zeros((len(self.interface), 3, (sims - 1) // 8 + 1), dtype='uint8')

        self.d_state = cuda.to_device(self.state)
        self.d_sat = cuda.to_device(self.sat)
        self.d_ops = cuda.to_device(self.ops)
        self.d_timing = cuda.to_device(self.timing)
        self.d_tdata = cuda.to_device(self.tdata)
        self.d_cdata = cuda.to_device(self.cdata)

        # threads per block: (simulations, interface positions / ops)
        self._block_dim = (32, 16)

    def get_line_delay(self, line, polarity):
        """Return the delay stored for ``line`` and ``polarity`` in the device timing array."""
        return self.d_timing[line, 0, polarity]

    def set_line_delay(self, line, polarity, delay):
        """Set the delay for ``line`` and ``polarity`` in the device timing array.

        Only the device copy is written; the host ``timing`` array is not touched here.
        """
        self.d_timing[line, 0, polarity] = delay

    def assign(self, vectors, time=0.0, offset=0):
        """Load stimuli from ``vectors`` and turn them into waveforms at the inputs.

        Args:
            vectors: Packed vectors providing ``bits`` and ``vdim``.
            time: Time at which transitions encoded in the vectors are placed.
            offset: Index of the first vector to use; must be a multiple of 8
                because the data is copied byte-wise.
        """
        assert (offset % 8) == 0
        byte_offset = offset // 8
        assert byte_offset < vectors.bits.shape[-1]
        # number of bytes that are available in the source and fit into tdata
        pdim = min(vectors.bits.shape[-1] - byte_offset, self.tdata.shape[-1])

        self.tdata[..., 0:pdim] = vectors.bits[..., byte_offset:pdim + byte_offset]
        if vectors.vdim == 1:
            # Single-plane data was broadcast to all three planes above.
            # Clearing plane 2 disables the transition in assign_kernel.
            self.tdata[:, 1, 0:pdim] = ~self.tdata[:, 1, 0:pdim]
            self.tdata[:, 2, 0:pdim] = 0
        cuda.to_device(self.tdata, to=self.d_tdata)

        grid_dim = self._grid_dim(self.sims, len(self.interface))
        assign_kernel[grid_dim, self._block_dim](self.d_state, self.d_sat, self.ppi_offset,
                                                 len(self.interface), self.d_tdata, time)

    def _grid_dim(self, x, y):
        """Return the number of blocks needed to cover an ``x`` by ``y`` index space."""
        gx = math.ceil(x / self._block_dim[0])
        gy = math.ceil(y / self._block_dim[1])
        return gx, gy

    def propagate(self, sims=None, sd=0.0, seed=1):
        """Evaluate all levels of the circuit, one kernel launch per level.

        Args:
            sims: Number of simulations to propagate; defaults to and is
                clamped at ``self.sims``.
            sd: Passed to the kernel, which uses it to scale delay variation.
            seed: Base seed for the delay variation.
        """
        if sims is None:
            sims = self.sims
        else:
            sims = min(sims, self.sims)
        for op_start, op_stop in zip(self.level_starts, self.level_stops):
            grid_dim = self._grid_dim(sims, op_stop - op_start)
            # Pass the device copy of sat, as every other launch in this class does.
            # Handing the host array to the kernel forces an implicit transfer per launch.
            wave_kernel[grid_dim, self._block_dim](self.d_ops, op_start, op_stop, self.d_state, self.d_sat, int(0),
                                                   sims, self.d_timing, sd, seed)
        cuda.synchronize()
        self.lst_eat_valid = False

    def wave(self, line, vector):
        """Return the waveform slots of ``line`` for simulation ``vector``.

        Returns ``None`` if ``line`` is negative or has no waveform memory
        assigned (negative start row in ``sat``).
        """
        if line < 0:
            return None
        mem, wcap = self.sat[line]
        if mem < 0:
            return None
        return self.d_state[mem:mem + wcap, vector]

    def capture(self, time=TMAX, sd=0, seed=1, probabilities=None, offset=0):
        """Sample the output waveforms and copy the results back to the host.

        Args:
            time: Capture time; toggles before it determine the captured value.
            sd: Capture-time uncertainty; the kernel receives ``sd * sqrt(2)``.
            seed: Base seed for the randomised capture decision.
            probabilities: Optional 2-D array that receives channel 0 of
                ``cdata`` for as many simulations as fit.
            offset: First column of ``probabilities`` to write.

        Returns:
            The host ``cdata`` array filled by the kernel.
        """
        grid_dim = self._grid_dim(self.sims, len(self.interface))
        capture_kernel[grid_dim, self._block_dim](self.d_state, self.d_sat, self.ppo_offset,
                                                  self.d_cdata, time, sd * math.sqrt(2), seed)
        self.cdata[...] = self.d_cdata
        if probabilities is not None:
            assert offset < probabilities.shape[1]
            cap_dim = min(probabilities.shape[1] - offset, self.sims)
            probabilities[:, offset:cap_dim + offset] = self.cdata[:, 0:cap_dim, 0]
        self.lst_eat_valid = True
        return self.cdata

    def reassign(self, time=0.0):
        """Rewrite the input waveforms from the last captured values.

        Launches ``reassign_kernel``, which places a transition at ``time``
        wherever the captured value differs from the current input value.
        """
        grid_dim = self._grid_dim(self.sims, len(self.interface))
        reassign_kernel[grid_dim, self._block_dim](self.d_state, self.d_sat, self.ppi_offset, self.ppo_offset,
                                                   self.d_cdata, time)
        cuda.synchronize()
@cuda.jit()
def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sd, seed):
    """CUDA kernel: evaluate one two-input cell for one simulation per thread.

    The thread's x index selects the simulation (``st_start + x``) and its
    y index selects the operation (``op_start + y``); threads outside
    ``[st_start, st_stop)`` or ``[op_start, op_stop)`` exit immediately.

    Each row of ``ops`` holds ``(lut, z_idx, a_idx, b_idx)``.  Waveforms are
    runs of toggle times in a column of ``state``, terminated by ``TMAX``;
    ``sat[i]`` gives ``(first row, capacity)`` for line ``i``.
    ``line_times[i, 0, p]`` is added to the toggles read from input line
    ``i`` and ``line_times[i, 1, p]`` is the pulse-width threshold, with
    ``p`` chosen from the output value.  ``sd`` and ``seed`` drive the delay
    variation through ``rand_gauss``.
    """
    x, y = cuda.grid(2)
    st_idx = st_start + x
    op_idx = op_start + y
    if st_idx >= st_stop: return
    if op_idx >= op_stop: return
    lut = ops[op_idx, 0]
    z_idx = ops[op_idx, 1]
    a_idx = ops[op_idx, 2]
    b_idx = ops[op_idx, 3]

    z_mem, z_cap = sat[z_idx]
    a_mem = sat[a_idx, 0]
    b_mem = sat[b_idx, 0]

    # derive a per-output-line, per-simulation seed for the delay variation
    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)

    # read cursors into the input waveforms, write cursor into the output
    a_cur = int(0)
    b_cur = int(0)
    # lut bit 0 is the output value while both inputs are still 0;
    # the parity of z_cur doubles as the current output value.
    z_cur = lut & 1
    if z_cur == 1:
        state[z_mem, st_idx] = TMIN

    # first toggle of each input, shifted by the delay for the current output value
    a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd)
    b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd)

    previous_t = TMIN

    current_t = min(a, b)
    inputs = int(0)

    # merge both input waveforms in time order until both have reached TMAX
    while current_t < TMAX:
        z_val = z_cur & 1
        if b < a:
            # consume the toggle at B and pre-fetch its successor
            b_cur += 1
            b = state[b_mem + b_cur, st_idx]
            b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ b_mem ^ z_val ^ 1, sd)
            thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd)
            inputs ^= 2
            next_t = b
        else:
            # consume the toggle at A and pre-fetch its successor
            a_cur += 1
            a = state[a_mem + a_cur, st_idx]
            a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ a_mem ^ z_val ^ 1, sd)
            thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd)
            inputs ^= 1
            next_t = a

        # only act if the new input combination changes the output value
        if (z_cur & 1) != ((lut >> inputs) & 1):
            # we generate a toggle in z_mem, if:
            #   ( it is the first toggle in z_mem OR
            #   following toggle is earlier OR
            #   pulse is wide enough ) AND enough space in z_mem.
            if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh:
                if z_cur < (z_cap - 1):
                    state[z_mem + z_cur, st_idx] = current_t
                    previous_t = current_t
                    z_cur += 1
                else:
                    # No room left in the output waveform.  Overflow counting
                    # is disabled in this kernel:
                    # overflows += 1
                    # Stepping the cursor back instead of forward keeps the
                    # parity (output value) as if the toggle had been stored.
                    previous_t = state[z_mem + z_cur - 1, st_idx]
                    z_cur -= 1
            else:
                # pulse too narrow: retract the previously written toggle
                z_cur -= 1
                if z_cur > 0:
                    previous_t = state[z_mem + z_cur - 1, st_idx]
                else:
                    previous_t = TMIN
        current_t = min(a, b)

    # terminate the output waveform
    state[z_mem + z_cur, st_idx] = TMAX
U40) +U70 = NAND(LINE2, U41) +U71 = NOT(U46) +U72 = NAND(U58, U71) +U73 = NAND(U46, U48) + diff --git a/tests/b01.v b/tests/b01.v new file mode 100644 index 0000000..aa809ad --- /dev/null +++ b/tests/b01.v @@ -0,0 +1,96 @@ +// +// Milkyway Hierarchical Verilog Dump: +// Generated on 04/25/2017 at 20:29:24 +// Design Generated by Consolidated Verilog Reader +// File produced by Consolidated Verilog Writer +// Library Name :lib_mw +// Cell Name :b01 +// Hierarchy delimiter:'/' +// Write Command : write_verilog b01.v -no_physical_only_cells -no_corner_pad_cells +// -no_pad_filler_cells +// + + +module b01 (line1 , Scan_Out , overflw , outp , Scan_In , Scan_Enable , + clock , reset , line2 ); +input line1 ; +output Scan_Out ; +output overflw ; +output outp ; +input Scan_In ; +input Scan_Enable ; +input clock ; +input reset ; +input line2 ; + + + +assign Scan_Out = overflw ; +SDFFARX1 stato_reg_1_0 (.QN ( n137 ) , .Q ( stato_1_N ) , .CLK ( clock ) + , .RSTB ( n86 ) , .SE ( Scan_Enable ) , .SI ( stato_0_N ) , .D ( n53 ) ) ; +INVX0 U1 (.ZN ( n1 ) , .INP ( n105 ) ) ; +SDFFARX1 outp_reg (.Q ( outp ) , .CLK ( clock ) , .RSTB ( n86 ) + , .SE ( Scan_Enable ) , .SI ( Scan_In ) , .D ( n54 ) ) ; +SDFFARX1 stato_reg_2_0 (.QN ( n148 ) , .Q ( stato_2_N ) , .CLK ( clock ) + , .RSTB ( n86 ) , .SE ( Scan_Enable ) , .SI ( stato_1_N ) , .D ( n55 ) ) ; +SDFFARX1 stato_reg_0_0 (.QN ( n149 ) , .Q ( stato_0_N ) , .CLK ( clock ) + , .RSTB ( n86 ) , .SE ( Scan_Enable ) , .SI ( outp ) , .D ( n56 ) ) ; +SDFFARX1 overflw_reg (.Q ( overflw ) , .CLK ( clock ) , .RSTB ( n86 ) + , .SE ( Scan_Enable ) , .SI ( stato_2_N ) , .D ( n61 ) ) ; +AND2X1 U108 (.IN1 ( n1 ) , .IN2 ( stato_0_N ) , .Q ( n133 ) ) ; +AND2X1 U109 (.IN1 ( n142 ) , .IN2 ( n136 ) , .Q ( n100 ) ) ; +NAND2X0 U110 (.IN1 ( n129 ) , .IN2 ( n128 ) , .QN ( n131 ) ) ; +NAND2X0 U111 (.IN1 ( n144 ) , .IN2 ( n132 ) , .QN ( n92 ) ) ; +NOR2X0 U112 (.QN ( n132 ) , .IN1 ( n133 ) , .IN2 ( n147 ) ) ; +OR2X1 U113 (.IN2 ( n1 ) , .IN1 ( n96 ) , .Q ( n144 ) 
) ; +NAND2X0 U114 (.IN1 ( line1 ) , .IN2 ( n134 ) , .QN ( n128 ) ) ; +INVX0 U115 (.ZN ( n134 ) , .INP ( line2 ) ) ; +AND2X1 U116 (.IN1 ( n135 ) , .IN2 ( n148 ) , .Q ( n61 ) ) ; +NOR2X0 U117 (.QN ( n135 ) , .IN1 ( n149 ) , .IN2 ( n137 ) ) ; +NOR2X0 U118 (.QN ( n136 ) , .IN1 ( n137 ) , .IN2 ( n149 ) ) ; +NAND2X0 U119 (.IN1 ( n92 ) , .IN2 ( n137 ) , .QN ( n91 ) ) ; +AND2X1 U120 (.IN1 ( stato_2_N ) , .IN2 ( n131 ) , .Q ( n147 ) ) ; +NAND2X1 U121 (.IN2 ( n148 ) , .IN1 ( n109 ) , .QN ( n108 ) ) ; +NOR2X0 U122 (.QN ( n130 ) , .IN1 ( n1 ) , .IN2 ( n148 ) ) ; +NOR2X0 U123 (.QN ( n97 ) , .IN1 ( n104 ) , .IN2 ( n105 ) ) ; +AND2X1 U124 (.IN1 ( n106 ) , .IN2 ( n148 ) , .Q ( n104 ) ) ; +INVX0 U125 (.ZN ( n86 ) , .INP ( reset ) ) ; +NAND2X1 U127 (.IN2 ( n115 ) , .IN1 ( n114 ) , .QN ( n54 ) ) ; +NAND2X0 U128 (.IN1 ( stato_1_N ) , .IN2 ( n149 ) , .QN ( n140 ) ) ; +NAND2X0 U130 (.IN1 ( n129 ) , .IN2 ( n128 ) , .QN ( n142 ) ) ; +NAND2X0 U131 (.IN1 ( n129 ) , .IN2 ( n128 ) , .QN ( n103 ) ) ; +NAND2X0 U134 (.IN1 ( line2 ) , .IN2 ( line1 ) , .QN ( n105 ) ) ; +NAND2X0 U135 (.IN1 ( line2 ) , .IN2 ( n84 ) , .QN ( n129 ) ) ; +INVX0 U136 (.ZN ( n84 ) , .INP ( line1 ) ) ; +NAND2X0 U137 (.IN1 ( stato_1_N ) , .IN2 ( n149 ) , .QN ( n106 ) ) ; +NAND2X0 U138 (.IN1 ( n149 ) , .IN2 ( n148 ) , .QN ( n96 ) ) ; +NOR2X0 U139 (.QN ( n125 ) , .IN1 ( stato_1_N ) , .IN2 ( n1 ) ) ; +NAND2X0 U140 (.IN1 ( n105 ) , .IN2 ( n140 ) , .QN ( n109 ) ) ; +NOR2X0 U141 (.QN ( n122 ) , .IN1 ( n130 ) , .IN2 ( n140 ) ) ; +NOR2X0 U142 (.QN ( n114 ) , .IN1 ( n117 ) , .IN2 ( n118 ) ) ; +NAND2X0 U143 (.IN1 ( n117 ) , .IN2 ( n105 ) , .QN ( n120 ) ) ; +NAND2X0 U144 (.IN1 ( n107 ) , .IN2 ( n108 ) , .QN ( n55 ) ) ; +NAND2X0 U145 (.IN1 ( n120 ) , .IN2 ( n119 ) , .QN ( n53 ) ) ; +NAND2X0 U146 (.IN1 ( n91 ) , .IN2 ( n90 ) , .QN ( n56 ) ) ; +NAND2X0 U147 (.IN1 ( n110 ) , .IN2 ( n137 ) , .QN ( n107 ) ) ; +NOR2X0 U148 (.QN ( n119 ) , .IN1 ( n123 ) , .IN2 ( n122 ) ) ; +NOR2X0 U149 (.QN ( n90 ) , .IN1 ( n98 ) , .IN2 ( n97 ) ) ; +INVX0 
@pytest.fixture
def mydir():
    """Provide the absolute, symlink-resolved directory that contains this file."""
    import os
    from pathlib import Path
    containing_dir = os.path.dirname(__file__)
    # Joining with the working directory anchors a relative path;
    # an absolute one is returned by join unchanged.
    anchored = os.path.join(os.getcwd(), containing_dir)
    resolved = os.path.realpath(anchored)
    return Path(resolved)
def test_b01(mydir):
    """Parse b01.bench from its text and from its path; both must yield 92 nodes."""
    bench_file = mydir / 'b01.bench'

    # variant 1: hand the file contents to the parser
    with open(bench_file, 'r') as handle:
        source_text = handle.read()
    from_text = bench.parse(source_text)
    assert len(from_text.nodes) == 92

    # variant 2: hand the path itself to the parser
    from_path = bench.parse(bench_file)
    assert len(from_path.nodes) == 92
def test_vd1():
    """Two-valued simulation of AND, OR and NOT for all four input combinations."""
    circuit = bench.parse('input(x, y) output(a, o, n) a=and(x,y) o=or(x,y) n=not(x)')
    sim = LogicSim(circuit, 4)
    assert len(sim.interface) == 5

    # positions: x, y, a, o, n -- outputs start out as 0 and are filled in by capture
    stimuli = ('00000', '01000', '10000', '11000')
    expected = ('00001', '01011', '10010', '11110')

    vectors = PackedVectors(4, len(sim.interface))
    for idx, pattern in enumerate(stimuli):
        vectors[idx] = pattern

    sim.assign(vectors)
    sim.propagate()
    sim.capture(vectors)

    for idx, pattern in enumerate(expected):
        assert vectors[idx] == pattern
def test_vd3():
    """Eight-valued simulation of AND, OR, NOT and XOR over all 64 input pairs.

    Every pattern already contains the correct output values, so simulating
    and capturing must leave each vector unchanged.
    """
    circuit = bench.parse('input(x, y) output(a, o, n, xo) a=and(x,y) o=or(x,y) n=not(x) xo=xor(x,y)')
    sim = LogicSim(circuit, 64, 3)
    assert len(sim.interface) == 6

    # positions: x, y, a, o, n, xo -- one group of eight per value of x
    patterns = (
        '000010', '010111', '0-0X1X', '0X0X1X', '0R0R1R', '0F0F1F', '0P0P1P', '0N0N1N',
        '100101', '111100', '1-X10X', '1XX10X', '1RR10F', '1FF10R', '1PP10N', '1NN10P',
        '-00XXX', '-1X1XX', '--XXXX', '-XXXXX', '-RXXXX', '-FXXXX', '-PXXXX', '-NXXXX',
        'X00XXX', 'X1X1XX', 'X-XXXX', 'XXXXXX', 'XRXXXX', 'XFXXXX', 'XPXXXX', 'XNXXXX',
        'R00RFR', 'R1R1FF', 'R-XXFX', 'RXXXFX', 'RRRRFP', 'RFPNFN', 'RPPRFR', 'RNRNFF',
        'F00FRF', 'F1F1RR', 'F-XXRX', 'FXXXRX', 'FRPNRN', 'FFFFRP', 'FPPFRF', 'FNFNRR',
        'P00PNP', 'P1P1NN', 'P-XXNX', 'PXXXNX', 'PRPRNR', 'PFPFNF', 'PPPPNP', 'PNPNNN',
        'N00NPN', 'N1N1PP', 'N-XXPX', 'NXXXPX', 'NRRNPF', 'NFFNPR', 'NPPNPN', 'NNNNPP',
    )

    vectors = PackedVectors(64, len(sim.interface), 3)
    for idx, pattern in enumerate(patterns):
        vectors[idx] = pattern
    reference = vectors.copy()

    sim.assign(vectors)
    sim.propagate()
    sim.capture(vectors)

    for idx in range(64):
        assert vectors[idx] == reference[idx]
+ t.randomize() + s.assign(t) + s.propagate() + s.capture(t) + + # 8-valued + s = LogicSim(c, 8, 3) + t = PackedVectors(8, len(s.interface), 3) + t.randomize() + s.assign(t) + s.propagate() + s.capture(t) diff --git a/tests/test_packed_vectors.py b/tests/test_packed_vectors.py new file mode 100644 index 0000000..2f2a4a0 --- /dev/null +++ b/tests/test_packed_vectors.py @@ -0,0 +1,88 @@ +from kyupy.packed_vectors import PackedVectors + + +def test_basic(): + ba = PackedVectors(8, 1, 1) + assert '0\n0\n0\n0\n0\n0\n0\n0' == str(ba) + ba.set_value(0, 0, 1) + ba.set_value(1, 0, 'H') + ba.set_value(2, 0, 'h') + ba.set_value(3, 0, True) + ba.set_value(4, 0, 0) + ba.set_value(5, 0, 'L') + ba.set_value(6, 0, 'l') + ba.set_value(7, 0, False) + assert '1\n1\n1\n1\n0\n0\n0\n0' == str(ba) + ba.set_value(1, 0, '0') + ba.set_value(5, 0, '1') + assert '1\n0\n1\n1\n0\n1\n0\n0' == str(ba) + ba = PackedVectors(8, 1, 2) + assert '-\n-\n-\n-\n-\n-\n-\n-' == str(ba) + ba.set_value(0, 0, 1) + ba.set_value(7, 0, 0) + ba.set_value(4, 0, 'X') + assert '1\n-\n-\n-\nX\n-\n-\n0' == str(ba) + ba.set_value(4, 0, '-') + assert '1\n-\n-\n-\n-\n-\n-\n0' == str(ba) + ba = PackedVectors(8, 2, 2) + assert '--\n--\n--\n--\n--\n--\n--\n--' == str(ba) + ba.set_value(0, 0, '1') + ba.set_value(7, 1, '0') + ba.set_values(1, 'XX') + assert '1-\nXX\n--\n--\n--\n--\n--\n-0' == str(ba) + + +def test_8v(): + ba = PackedVectors(1, 8, 3) + assert '--------' == str(ba) + ba.set_values(0, r'-x01^v\/') + assert r'-X01PNFR' == str(ba) + ba.set_values(0, '-XLHPNFR') + assert r'-X01PNFR' == str(ba) + ba.set_values(0, '-xlhpnfr') + assert r'-X01PNFR' == str(ba) + p1 = PackedVectors(1, 8, 1) + p2 = PackedVectors(1, 8, 1) + p1.set_values(0, '01010101') + p2.set_values(0, '00110011') + p = PackedVectors.from_pair(p1, p2) + assert r'0FR10FR1' == str(p) + p1 = PackedVectors(1, 8, 2) + p2 = PackedVectors(1, 8, 2) + p1.set_values(0, '0101-X-X') + p2.set_values(0, '00110011') + p = PackedVectors.from_pair(p1, p2) + assert 
r'0FR1----' == str(p) + p1.set_values(0, '0101-X-X') + p2.set_values(0, '-X-X--XX') + p = PackedVectors.from_pair(p1, p2) + assert r'--------' == str(p) + + +def test_slicing(): + lv = PackedVectors(3, 2, 1) + assert '00\n00\n00' == str(lv) + lv.set_value(1, 0, '1') + lv.set_value(1, 1, '1') + assert '00' == lv[0] + assert '11' == lv[1] + assert 3 == len(lv) + lv2 = lv[1:3] + assert 2 == len(lv2) + assert '11' == lv2[0] + assert '00' == lv2[1] + + +def test_copy(): + lv1 = PackedVectors(8, 1, 1) + lv1.set_values_for_position(0, '01010101') + lv2 = PackedVectors(8, 1, 1) + lv2.set_values_for_position(0, '00100101') + diff = lv1.diff(lv2) + lv3 = lv1.copy(selection_mask=diff) + assert str(lv3) == '1\n0\n1' + lv4 = lv1.copy(selection_mask=~diff) + assert str(lv4) == '0\n0\n1\n0\n1' + lv5 = lv3 + lv4 + assert str(lv5) == '1\n0\n1\n0\n0\n1\n0\n1' + diff --git a/tests/test_sdf.py b/tests/test_sdf.py new file mode 100644 index 0000000..61932fa --- /dev/null +++ b/tests/test_sdf.py @@ -0,0 +1,100 @@ +from kyupy import sdf, verilog +from kyupy.saed import pin_index + + +def test_parse(): + test = ''' + (DELAYFILE + (SDFVERSION "OVI 2.1") + (DESIGN "test") + (DATE "Wed May 31 14:46:06 2017") + (VENDOR "saed90nm_max") + (PROGRAM "Synopsys Design Compiler cmos-annotated") + (VERSION "I-2013.12-ICC-SP3") + (DIVIDER /) + (VOLTAGE 1.20:1.20:1.20) + (PROCESS "TYPICAL") + (TEMPERATURE 25.00:25.00:25.00) + (TIMESCALE 1ns) + (CELL + (CELLTYPE "b14") + (INSTANCE) + (DELAY + (ABSOLUTE + (INTERCONNECT U621/ZN U19246/IN1 (0.000:0.000:0.000)) + (INTERCONNECT U13292/QN U19246/IN2 (0.001:0.001:0.001)) + (INTERCONNECT U15050/QN U19247/IN1 (0.000:0.000:0.000)) + (INTERCONNECT U13293/QN U19247/IN2 (0.000:0.000:0.000) (0.000:0.000:0.000)) + ) + ) + ) + (CELL + (CELLTYPE "INVX2") + (INSTANCE U78) + (DELAY + (ABSOLUTE + (IOPATH INP ZN (0.201:0.227:0.227) (0.250:0.271:0.271)) + ) + ) + ) + (CELL + (CELLTYPE "SDFFARX1") + (INSTANCE reg3_reg_1_0) + (DELAY + (ABSOLUTE + (IOPATH (posedge CLK) Q 
(0.707:0.710:0.710) (0.737:0.740:0.740)) + (IOPATH (negedge RSTB) Q () (0.909:0.948:0.948)) + (IOPATH (posedge CLK) QN (0.585:0.589:0.589) (0.545:0.550:0.550)) + (IOPATH (negedge RSTB) QN (1.546:1.593:1.593) ()) + ) + ) + (TIMINGCHECK + (WIDTH (posedge CLK) (0.284:0.284:0.284)) + (WIDTH (negedge CLK) (0.642:0.642:0.642)) + (SETUP (posedge D) (posedge CLK) (0.544:0.553:0.553)) + (SETUP (negedge D) (posedge CLK) (0.620:0.643:0.643)) + (HOLD (posedge D) (posedge CLK) (-0.321:-0.331:-0.331)) + (HOLD (negedge D) (posedge CLK) (-0.196:-0.219:-0.219)) + (RECOVERY (posedge RSTB) (posedge CLK) (-1.390:-1.455:-1.455)) + (HOLD (posedge RSTB) (posedge CLK) (1.448:1.509:1.509)) + (SETUP (posedge SE) (posedge CLK) (0.662:0.670:0.670)) + (SETUP (negedge SE) (posedge CLK) (0.698:0.702:0.702)) + (HOLD (posedge SE) (posedge CLK) (-0.435:-0.444:-0.444)) + (HOLD (negedge SE) (posedge CLK) (-0.291:-0.295:-0.295)) + (SETUP (posedge SI) (posedge CLK) (0.544:0.544:0.544)) + (SETUP (negedge SI) (posedge CLK) (0.634:0.688:0.688)) + (HOLD (posedge SI) (posedge CLK) (-0.317:-0.318:-0.318)) + (HOLD (negedge SI) (posedge CLK) (-0.198:-0.247:-0.247)) + (WIDTH (negedge RSTB) (0.345:0.345:0.345)) + ))) + ''' + df = sdf.parse(test) + assert df.name == 'test' + # print(f'DelayFile(name={df.name}, interconnects={len(df.interconnects)}, iopaths={len(df.iopaths)})') + + +def test_b14(mydir): + df = sdf.parse(mydir / 'b14.sdf.gz') + assert df.name == 'b14' + + +def test_gates(mydir): + c = verilog.parse(mydir / 'gates.v') + df = sdf.parse(mydir / 'gates.sdf') + lt = df.annotation(c, pin_index, dataset=1) + nand_a = c.cells['nandgate'].ins[0] + nand_b = c.cells['nandgate'].ins[1] + and_a = c.cells['andgate'].ins[0] + and_b = c.cells['andgate'].ins[1] + + assert lt[nand_a.index, 0, 0] == 0.103 + assert lt[nand_a.index, 0, 1] == 0.127 + + assert lt[nand_b.index, 0, 0] == 0.086 + assert lt[nand_b.index, 0, 1] == 0.104 + + assert lt[and_a.index, 0, 0] == 0.378 + assert lt[and_a.index, 0, 1] == 0.377 + + 
assert lt[and_b.index, 0, 0] == 0.375 + assert lt[and_b.index, 0, 1] == 0.370 diff --git a/tests/test_stil.py b/tests/test_stil.py new file mode 100644 index 0000000..08faef3 --- /dev/null +++ b/tests/test_stil.py @@ -0,0 +1,9 @@ +from kyupy import stil + + +def test_b14(mydir): + s = stil.parse(mydir / 'b14.stil.gz') + assert 10 == len(s.signal_groups) + assert 1 == len(s.scan_chains) + assert 3259 == len(s.calls) + diff --git a/tests/test_verilog.py b/tests/test_verilog.py new file mode 100644 index 0000000..1a4aef9 --- /dev/null +++ b/tests/test_verilog.py @@ -0,0 +1,9 @@ +from kyupy import verilog + + +def test_b01(mydir): + with open(mydir / 'b01.v', 'r') as f: + modules = verilog.parse(f.read()) + assert modules is not None + assert verilog.parse(mydir / 'b01.v') is not None + diff --git a/tests/test_wave_sim.py b/tests/test_wave_sim.py new file mode 100644 index 0000000..38560da --- /dev/null +++ b/tests/test_wave_sim.py @@ -0,0 +1,138 @@ +import numpy as np +from kyupy.wave_sim import WaveSim, wave_eval, TMIN, TMAX +from kyupy.logic_sim import LogicSim +from kyupy import verilog +from kyupy import sdf +from kyupy.saed import pin_index +from kyupy.packed_vectors import PackedVectors +from kyupy.wave_sim_cuda import WaveSimCuda + + +def test_wave_eval(): + # SDF specifies IOPATH delays with respect to output polarity + # SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition + line_times = np.zeros((3, 2, 2)) + line_times[0, 0, 0] = 0.1 # A -> Z rise delay + line_times[0, 0, 1] = 0.2 # A -> Z fall delay + line_times[0, 1, 0] = 0.1 # A -> Z negative pulse limit (terminate in rising Z) + line_times[0, 1, 1] = 0.2 # A -> Z positive pulse limit + line_times[1, 0, 0] = 0.3 # as above for B -> Z + line_times[1, 0, 1] = 0.4 + line_times[1, 1, 0] = 0.3 + line_times[1, 1, 1] = 0.4 + + state = np.zeros((3*16, 1)) + TMAX # 3 waveforms of capacity 16 + state[::16, 0] = 16 # first entry is capacity + a = state[0:16, 0] + 
def compare_to_logic_sim(wsim):
    """Check a waveform simulator against ``LogicSim`` on random 8-valued stimuli.

    The first eight vectors are propagated through ``wsim``; their captured
    values must match the final values computed by the logic simulator.
    """
    stimuli = PackedVectors(wsim.sims, len(wsim.interface), 3)
    stimuli.randomize()

    # timing simulation of the first eight vectors
    wsim.assign(stimuli)
    wsim.propagate(8)
    probs = np.zeros((len(wsim.interface), wsim.sims))
    wsim.capture(probabilities=probs)

    # turn the captured probabilities into hard 0/1 values at every
    # interface node that has inputs
    observed = stimuli.copy()
    for pos, node in enumerate(wsim.interface):
        if len(node.ins) > 0:
            for vec in range(wsim.sims):
                observed.set_value(vec, pos, 0 if probs[pos, vec] < 0.5 else 1)

    # reference: logic simulation of the same stimuli
    reference_sim = LogicSim(wsim.circuit, len(stimuli), 3)
    reference_sim.assign(stimuli)
    reference_sim.propagate()
    expected = stimuli.copy()
    reference_sim.capture(expected)

    # reduce transition symbols to their final value: R,N -> 1 and F,P -> 0
    final_value = str.maketrans('RFPN', '1001')
    for vec in range(8):
        want = expected[vec].translate(final_value)
        got = observed[vec].translate(final_value)
        assert got == want