Browse Source

better docs, new techlib as default, fix tests

devel
Stefan Holst 1 year ago
parent
commit
35e727e714
  1. 2
      README.rst
  2. 1
      docs/index.rst
  3. 4
      docs/miscellaneous.rst
  4. 5
      docs/simulators.rst
  5. 7
      docs/techlib.rst
  6. 7
      src/kyupy/__init__.py
  7. 34
      src/kyupy/circuit.py
  8. 2
      src/kyupy/logic.py
  9. 40
      src/kyupy/logic_sim.py
  10. 29
      src/kyupy/sdf.py
  11. 31
      src/kyupy/stil.py
  12. 308
      src/kyupy/techlib.py
  13. 26
      src/kyupy/verilog.py
  14. 49
      src/kyupy/wave_sim.py
  15. 8
      tests/conftest.py
  16. 335
      tests/rng_haltonBase2.synth_yosys.v
  17. 7
      tests/test_sdf.py
  18. 20
      tests/test_stil.py
  19. 12
      tests/test_verilog.py
  20. 15
      tests/test_wave_sim.py

2
README.rst

@ -6,7 +6,7 @@ It contains fundamental building blocks for research software in the fields of V
* Efficient data structures for gate-level circuits and related design data. * Efficient data structures for gate-level circuits and related design data.
* Partial `lark <https://github.com/lark-parser/lark>`_ parsers for common design files like * Partial `lark <https://github.com/lark-parser/lark>`_ parsers for common design files like
bench, gate-level verilog, standard delay format (SDF), standard test interface language (STIL). bench, gate-level Verilog, standard delay format (SDF), standard test interface language (STIL), design exchange format (DEF).
* Bit-parallel gate-level 2-, 4-, and 8-valued logic simulation. * Bit-parallel gate-level 2-, 4-, and 8-valued logic simulation.
* GPU-accelerated high-throughput gate-level timing simulation. * GPU-accelerated high-throughput gate-level timing simulation.
* High-performance through the use of `numpy <https://numpy.org>`_ and `numba <https://numba.pydata.org>`_. * High-performance through the use of `numpy <https://numpy.org>`_ and `numba <https://numba.pydata.org>`_.

1
docs/index.rst

@ -8,6 +8,7 @@ API Reference
circuit circuit
logic logic
techlib
parsers parsers
simulators simulators
miscellaneous miscellaneous

4
docs/miscellaneous.rst

@ -4,7 +4,3 @@ Miscellaneous
.. automodule:: kyupy .. automodule:: kyupy
:members: :members:
.. automodule:: kyupy.techlib
:members:

5
docs/simulators.rst

@ -1,6 +1,11 @@
Simulators Simulators
========== ==========
KyuPy's simulators are optimized for cells with at most 4 inputs and 1 output.
More complex cells must be mapped to simulation primitives first.
Logic Simulation - :mod:`kyupy.logic_sim` Logic Simulation - :mod:`kyupy.logic_sim`
----------------------------------------- -----------------------------------------

7
docs/techlib.rst

@ -0,0 +1,7 @@
Technology Libraries
====================
.. automodule:: kyupy.techlib
:members:

7
src/kyupy/__init__.py

@ -1,6 +1,5 @@
"""A package for processing and analysis of non-hierarchical gate-level VLSI designs. """The kyupy package itself contains miscellaneous utility functions.
The kyupy package itself contains a logger and other simple utility functions.
In addition, it defines ``numba`` and ``cuda`` objects that point to the actual packages In addition, it defines ``numba`` and ``cuda`` objects that point to the actual packages
if they are available and otherwise point to mocks. if they are available and otherwise point to mocks.
""" """
@ -22,7 +21,7 @@ def cdiv(x, y):
def popcount(a): def popcount(a):
"""Returns the number of 1-bits in a given packed numpy array.""" """Returns the number of 1-bits in a given packed numpy array of type ``uint8``."""
return np.sum(_pop_count_lut[a]) return np.sum(_pop_count_lut[a])
@ -81,6 +80,7 @@ def hr_time(seconds):
def batchrange(nitems, maxsize): def batchrange(nitems, maxsize):
"""A simple generator that produces offsets and sizes for batch-loops."""
for offset in range(0, nitems, maxsize): for offset in range(0, nitems, maxsize):
yield offset, min(nitems-offset, maxsize) yield offset, min(nitems-offset, maxsize)
@ -121,7 +121,6 @@ class Log:
self.start = time.perf_counter() self.start = time.perf_counter()
self.logfile = sys.stdout self.logfile = sys.stdout
"""When set to a file handle, log messages are written to it instead of standard output. """When set to a file handle, log messages are written to it instead of standard output.
After each write, ``flush()`` is called as well.
""" """
self.indent = 0 self.indent = 0
self._limit = -1 self._limit = -1

34
src/kyupy/circuit.py

@ -226,14 +226,14 @@ class Circuit:
"""A list of all :class:`Node` objects contained in the circuit. """A list of all :class:`Node` objects contained in the circuit.
The position of a node in this list equals its index :code:`self.nodes[42].index == 42`. The position of a node in this list equals its index :code:`self.nodes[42].index == 42`.
This list should not be changed directly. This list must not be changed directly.
Use the :class:`Node` constructor and :py:attr:`Node.remove()` to add and remove nodes. Use the :class:`Node` constructor and :py:attr:`Node.remove()` to add and remove nodes.
""" """
self.lines : list[Line] = IndexList() self.lines : list[Line] = IndexList()
"""A list of all :class:`Line` objects contained in the circuit. """A list of all :class:`Line` objects contained in the circuit.
The position of a line in this list equals its index :code:`self.lines[42].index == 42`. The position of a line in this list equals its index :code:`self.lines[42].index == 42`.
This list should not be changed directly. This list must not be changed directly.
Use the :class:`Line` constructor and :py:attr:`Line.remove()` to add and remove lines. Use the :class:`Line` constructor and :py:attr:`Line.remove()` to add and remove lines.
""" """
self.io_nodes : list[Node] = GrowingList() self.io_nodes : list[Node] = GrowingList()
@ -243,18 +243,27 @@ class Circuit:
The position of a node in the io_nodes list corresponds to positions of logic values in test vectors. The position of a node in the io_nodes list corresponds to positions of logic values in test vectors.
The port direction is not stored explicitly. The port direction is not stored explicitly.
Usually, nodes in the io_nodes list without any lines in their :py:attr:`Node.ins` list are primary inputs, Usually, nodes in the io_nodes list without any lines in their :py:attr:`Node.ins` list are primary inputs,
and nodes without any lines in their :py:attr:`Node.outs` list are regarded as primary outputs. and all other nodes in the io_nodes list are regarded as primary outputs.
""" """
self.cells : dict[str, Node] = {} self.cells : dict[str, Node] = {}
"""A dictionary to access cells by name. """A dictionary to access cells by name.
This dictionary must not be changed directly.
Use the :class:`Node` constructor and :py:attr:`Node.remove()` to add and remove nodes.
""" """
self.forks : dict[str, Node] = {} self.forks : dict[str, Node] = {}
"""A dictionary to access forks by name. """A dictionary to access forks by name.
This dictionary must not be changed directly.
Use the :class:`Node` constructor and :py:attr:`Node.remove()` to add and remove nodes.
""" """
@property @property
def s_nodes(self): def s_nodes(self):
"""A list of all io_nodes as well as all flip-flops and latches in the circuit (in that order). """A list of all primary I/Os as well as all flip-flops and latches in the circuit (in that order).
The s_nodes list defines the order of all ports and all sequential elements in the circuit.
This list is constructed on-the-fly. If used in some inner loop, consider caching the list for better performance.
""" """
return list(self.io_nodes) + [n for n in self.nodes if 'dff' in n.kind.lower()] + [n for n in self.nodes if 'latch' in n.kind.lower()] return list(self.io_nodes) + [n for n in self.nodes if 'dff' in n.kind.lower()] + [n for n in self.nodes if 'latch' in n.kind.lower()]
@ -275,7 +284,7 @@ class Circuit:
def s_locs(self, prefix): def s_locs(self, prefix):
"""Returns the indices of I/Os and sequential elements that start with given name prefix. """Returns the indices of I/Os and sequential elements that start with given name prefix.
The returned values are used to index into the :py:attr:`s_nodes` array. The returned values are used to index into the :py:attr:`s_nodes` list.
It works the same as :py:attr:`io_locs`. See there for more details. It works the same as :py:attr:`io_locs`. See there for more details.
""" """
return self._locs(prefix, self.s_nodes) return self._locs(prefix, self.s_nodes)
@ -299,7 +308,7 @@ class Circuit:
@property @property
def stats(self): def stats(self):
"""A dictionary with the number of all different elements in the circuit. """A dictionary with the counts of all different elements in the circuit.
The dictionary contains the number of all different kinds of nodes, the number The dictionary contains the number of all different kinds of nodes, the number
of lines, as well as various sums like number of combinational gates, number of of lines, as well as various sums like number of combinational gates, number of
@ -342,6 +351,10 @@ class Circuit:
information is not needed, such forks can be removed and the two neighbors information is not needed, such forks can be removed and the two neighbors
can be connected directly using one line. Forks that drive more than one node can be connected directly using one line. Forks that drive more than one node
are not removed by this function. are not removed by this function.
This function may remove some nodes and some lines from the circuit.
Therefore, the indices of other nodes and lines may change to keep the indices consecutive.
It may therefore invalidate external data for nodes and lines.
""" """
ios = set(self.io_nodes) ios = set(self.io_nodes)
for n in list(self.forks.values()): for n in list(self.forks.values()):
@ -363,6 +376,11 @@ class Circuit:
The given node will be removed, the implementation is copied in and The given node will be removed, the implementation is copied in and
the signal lines are connected appropriately. The number and arrangement the signal lines are connected appropriately. The number and arrangement
of the input and output ports must match the pins of the replaced node. of the input and output ports must match the pins of the replaced node.
This function tries to preserve node and line indices as much as possible.
Usually, it only adds additional nodes and lines, preserving the order of
all existing nodes and lines. If an implementation is empty, however, nodes
and lines may get removed, changing indices and invalidating external data.
""" """
ios = set(impl.io_nodes) ios = set(impl.io_nodes)
impl_in_nodes = [n for n in impl.io_nodes if len(n.ins) == 0] impl_in_nodes = [n for n in impl.io_nodes if len(n.ins) == 0]
@ -422,9 +440,11 @@ class Circuit:
def resolve_tlib_cells(self, tlib): def resolve_tlib_cells(self, tlib):
"""Substitute all technology library cells with kyupy native simulation primitives. """Substitute all technology library cells with kyupy native simulation primitives.
See :py:attr:`substitute()` for more detail.
""" """
for n in list(self.nodes): for n in list(self.nodes):
if n.kind in tlib.cells:# and 'DFF' not in n.kind and 'LATCH' not in n.kind: if n.kind in tlib.cells:
self.substitute(n, tlib.cells[n.kind][0]) self.substitute(n, tlib.cells[n.kind][0])
def copy(self): def copy(self):

2
src/kyupy/logic.py

@ -32,7 +32,7 @@ The axis convention is as follows:
* The **last** axis goes along patterns/vectors. I.e. ``values[...,0]`` is pattern 0, ``values[...,1]`` is pattern 1, etc. * The **last** axis goes along patterns/vectors. I.e. ``values[...,0]`` is pattern 0, ``values[...,1]`` is pattern 1, etc.
* The **second-to-last** axis goes along the I/O and flip-flops of circuits. For a circuit ``c``, this axis is usually * The **second-to-last** axis goes along the I/O and flip-flops of circuits. For a circuit ``c``, this axis is usually
``len(c.s_nodes)`` long. The values of all inputs, outputs and flip-flops are stored within the same array and the location ``len(c.s_nodes)`` long. The values of all inputs, outputs and flip-flops are stored within the same array and the location
along the second-to-last axis is determined by the order in ``c.s_nodes``. along the second-to-last axis is determined by the order in :py:attr:`~kyupy.circuit.Circuit.s_nodes`.
Two storage formats are used in KyuPy: Two storage formats are used in KyuPy:

40
src/kyupy/logic_sim.py

@ -1,7 +1,7 @@
"""A high-throughput combinational logic simulator. """A high-throughput combinational logic simulator.
The class :py:class:`~kyupy.logic_sim.LogicSim` performs parallel simulations of the combinational part of a circuit. The class :py:class:`~kyupy.logic_sim.LogicSim` performs parallel simulations of the combinational part of a circuit.
The logic operations are performed bit-parallel on packed numpy arrays. The logic operations are performed bit-parallel on packed numpy arrays (see bit-parallel (bp) array description in :py:mod:`~kyupy.logic`).
Simple sequential circuits can be simulated by repeated assignments and propagations. Simple sequential circuits can be simulated by repeated assignments and propagations.
However, this simulator ignores the clock network and simply assumes that all state-elements are clocked all the time. However, this simulator ignores the clock network and simply assumes that all state-elements are clocked all the time.
""" """
@ -19,8 +19,8 @@ class LogicSim(sim.SimOps):
:param circuit: The circuit to simulate. :param circuit: The circuit to simulate.
:param sims: The number of parallel logic simulations to perform. :param sims: The number of parallel logic simulations to perform.
:param m: The arity of the logic, must be 2, 4, or 8. :param m: The arity of the logic, must be 2, 4, or 8.
:param c_reuse: If True, intermediate signal values may get overwritten when not needed anymore. :param c_reuse: If True, intermediate signal values may get overwritten when not needed anymore to save memory.
:param strip_forks: If True, forks are not included in the simulation model. :param strip_forks: If True, forks are not included in the simulation model to save memory and simulation time.
""" """
def __init__(self, circuit: Circuit, sims: int = 8, m: int = 8, c_reuse: bool = False, strip_forks: bool = False): def __init__(self, circuit: Circuit, sims: int = 8, m: int = 8, c_reuse: bool = False, strip_forks: bool = False):
assert m in [2, 4, 8] assert m in [2, 4, 8]
@ -34,10 +34,12 @@ class LogicSim(sim.SimOps):
self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8) self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8)
"""Logic values of the sequential elements (flip-flops) and ports. """Logic values of the sequential elements (flip-flops) and ports.
The elements are as follows: It is a pair of arrays in bit-parallel (bp) storage format:
* ``s[0]`` Assigned values. Simulator will read (P)PI value from here. * ``s[0]`` Assigned values. Simulator will read (P)PI value from here.
* ``s[1]`` Result values. Simulator will write (P)PO values here. * ``s[1]`` Result values. Simulator will write (P)PO values here.
Access this array to assign new values to the (P)PIs or read values from the (P)POs.
""" """
self.s[:,:,1,:] = 255 # unassigned self.s[:,:,1,:] = 255 # unassigned
@ -49,16 +51,10 @@ class LogicSim(sim.SimOps):
""" """
self.c[self.pippi_c_locs] = self.s[0, self.pippi_s_locs, :self.mdim] self.c[self.pippi_c_locs] = self.s[0, self.pippi_s_locs, :self.mdim]
def c_to_s(self): def c_prop(self, inject_cb=None):
"""Copies the results of the combinational portion to ``s[1]``. """Propagate the input values through the combinational circuit towards the outputs.
"""
self.s[1, self.poppo_s_locs, :self.mdim] = self.c[self.poppo_c_locs]
if self.mdim == 1:
self.s[1, self.poppo_s_locs, 1:2] = self.c[self.poppo_c_locs]
def c_prop(self, sims=None, inject_cb=None):
"""Propagate the input values towards the outputs (Perform all logic operations in topological order).
Performs all logic operations in topological order.
If the circuit is sequential (it contains flip-flops), one call simulates one clock cycle. If the circuit is sequential (it contains flip-flops), one call simulates one clock cycle.
:param inject_cb: A callback function for manipulating intermediate signal values. :param inject_cb: A callback function for manipulating intermediate signal values.
@ -67,13 +63,11 @@ class LogicSim(sim.SimOps):
resumes with the manipulated values after the callback returns. resumes with the manipulated values after the callback returns.
:type inject_cb: ``f(Line, ndarray)`` :type inject_cb: ``f(Line, ndarray)``
""" """
if sims is None: sims = self.sims
nbytes = (sims - 1) // 8 + 1
t0 = self.c_locs[self.tmp_idx] t0 = self.c_locs[self.tmp_idx]
t1 = self.c_locs[self.tmp2_idx] t1 = self.c_locs[self.tmp2_idx]
if self.m == 2: if self.m == 2:
if inject_cb is None: if inject_cb is None:
_prop_cpu(self.ops, self.c_locs, self.c[...,:nbytes]) _prop_cpu(self.ops, self.c_locs, self.c)
else: else:
for op, o0, i0, i1, i2, i3 in self.ops[:,:6]: for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)] o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
@ -264,14 +258,21 @@ class LogicSim(sim.SimOps):
else: print(f'unknown op {op}') else: print(f'unknown op {op}')
if inject_cb is not None: inject_cb(o0, self.s[o0]) if inject_cb is not None: inject_cb(o0, self.s[o0])
def c_to_s(self):
"""Copies (captures) the results of the combinational portion to ``s[1]``.
"""
self.s[1, self.poppo_s_locs, :self.mdim] = self.c[self.poppo_c_locs]
if self.mdim == 1:
self.s[1, self.poppo_s_locs, 1:2] = self.c[self.poppo_c_locs]
def s_ppo_to_ppi(self): def s_ppo_to_ppi(self):
"""Constructs a new assignment based on the current data in ``s``. """Constructs a new assignment based on the current data in ``s``.
Use this function for simulating consecutive clock cycles. Use this function for simulating consecutive clock cycles.
For 2-valued or 4-valued simulations, all values from PPOs (in ``s[1]``) are copied to the PPIs (in ``s[0]``). For 2-valued or 4-valued simulations, all values from PPOs (in ``s[1]``) are copied to the PPIs (in ``s[0]``).
For 8-valued simulations, PPI transitions are constructed from the initial values of the assignment and the For 8-valued simulations, PPI transitions are constructed from the final values of the assignment (in ``s[0]``) and the
final values of the results. final values of the results (in ``s[1]``).
""" """
# TODO: handle latches correctly # TODO: handle latches correctly
if self.mdim < 3: if self.mdim < 3:
@ -282,11 +283,10 @@ class LogicSim(sim.SimOps):
self.s[0, self.ppio_s_locs, 2] = self.s[0, self.ppio_s_locs, 0] ^ self.s[0, self.ppio_s_locs, 1] # TODO: not correct for X, - self.s[0, self.ppio_s_locs, 2] = self.s[0, self.ppio_s_locs, 0] ^ self.s[0, self.ppio_s_locs, 1] # TODO: not correct for X, -
def cycle(self, cycles: int = 1, inject_cb=None): def cycle(self, cycles: int = 1, inject_cb=None):
"""Assigns the given state, propagates it and captures the new state. """Repeatedly assigns a state, propagates it, captures the new state, and transfers PPOs to PPIs.
:param cycles: The number of cycles to simulate. :param cycles: The number of cycles to simulate.
:param inject_cb: A callback function for manipulating intermediate signal values. See :py:func:`c_prop`. :param inject_cb: A callback function for manipulating intermediate signal values. See :py:func:`c_prop`.
:returns: The given state object.
""" """
for _ in range(cycles): for _ in range(cycles):
self.s_to_c() self.s_to_c()

29
src/kyupy/sdf.py

@ -3,9 +3,8 @@
This parser extracts pin-to-pin delay and interconnect delay information from SDF files. This parser extracts pin-to-pin delay and interconnect delay information from SDF files.
Sophisticated timing specifications (timing checks, conditional delays, etc.) are ignored. Sophisticated timing specifications (timing checks, conditional delays, etc.) are ignored.
The functions :py:func:`load` and :py:func:`read` return an intermediate representation (:class:`DelayFile` object). The functions :py:func:`parse` and :py:func:`load` return an intermediate representation (:class:`DelayFile` object).
Call :py:func:`DelayFile.iopaths` to match the intermediate representation to a given circuit. Call :py:func:`DelayFile.iopaths` and :py:func:`DelayFile.interconnects` to generate delay information for a given circuit.
""" """
from collections import namedtuple from collections import namedtuple
@ -16,6 +15,7 @@ from lark import Lark, Transformer
from . import log, readtext from . import log, readtext
from .circuit import Circuit from .circuit import Circuit
from .techlib import TechLib
Interconnect = namedtuple('Interconnect', ['orig', 'dest', 'r', 'f']) Interconnect = namedtuple('Interconnect', ['orig', 'dest', 'r', 'f'])
@ -34,17 +34,20 @@ class DelayFile:
return '\n'.join(f'{n}: {l}' for n, l in self.cells.items()) + '\n' + \ return '\n'.join(f'{n}: {l}' for n, l in self.cells.items()) + '\n' + \
'\n'.join(str(i) for i in self._interconnects) '\n'.join(str(i) for i in self._interconnects)
def iopaths(self, circuit:Circuit, tlib): def iopaths(self, circuit:Circuit, tlib:TechLib):
"""Constructs an ndarray containing all IOPATH delays. """Constructs an ndarray containing all IOPATH delays.
All IOPATH delays for a node ``n`` are annotated to the line connected to the input pin specified in the IOPATH. All IOPATH delays for a node ``n`` are annotated to the line connected to the input pin specified in the IOPATH.
Limited support of SDF spec: Limited support of SDF spec:
* ABSOLUTE delay values only
* two delvals per delval_list. First is rising/posedge, second is falling/negedge * Only ABSOLUTE delay values are supported.
* Only two delvals per delval_list are supported. First is rising/posedge, second is falling/negedge
transition at the output of the IOPATH (SDF spec, pp. 3-17). transition at the output of the IOPATH (SDF spec, pp. 3-17).
* PATHPULSE declarations are ignored. * PATHPULSE declarations are ignored.
The axes convention of KyuPy's delay data arrays is as follows:
* Axis 0: dataset (usually 3 datasets per SDF-file) * Axis 0: dataset (usually 3 datasets per SDF-file)
* Axis 1: line index (e.g. ``n.ins[0]``, ``n.ins[1]``) * Axis 1: line index (e.g. ``n.ins[0]``, ``n.ins[1]``)
* Axis 2: polarity of the transition at the IOPATH-input (e.g. at ``n.ins[0]`` or ``n.ins[1]``), 0='rising/posedge', 1='falling/negedge' * Axis 2: polarity of the transition at the IOPATH-input (e.g. at ``n.ins[0]`` or ``n.ins[1]``), 0='rising/posedge', 1='falling/negedge'
@ -75,20 +78,24 @@ class DelayFile:
return np.moveaxis(delays, -1, 0) return np.moveaxis(delays, -1, 0)
def interconnects(self, circuit, tlib): def interconnects(self, circuit:Circuit, tlib:TechLib):
"""Constructs an ndarray containing all INTERCONNECT delays. """Constructs an ndarray containing all INTERCONNECT delays.
To properly annotate interconnect delays, the circuit model has to include a '__fork__' node on To properly annotate interconnect delays, the circuit model has to include a '__fork__' node on
every signal and every fanout-branch. The Verilog parser aids in this by setting the parameter every signal and every fanout-branch. The Verilog parser aids in this by setting the parameter
`branchforks=True` in :py:func:`kyupy.verilog.parse` or :py:func:`kyupy.verilog.load`. `branchforks=True` in :py:func:`~kyupy.verilog.parse` or :py:func:`~kyupy.verilog.load`.
Limited support of SDF spec: Limited support of SDF spec:
* ABSOLUTE delay values only
* two delvals per delval_list. First is rising/posedge, second is falling/negedge transition. * Only ABSOLUTE delay values are supported.
* Only two delvals per delval_list are supported. First is rising/posedge, second is falling/negedge
transition.
* PATHPULSE declarations are ignored. * PATHPULSE declarations are ignored.
The axes convention of KyuPy's delay data arrays is as follows:
* Axis 0: dataset (usually 3 datasets per SDF-file) * Axis 0: dataset (usually 3 datasets per SDF-file)
* Axis 1: line index. usually input line of a __fork__ * Axis 1: line index. Usually input line of a __fork__.
* Axis 2: (axis of size 2 for compatibility to IOPATH results. Values are broadcast along this axis.) * Axis 2: (axis of size 2 for compatibility to IOPATH results. Values are broadcast along this axis.)
* Axis 3: polarity of the transition, 0='rising/posedge', 1='falling/negedge' * Axis 3: polarity of the transition, 0='rising/posedge', 1='falling/negedge'
""" """

31
src/kyupy/stil.py

@ -3,8 +3,8 @@
The main purpose of this parser is to load scan pattern sets from STIL files. The main purpose of this parser is to load scan pattern sets from STIL files.
It supports only a subset of STIL. It supports only a subset of STIL.
The functions :py:func:`load` and :py:func:`read` return an intermediate representation (:class:`StilFile` object). The functions :py:func:`parse` and :py:func:`load` return an intermediate representation (:py:class:`StilFile` object).
Call :py:func:`StilFile.tests`, :py:func:`StilFile.tests_loc`, or :py:func:`StilFile.responses` to Call :py:func:`StilFile.tests()`, :py:func:`StilFile.tests_loc()`, or :py:func:`StilFile.responses()` to
obtain the appropriate vector sets. obtain the appropriate vector sets.
""" """
@ -90,6 +90,11 @@ class StilFile:
"""Assembles and returns a scan test pattern set for given circuit. """Assembles and returns a scan test pattern set for given circuit.
This function assumes a static (stuck-at fault) test. This function assumes a static (stuck-at fault) test.
:param circuit: The circuit to assemble the patterns for. The patterns will follow the
:py:attr:`~kyupy.circuit.Circuit.s_nodes` ordering of this circuit.
:return: A 4-valued multi-valued (mv) logic array (see :py:mod:`~kyupy.logic`).
The values for primary inputs and sequential elements are filled, the primary outputs are left unassigned.
""" """
interface, pi_map, _, scan_maps, scan_inversions = self._maps(circuit) interface, pi_map, _, scan_maps, scan_inversions = self._maps(circuit)
tests = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED) tests = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
@ -109,6 +114,20 @@ class StilFile:
This function assumes a launch-on-capture (LoC) delay test. This function assumes a launch-on-capture (LoC) delay test.
It performs a logic simulation to obtain the first capture pattern (the one that launches the delay It performs a logic simulation to obtain the first capture pattern (the one that launches the delay
test) and assembles the test pattern set from pairs of initialization- and launch-patterns. test) and assembles the test pattern set from pairs of initialization- and launch-patterns.
:param circuit: The circuit to assemble the patterns for. The patterns will follow the
:py:attr:`~kyupy.circuit.Circuit.s_nodes` ordering of this circuit.
:param init_filter: A function for filtering the initialization patterns. This function is called
with the initialization patterns from the STIL file as mvarray before logic simulation.
It shall return an mvarray with the same shape. This function can be used, for example, to fill
patterns.
:param launch_filter: A function for filtering the launch patterns. This function is called
with the launch patterns generated by logic simulation before they are combined with
the initialization patterns to form the final 8-valued test patterns.
The function shall return an mvarray with the same shape. This function can be used, for example, to fill
patterns.
:return: An 8-valued multi-valued (mv) logic array (see :py:mod:`~kyupy.logic`). The values for primary
inputs and sequential elements are filled, the primary outputs are left unassigned.
""" """
interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(circuit) interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(circuit)
init = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED) init = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
@ -142,7 +161,13 @@ class StilFile:
return logic.mv_transition(init, launch) return logic.mv_transition(init, launch)
def responses(self, circuit): def responses(self, circuit):
"""Assembles and returns a scan test response pattern set for given circuit.""" """Assembles and returns a scan test response pattern set for given circuit.
:param circuit: The circuit to assemble the patterns for. The patterns will follow the
:py:attr:`~kyupy.circuit.Circuit.s_nodes` ordering of this circuit.
:return: A 4-valued multi-valued (mv) logic array (see :py:mod:`~kyupy.logic`).
The values for primary outputs and sequential elements are filled, the primary inputs are left unassigned.
"""
interface, _, po_map, scan_maps, scan_inversions = self._maps(circuit) interface, _, po_map, scan_maps, scan_inversions = self._maps(circuit)
resp = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED) resp = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
for i, p in enumerate(self.patterns): for i, p in enumerate(self.patterns):

308
src/kyupy/techlib.py

@ -1,21 +1,19 @@
"""KyuPy's Built-In Technology Libraries
Technology libraries provide cell definitions and their implementation with simulation primitives.
A couple of common standard cell libraries are built-in.
Others can be easily added by providing a bench-like description of the cells.
"""
import re import re
from itertools import product from itertools import product
from .circuit import Node, Line
from . import bench from . import bench
class TechLib: class TechLibOld:
"""Provides some information specific to standard cell libraries necessary
for loading gate-level designs. :py:class:`~kyupy.circuit.Node` objects do not
have pin names. The methods defined here map pin names to pin directions and defined
positions in the ``node.ins`` and ``node.outs`` lists. The default implementation
provides mappings for SAED-inspired standard cell libraries.
"""
@staticmethod @staticmethod
def pin_index(kind, pin): def pin_index(kind, pin):
"""Returns a pin list position for a given node kind and pin name."""
if isinstance(pin, int): if isinstance(pin, int):
return max(0, pin-1) return max(0, pin-1)
if kind[:3] in ('OAI', 'AOI'): if kind[:3] in ('OAI', 'AOI'):
@ -51,16 +49,24 @@ class TechLib:
@staticmethod @staticmethod
def pin_is_output(kind, pin): def pin_is_output(kind, pin):
"""Returns True, if given pin name of a node kind is an output."""
if isinstance(pin, int): if isinstance(pin, int):
return pin == 0 return pin == 0
if 'MUX' in kind and pin == 'S': return False if 'MUX' in kind and pin == 'S': return False
return pin in ('Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1') return pin in ('Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1')
class TechLibNew: class TechLib:
"""Class for standard cell library definitions.
:py:class:`~kyupy.circuit.Node` objects do not have pin names.
This class maps pin names to pin directions and defined positions in the ``node.ins`` and ``node.outs`` lists.
Furthermore, it gives access to implementations of complex cells. See also :py:func:`~kyupy.circuit.Circuit.substitute` and
:py:func:`~kyupy.circuit.Circuit.resolve_tlib_cells`.
"""
def __init__(self, lib_src): def __init__(self, lib_src):
self.cells = dict() self.cells = dict()
"""A dictionary with pin definitions and circuits for each cell kind (type).
"""
for c_str in re.split(r';\s+', lib_src): for c_str in re.split(r';\s+', lib_src):
c_str = re.sub(r'^\s+', '', c_str) c_str = re.sub(r'^\s+', '', c_str)
name_len = c_str.find(' ') name_len = c_str.find(' ')
@ -82,16 +88,56 @@ class TechLibNew:
self.cells[name] = (c, pin_dict) self.cells[name] = (c, pin_dict)
def pin_index(self, kind, pin): def pin_index(self, kind, pin):
"""Returns a pin list position for a given node kind and pin name."""
assert kind in self.cells, f'Unknown cell: {kind}' assert kind in self.cells, f'Unknown cell: {kind}'
assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}' assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}'
return self.cells[kind][1][pin][0] return self.cells[kind][1][pin][0]
def pin_is_output(self, kind, pin): def pin_is_output(self, kind, pin):
"""Returns True, if given pin name of a node kind is an output."""
assert kind in self.cells, f'Unknown cell: {kind}' assert kind in self.cells, f'Unknown cell: {kind}'
assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}' assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}'
return self.cells[kind][1][pin][1] return self.cells[kind][1][pin][1]
GSC180 = TechLib(r"""
BUFX{1,3} input(A) output(Y) Y=BUF1(A) ;
CLKBUFX{1,2,3} input(A) output(Y) Y=BUF1(A) ;
INVX{1,2,4,8} input(A) output(Y) Y=INV1(A) ;
TBUFX{1,2,4,8} input(A,OE) output(Y) Y=AND2(A,OE) ;
TINVX1 input(A,OE) output(Y) AB=INV1(A) Y=AND2(AB,OE) ;
AND2X1 input(A,B) output(Y) Y=AND2(A,B) ;
NAND2X{1,2} input(A,B) output(Y) Y=NAND2(A,B) ;
NAND3X1 input(A,B,C) output(Y) Y=NAND3(A,B,C) ;
NAND4X1 input(A,B,C,D) output(Y) Y=NAND4(A,B,C,D) ;
OR2X1 input(A,B) output(Y) Y=OR2(A,B) ;
OR4X1 input(A,B,C,D) output(Y) Y=OR4(A,B,C,D) ;
NOR2X1 input(A,B) output(Y) Y=NOR2(A,B) ;
NOR3X1 input(A,B,C) output(Y) Y=NOR3(A,B,C) ;
NOR4X1 input(A,B,C,D) output(Y) Y=NOR4(A,B,C,D) ;
XOR2X1 input(A,B) output(Y) Y=XOR2(A,B) ;
MX2X1 input(A,B,S0) output(Y) Y=MUX21(A,B,S0) ;
AOI21X1 input(A0,A1,B0) output(Y) Y=AOI21(A0,A1,B0) ;
AOI22X1 input(A0,A1,B0,B1) output(Y) Y=AOI22(A0,A1,B0,B1) ;
OAI21X1 input(A0,A1,B0) output(Y) Y=OAI21(A0,A1,B0) ;
OAI22X1 input(A0,A1,B0,B1) output(Y) Y=OAI22(A0,A1,B0,B1) ;
OAI33X1 input(A0,A1,A2,B0,B1,B2) output(Y) AA=OR2(A0,A1) BB=OR2(B0,B1) Y=OAI22(AA,A2,BB,B2) ;
ADDFX1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(AB,CI,A,B) ;
ADDHX1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
DFFX1 input(CK,D) output(Q,QN) Q=DFF(D,CK) QN=INV1(Q) ;
DFFSRX1 input(CK,D,RN,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) Q=DFF(DRS,CK) QN=INV1(Q) ;
SDFFSRX1 input(CK,D,RN,SE,SI,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;
TLATSRX1 input(D,G,RN,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) Q=LATCH(DRS,G) QN=INV1(Q) ;
TLATX1 input(C,D) output(Q,QN) Q=LATCH(D,C) QN=INV1(Q) ;
""")
"""The GSC 180nm generic standard cell library.
"""
_nangate_common = r""" _nangate_common = r"""
FILLCELL_X{1,2,4,8,16,32} ; FILLCELL_X{1,2,4,8,16,32} ;
@ -153,21 +199,7 @@ DLL_X{1,2} input(D,GN) output(Q) G=INV1(GN) Q=LATCH(D,G) ;
""" """
NANGATE_ZN = TechLibNew(_nangate_common + r""" NANGATE = TechLib(_nangate_common + r"""
INV_X{1,2,4,8,16,32} input(A) output(ZN) ZN=INV1(A) ;
AND2_X{1,2,4} input(A1,A2) output(ZN) ZN=AND2(A1,A2) ;
AND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=AND3(A1,A2,A3) ;
AND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4) ;
OR2_X{1,2,4} input(A1,A2) output(ZN) ZN=OR2(A1,A2) ;
OR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=OR3(A1,A2,A3) ;
OR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4) ;
XOR2_X{1,2} input(A,B) output(Z) Z=XOR2(A,B) ;
XNOR2_X{1,2} input(A,B) output(ZN) ZN=XNOR2(A,B) ;
""")
NANGATE = TechLibNew(_nangate_common + r"""
INV_X{1,2,4,8,16,32} input(I) output(ZN) ZN=INV1(I) ; INV_X{1,2,4,8,16,32} input(I) output(ZN) ZN=INV1(I) ;
AND2_X{1,2,4} input(A1,A2) output(Z) Z=AND2(A1,A2) ; AND2_X{1,2,4} input(A1,A2) output(Z) Z=AND2(A1,A2) ;
@ -179,99 +211,27 @@ OR4_X{1,2,4} input(A1,A2,A3,A4) output(Z) Z=OR4(A1,A2,A3,A4) ;
XOR2_X{1,2} input(A1,A2) output(Z) Z=XOR2(A1,A2) ; XOR2_X{1,2} input(A1,A2) output(Z) Z=XOR2(A1,A2) ;
XNOR2_X{1,2} input(A1,A2) output(ZN) ZN=XNOR2(A1,A2) ; XNOR2_X{1,2} input(A1,A2) output(ZN) ZN=XNOR2(A1,A2) ;
""") """)
"""A newer NANGATE variant that uses 'Z' as the output pin name for AND and OR gates.
"""
# SAED90nm and SAED32nm libraries. NANGATE_ZN = TechLib(_nangate_common + r"""
# not included here: negative-edge flip-flops, tri-state, latches, clock gating, level shifters INV_X{1,2,4,8,16,32} input(A) output(ZN) ZN=INV1(A) ;
SAED90 = TechLibNew(r"""
NBUFFX{2,4,8,16,32}$ input(INP) output(Z) Z=BUF1(INP) ;
AOBUFX{1,2,4}$ input(INP) output(Z) Z=BUF1(INP) ;
DELLN{1,2,3}X2$ input(INP) output(Z)Z=BUF1(INP) ;
INVX{0,1,2,4,8,16,32}$ input(INP) output(ZN) ZN=INV1(INP) ;
AOINVX{1,2,4}$ input(INP) output(ZN) ZN=INV1(INP) ;
IBUFFX{2,4,8,16,32}$ input(INP) output(ZN) ZN=INV1(INP) ;
TIEH$ output(Z) Z=__const1__() ;
TIEL$ output(ZN) ZN=__const0__() ;
HEAD2X{2,4,8,16,32}$ input(SLEEP) output(SLEEPOUT) SLEEPOUT=BUF1(SLEEP) ;
HEADX{2,4,8,16,32}$ input(SLEEP) ;
ANTENNA$ input(INP) ;
CLOAD1$ input(INP) ;
DCAP$ ;
DHFILL{HLH,LHL}2 ;
DHFILLHLHLS11$ ;
SHFILL{1,2,3,64,128}$ ;
AND2X{1,2,4}$ input(IN1,IN2) output(Q) Q=AND2(IN1,IN2) ;
AND3X{1,2,4}$ input(IN1,IN2,IN3) output(Q) Q=AND3(IN1,IN2,IN3) ;
AND4X{1,2,4}$ input(IN1,IN2,IN3,IN4) output(Q) Q=AND4(IN1,IN2,IN3,IN4) ;
OR2X{1,2,4}$ input(IN1,IN2) output(Q) Q=OR2(IN1,IN2) ;
OR3X{1,2,4}$ input(IN1,IN2,IN3) output(Q) Q=OR3(IN1,IN2,IN3) ;
OR4X{1,2,4}$ input(IN1,IN2,IN3,IN4) output(Q) Q=OR4(IN1,IN2,IN3,IN4) ;
XOR2X{1,2}$ input(IN1,IN2) output(Q) Q=XOR2(IN1,IN2) ;
XOR3X{1,2}$ input(IN1,IN2,IN3) output(Q) Q=XOR3(IN1,IN2,IN3) ;
NAND2X{0,1,2,4}$ input(IN1,IN2) output(QN) QN=NAND2(IN1,IN2) ;
NAND3X{0,1,2,4}$ input(IN1,IN2,IN3) output(QN) QN=NAND3(IN1,IN2,IN3) ;
NAND4X{0,1}$ input(IN1,IN2,IN3,IN4) output(QN) QN=NAND4(IN1,IN2,IN3,IN4) ;
NOR2X{0,1,2,4}$ input(IN1,IN2) output(QN) QN=NOR2(IN1,IN2) ;
NOR3X{0,1,2,4}$ input(IN1,IN2,IN3) output(QN) QN=NOR3(IN1,IN2,IN3) ;
NOR4X{0,1}$ input(IN1,IN2,IN3,IN4) output(QN) QN=NOR4(IN1,IN2,IN3,IN4) ;
XNOR2X{1,2}$ input(IN1,IN2) output(Q) Q=XNOR2(IN1,IN2) ;
XNOR3X{1,2}$ input(IN1,IN2,IN3) output(Q) Q=XNOR3(IN1,IN2,IN3) ;
ISOLAND{,AO}X{1,2,4,8}$ input(ISO,D) output(Q) ISOB=NOT1(ISO) Q=AND2(ISOB,D) ;
ISOLOR{,AO}X{1,2,4,8}$ input(ISO,D) output(Q) Q=OR2(ISO,D) ;
AO21X{1,2}$ input(IN1,IN2,IN3) output(Q) Q=AO21(IN1,IN2,IN3) ;
OA21X{1,2}$ input(IN1,IN2,IN3) output(Q) Q=OA21(IN1,IN2,IN3) ;
AOI21X{1,2}$ input(IN1,IN2,IN3) output(QN) QN=AOI21(IN1,IN2,IN3) ;
OAI21X{1,2}$ input(IN1,IN2,IN3) output(QN) QN=OAI21(IN1,IN2,IN3) ;
AO22X{1,2}$ input(IN1,IN2,IN3,IN4) output(Q) Q=AO22(IN1,IN2,IN3,IN4) ;
OA22X{1,2}$ input(IN1,IN2,IN3,IN4) output(Q) Q=OA22(IN1,IN2,IN3,IN4) ;
AOI22X{1,2}$ input(IN1,IN2,IN3,IN4) output(QN) QN=AOI22(IN1,IN2,IN3,IN4) ;
OAI22X{1,2}$ input(IN1,IN2,IN3,IN4) output(QN) QN=OAI22(IN1,IN2,IN3,IN4) ;
MUX21X{1,2}$ input(IN1,IN2,S) output(Q) Q=MUX21(IN1,IN2,S) ;
AO221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(Q) A=AO22(IN1,IN2,IN3,IN4) Q=OR2(IN5,A) ;
OA221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(Q) A=OA22(IN1,IN2,IN3,IN4) Q=AND2(IN5,A) ;
AOI221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(QN) A=AO22(IN1,IN2,IN3,IN4) QN=NOR2(IN5,A) ;
OAI221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(QN) A=OA22(IN1,IN2,IN3,IN4) QN=NAND2(IN5,A) ;
AO222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(Q) A=AO22(IN1,IN2,IN3,IN4) Q=AO21(IN5,IN6,A) ;
OA222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(Q) A=OA22(IN1,IN2,IN3,IN4) Q=OA21(IN5,IN6,A) ;
AOI222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(QN) A=AO22(IN1,IN2,IN3,IN4) QN=AOI21(IN5,IN6,A) ;
OAI222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(QN) A=OA22(IN1,IN2,IN3,IN4) QN=OAI21(IN5,IN6,A) ;
MUX41X{1,2}$ input(IN1,IN2,IN3,IN4,S0,S1) output(Q) A=MUX21(IN1,IN2,S0) B=MUX21(IN3,IN4,S0) Q=MUX21(A,B,S1) ;
DEC24X{1,2}$ input(IN1,IN2) output(Q0,Q1,Q2,Q3) IN1B=INV1(IN1) IN2B=INV1(IN2) Q0=NOR2(IN1,IN2) Q1=AND(IN1,IN2B) Q2=AND(IN1B,IN2) Q3=AND(IN1,IN2) ;
FADDX{1,2}$ input(A,B,CI) output(S,CO) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(AB,CI,A,B) ;
HADDX{1,2}$ input(A0,B0) output(SO,C1) C1=XOR2(A0,B0) SO=AND2(A0,B0) ;
{,AO}DFFARX{1,2}$ input(D,CLK,RSTB) output(Q,QN) DR=AND2(D,RSTB) Q=DFF(DR,CLK) QN=INV1(Q) ;
DFFASRX{1,2}$ input(D,CLK,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
DFFASX{1,2}$ input(D,CLK,SETB) output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) Q=DFF(DS,CLK) QN=INV1(Q) ;
DFFSSRX{1,2}$ input(CLK,D,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
DFFX{1,2}$ input(D,CLK) output(Q,QN) Q=DFF(D,CLK) QN=INV1(Q) ;
SDFFARX{1,2}$ input(D,CLK,RSTB,SE,SI) output(Q,QN) DR=AND2(D,RSTB) DI=MUX21(DR,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
SDFFASRSX{1,2}$ input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN,S0) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) S0=BUF1(Q) ;
SDFFASRX{1,2}$ input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
SDFFASX{1,2}$ input(D,CLK,SETB,SE,SI) output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
SDFFSSRX{1,2}$ input(CLK,D,RSTB,SETB,SI,SE) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
SDFFX{1,2}$ input(D,CLK,SE,SI) output(Q,QN) DI=MUX21(D,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
LATCHX{1,2}$ input(D,CLK) output(Q,QN) Q=LATCH(D,CLK) QN=INV1(Q) ; AND2_X{1,2,4} input(A1,A2) output(ZN) ZN=AND2(A1,A2) ;
""".replace('$','{,_LVT,_HVT}')) AND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=AND3(A1,A2,A3) ;
AND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4) ;
OR2_X{1,2,4} input(A1,A2) output(ZN) ZN=OR2(A1,A2) ;
OR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=OR3(A1,A2,A3) ;
OR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4) ;
XOR2_X{1,2} input(A,B) output(Z) Z=XOR2(A,B) ;
XNOR2_X{1,2} input(A,B) output(ZN) ZN=XNOR2(A,B) ;
""")
"""An older NANGATE-variant that uses 'ZN' as output pin names for AND and OR gates.
"""
SAED32 = TechLibNew(r""" SAED32 = TechLib(r"""
NBUFFX{2,4,8,16,32}$ input(A) output(Y) Y=BUF1(A) ; NBUFFX{2,4,8,16,32}$ input(A) output(Y) Y=BUF1(A) ;
AOBUFX{1,2,4}$ input(A) output(Y) Y=BUF1(A) ; AOBUFX{1,2,4}$ input(A) output(Y) Y=BUF1(A) ;
DELLN{1,2,3}X2$ input(A) output(Y) Y=BUF1(A) ; DELLN{1,2,3}X2$ input(A) output(Y) Y=BUF1(A) ;
@ -360,38 +320,96 @@ SDFFX{1,2}$ input(D,CLK,SE,SI) output(Q,QN) DI=MUX21(D,SI,SE) Q=DF
LATCHX{1,2}$ input(D,CLK) output(Q,QN) Q=LATCH(D,CLK) QN=INV1(Q) ; LATCHX{1,2}$ input(D,CLK) output(Q,QN) Q=LATCH(D,CLK) QN=INV1(Q) ;
""".replace('$','_RVT')) """.replace('$','_RVT'))
"""The SAED 32nm educational technology library.
It defines all cells except: negative-edge flip-flops, tri-state, latches, clock gating, level shifters
"""
GSC180 = TechLibNew(r"""
BUFX{1,3} input(A) output(Y) Y=BUF1(A) ;
CLKBUFX{1,2,3} input(A) output(Y) Y=BUF1(A) ;
INVX{1,2,4,8} input(A) output(Y) Y=INV1(A) ;
TBUFX{1,2,4,8} input(A,OE) output(Y) Y=AND2(A,OE) ;
TINVX1 input(A,OE) output(Y) AB=INV1(A) Y=AND2(AB,OE) ;
AND2X1 input(A,B) output(Y) Y=AND2(A,B) ; SAED90 = TechLib(r"""
NAND2X{1,2} input(A,B) output(Y) Y=NAND2(A,B) ; NBUFFX{2,4,8,16,32}$ input(INP) output(Z) Z=BUF1(INP) ;
NAND3X1 input(A,B,C) output(Y) Y=NAND3(A,B,C) ; AOBUFX{1,2,4}$ input(INP) output(Z) Z=BUF1(INP) ;
NAND4X1 input(A,B,C,D) output(Y) Y=NAND4(A,B,C,D) ; DELLN{1,2,3}X2$ input(INP) output(Z)Z=BUF1(INP) ;
OR2X1 input(A,B) output(Y) Y=OR2(A,B) ;
OR4X1 input(A,B,C,D) output(Y) Y=OR4(A,B,C,D) ;
NOR2X1 input(A,B) output(Y) Y=NOR2(A,B) ;
NOR3X1 input(A,B,C) output(Y) Y=NOR3(A,B,C) ;
NOR4X1 input(A,B,C,D) output(Y) Y=NOR4(A,B,C,D) ;
XOR2X1 input(A,B) output(Y) Y=XOR2(A,B) ;
MX2X1 input(A,B,S0) output(Y) Y=MUX21(A,B,S0) ; INVX{0,1,2,4,8,16,32}$ input(INP) output(ZN) ZN=INV1(INP) ;
AOI21X1 input(A0,A1,B0) output(Y) Y=AOI21(A0,A1,B0) ; AOINVX{1,2,4}$ input(INP) output(ZN) ZN=INV1(INP) ;
AOI22X1 input(A0,A1,B0,B1) output(Y) Y=AOI22(A0,A1,B0,B1) ; IBUFFX{2,4,8,16,32}$ input(INP) output(ZN) ZN=INV1(INP) ;
OAI21X1 input(A0,A1,B0) output(Y) Y=OAI21(A0,A1,B0) ;
OAI22X1 input(A0,A1,B0,B1) output(Y) Y=OAI22(A0,A1,B0,B1) ;
OAI33X1 input(A0,A1,A2,B0,B1,B2) output(Y) AA=OR2(A0,A1) BB=OR2(B0,B1) Y=OAI22(AA,A2,BB,B2) ;
ADDFX1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(AB,CI,A,B) ;
ADDHX1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
DFFX1 input(CK,D) output(Q,QN) Q=DFF(D,CK) QN=INV1(Q) ; TIEH$ output(Z) Z=__const1__() ;
DFFSRX1 input(CK,D,RN,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) Q=DFF(DRS,CK) QN=INV1(Q) ; TIEL$ output(ZN) ZN=__const0__() ;
SDFFSRX1 input(CK,D,RN,SE,SI,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;
TLATSRX1 input(D,G,RN,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) Q=LATCH(DRS,G) QN=INV1(Q) ; HEAD2X{2,4,8,16,32}$ input(SLEEP) output(SLEEPOUT) SLEEPOUT=BUF1(SLEEP) ;
TLATX1 input(C,D) output(Q,QN) Q=LATCH(D,C) QN=INV1(Q) ; HEADX{2,4,8,16,32}$ input(SLEEP) ;
""")
ANTENNA$ input(INP) ;
CLOAD1$ input(INP) ;
DCAP$ ;
DHFILL{HLH,LHL}2 ;
DHFILLHLHLS11$ ;
SHFILL{1,2,3,64,128}$ ;
AND2X{1,2,4}$ input(IN1,IN2) output(Q) Q=AND2(IN1,IN2) ;
AND3X{1,2,4}$ input(IN1,IN2,IN3) output(Q) Q=AND3(IN1,IN2,IN3) ;
AND4X{1,2,4}$ input(IN1,IN2,IN3,IN4) output(Q) Q=AND4(IN1,IN2,IN3,IN4) ;
OR2X{1,2,4}$ input(IN1,IN2) output(Q) Q=OR2(IN1,IN2) ;
OR3X{1,2,4}$ input(IN1,IN2,IN3) output(Q) Q=OR3(IN1,IN2,IN3) ;
OR4X{1,2,4}$ input(IN1,IN2,IN3,IN4) output(Q) Q=OR4(IN1,IN2,IN3,IN4) ;
XOR2X{1,2}$ input(IN1,IN2) output(Q) Q=XOR2(IN1,IN2) ;
XOR3X{1,2}$ input(IN1,IN2,IN3) output(Q) Q=XOR3(IN1,IN2,IN3) ;
NAND2X{0,1,2,4}$ input(IN1,IN2) output(QN) QN=NAND2(IN1,IN2) ;
NAND3X{0,1,2,4}$ input(IN1,IN2,IN3) output(QN) QN=NAND3(IN1,IN2,IN3) ;
NAND4X{0,1}$ input(IN1,IN2,IN3,IN4) output(QN) QN=NAND4(IN1,IN2,IN3,IN4) ;
NOR2X{0,1,2,4}$ input(IN1,IN2) output(QN) QN=NOR2(IN1,IN2) ;
NOR3X{0,1,2,4}$ input(IN1,IN2,IN3) output(QN) QN=NOR3(IN1,IN2,IN3) ;
NOR4X{0,1}$ input(IN1,IN2,IN3,IN4) output(QN) QN=NOR4(IN1,IN2,IN3,IN4) ;
XNOR2X{1,2}$ input(IN1,IN2) output(Q) Q=XNOR2(IN1,IN2) ;
XNOR3X{1,2}$ input(IN1,IN2,IN3) output(Q) Q=XNOR3(IN1,IN2,IN3) ;
ISOLAND{,AO}X{1,2,4,8}$ input(ISO,D) output(Q) ISOB=NOT1(ISO) Q=AND2(ISOB,D) ;
ISOLOR{,AO}X{1,2,4,8}$ input(ISO,D) output(Q) Q=OR2(ISO,D) ;
AO21X{1,2}$ input(IN1,IN2,IN3) output(Q) Q=AO21(IN1,IN2,IN3) ;
OA21X{1,2}$ input(IN1,IN2,IN3) output(Q) Q=OA21(IN1,IN2,IN3) ;
AOI21X{1,2}$ input(IN1,IN2,IN3) output(QN) QN=AOI21(IN1,IN2,IN3) ;
OAI21X{1,2}$ input(IN1,IN2,IN3) output(QN) QN=OAI21(IN1,IN2,IN3) ;
AO22X{1,2}$ input(IN1,IN2,IN3,IN4) output(Q) Q=AO22(IN1,IN2,IN3,IN4) ;
OA22X{1,2}$ input(IN1,IN2,IN3,IN4) output(Q) Q=OA22(IN1,IN2,IN3,IN4) ;
AOI22X{1,2}$ input(IN1,IN2,IN3,IN4) output(QN) QN=AOI22(IN1,IN2,IN3,IN4) ;
OAI22X{1,2}$ input(IN1,IN2,IN3,IN4) output(QN) QN=OAI22(IN1,IN2,IN3,IN4) ;
MUX21X{1,2}$ input(IN1,IN2,S) output(Q) Q=MUX21(IN1,IN2,S) ;
AO221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(Q) A=AO22(IN1,IN2,IN3,IN4) Q=OR2(IN5,A) ;
OA221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(Q) A=OA22(IN1,IN2,IN3,IN4) Q=AND2(IN5,A) ;
AOI221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(QN) A=AO22(IN1,IN2,IN3,IN4) QN=NOR2(IN5,A) ;
OAI221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(QN) A=OA22(IN1,IN2,IN3,IN4) QN=NAND2(IN5,A) ;
AO222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(Q) A=AO22(IN1,IN2,IN3,IN4) Q=AO21(IN5,IN6,A) ;
OA222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(Q) A=OA22(IN1,IN2,IN3,IN4) Q=OA21(IN5,IN6,A) ;
AOI222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(QN) A=AO22(IN1,IN2,IN3,IN4) QN=AOI21(IN5,IN6,A) ;
OAI222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(QN) A=OA22(IN1,IN2,IN3,IN4) QN=OAI21(IN5,IN6,A) ;
MUX41X{1,2}$ input(IN1,IN2,IN3,IN4,S0,S1) output(Q) A=MUX21(IN1,IN2,S0) B=MUX21(IN3,IN4,S0) Q=MUX21(A,B,S1) ;
DEC24X{1,2}$ input(IN1,IN2) output(Q0,Q1,Q2,Q3) IN1B=INV1(IN1) IN2B=INV1(IN2) Q0=NOR2(IN1,IN2) Q1=AND(IN1,IN2B) Q2=AND(IN1B,IN2) Q3=AND(IN1,IN2) ;
FADDX{1,2}$ input(A,B,CI) output(S,CO) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(AB,CI,A,B) ;
HADDX{1,2}$ input(A0,B0) output(SO,C1) C1=XOR2(A0,B0) SO=AND2(A0,B0) ;
{,AO}DFFARX{1,2}$ input(D,CLK,RSTB) output(Q,QN) DR=AND2(D,RSTB) Q=DFF(DR,CLK) QN=INV1(Q) ;
DFFASRX{1,2}$ input(D,CLK,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
DFFASX{1,2}$ input(D,CLK,SETB) output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) Q=DFF(DS,CLK) QN=INV1(Q) ;
DFFSSRX{1,2}$ input(CLK,D,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
DFFX{1,2}$ input(D,CLK) output(Q,QN) Q=DFF(D,CLK) QN=INV1(Q) ;
SDFFARX{1,2}$ input(D,CLK,RSTB,SE,SI) output(Q,QN) DR=AND2(D,RSTB) DI=MUX21(DR,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
SDFFASRSX{1,2}$ input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN,S0) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) S0=BUF1(Q) ;
SDFFASRX{1,2}$ input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
SDFFASX{1,2}$ input(D,CLK,SETB,SE,SI) output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
SDFFSSRX{1,2}$ input(CLK,D,RSTB,SETB,SI,SE) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
SDFFX{1,2}$ input(D,CLK,SE,SI) output(Q,QN) DI=MUX21(D,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
LATCHX{1,2}$ input(D,CLK) output(Q,QN) Q=LATCH(D,CLK) QN=INV1(Q) ;
""".replace('$','{,_LVT,_HVT}'))
"""The SAED 90nm educational technology library.
It defines all cells except: negative-edge flip-flops, tri-state, latches, clock gating, level shifters
"""

26
src/kyupy/verilog.py

@ -10,7 +10,7 @@ from lark import Lark, Transformer, Tree
from . import log, readtext from . import log, readtext
from .circuit import Circuit, Node, Line from .circuit import Circuit, Node, Line
from .techlib import TechLib from .techlib import NANGATE
Instantiation = namedtuple('Instantiation', ['type', 'name', 'pins']) Instantiation = namedtuple('Instantiation', ['type', 'name', 'pins'])
@ -35,7 +35,7 @@ class SignalDeclaration:
class VerilogTransformer(Transformer): class VerilogTransformer(Transformer):
def __init__(self, branchforks=False, tlib=TechLib()): def __init__(self, branchforks=False, tlib=NANGATE):
super().__init__() super().__init__()
self.branchforks = branchforks self.branchforks = branchforks
self.tlib = tlib self.tlib = tlib
@ -232,23 +232,29 @@ GRAMMAR = r"""
""" """
def parse(text, *, branchforks=False, tlib=TechLib()): def parse(text, tlib=NANGATE, branchforks=False):
"""Parses the given ``text`` as Verilog code. """Parses the given ``text`` as Verilog code.
:param text: A string with Verilog code. :param text: A string with Verilog code.
:param tlib: A technology library object that defines all known cells.
:type tlib: :py:class:`~kyupy.techlib.TechLib`
:param branchforks: If set to ``True``, the returned circuit will include additional `forks` on each fanout branch. :param branchforks: If set to ``True``, the returned circuit will include additional `forks` on each fanout branch.
These forks are needed to correctly annotate interconnect delays These forks are needed to correctly annotate interconnect delays
(see :py:func:`kyupy.sdf.DelayFile.annotation`). (see :py:func:`~kyupy.sdf.DelayFile.interconnects()`).
:param tlib: A technology library object that provides pin name mappings. :return: A :py:class:`~kyupy.circuit.Circuit` object.
:type tlib: :py:class:`~kyupy.techlib.TechLib`
:return: A :class:`~kyupy.circuit.Circuit` object.
""" """
return Lark(GRAMMAR, parser="lalr", transformer=VerilogTransformer(branchforks, tlib)).parse(text) return Lark(GRAMMAR, parser="lalr", transformer=VerilogTransformer(branchforks, tlib)).parse(text)
def load(file, *args, **kwargs): def load(file, tlib=NANGATE, branchforks=False):
"""Parses the contents of ``file`` as Verilog code. """Parses the contents of ``file`` as Verilog code.
The given file may be gzip compressed. Takes the same keyword arguments as :py:func:`parse`. :param file: A file name or a file handle. Files with `.gz`-suffix are decompressed on-the-fly.
:param tlib: A technology library object that defines all known cells.
:type tlib: :py:class:`~kyupy.techlib.TechLib`
:param branchforks: If set to ``True``, the returned circuit will include additional `forks` on each fanout branch.
These forks are needed to correctly annotate interconnect delays
(see :py:func:`~kyupy.sdf.DelayFile.interconnects()`).
:return: A :py:class:`~kyupy.circuit.Circuit` object.
""" """
return parse(readtext(file), *args, **kwargs) return parse(readtext(file), tlib, branchforks)

49
src/kyupy/wave_sim.py

@ -16,7 +16,7 @@ import math
import numpy as np import numpy as np
from . import numba, cuda, hr_bytes, sim, cdiv from . import numba, cuda, sim, cdiv
TMAX = np.float32(2 ** 127) TMAX = np.float32(2 ** 127)
@ -32,7 +32,8 @@ class WaveSim(sim.SimOps):
"""A waveform-based combinational logic timing simulator running on CPU. """A waveform-based combinational logic timing simulator running on CPU.
:param circuit: The circuit to simulate. :param circuit: The circuit to simulate.
:param timing: The timing annotation of the circuit (see :py:func:`kyupy.sdf.DelayFile.annotation` for details) :param delays: One or more delay annotations for the circuit (see :py:func:`kyupy.sdf.DelayFile.iopaths` for details).
Each parallel simulation may use the same delays or different delays, depending on the use-case (see :py:attr:`simctl_int`).
:param sims: The number of parallel simulations. :param sims: The number of parallel simulations.
:param c_caps: The number of floats available in each waveform. Values must be positive and a multiple of 4. :param c_caps: The number of floats available in each waveform. Values must be positive and a multiple of 4.
Waveforms encode the signal switching history by storing transition times. Waveforms encode the signal switching history by storing transition times.
@ -40,12 +41,16 @@ class WaveSim(sim.SimOps):
that can be stored. A capacity of ``n`` can store at least ``n-2`` transitions. If more transitions are that can be stored. A capacity of ``n`` can store at least ``n-2`` transitions. If more transitions are
generated during simulation, the latest glitch is removed (freeing up two transition times) and an overflow generated during simulation, the latest glitch is removed (freeing up two transition times) and an overflow
flag is set. If an integer is given, all waveforms are set to that same capacity. With an array of length flag is set. If an integer is given, all waveforms are set to that same capacity. With an array of length
``len(circuit.lines)`` the capacity is set for each intermediate waveform individually. ``len(circuit.lines)`` the capacity is set individually for each intermediate waveform.
:param a_ctrl: An integer array controlling the accumulation of weighted switching activity during simulation.
Its shape must be ``(len(circuit.lines), 3)``. ``a_ctrl[...,0]`` is the index into the accumulation buffer, -1 means ignore.
``a_ctrl[...,1]`` is the (integer) weight for a rising transition, ``a_ctrl[...,2]`` is the (integer) weight for
a falling transition. The accumulation buffer (:py:attr:`abuf`) is allocated automatically if ``a_ctrl`` is given.
:param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
memory footprint, but intermediate signal waveforms may become inaccessible after a propagation.
:param strip_forks: If enabled, the simulator will not evaluate fork nodes explicitly. This saves simulation time :param strip_forks: If enabled, the simulator will not evaluate fork nodes explicitly. This saves simulation time
by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes and memory by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes
are ignored. are ignored.
:param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
memory footprint, but intermediate signal waveforms become unaccessible after a propagation.
""" """
def __init__(self, circuit, delays, sims=8, c_caps=16, a_ctrl=None, c_reuse=False, strip_forks=False): def __init__(self, circuit, delays, sims=8, c_caps=16, a_ctrl=None, c_reuse=False, strip_forks=False):
super().__init__(circuit, c_caps=c_caps, c_caps_min=4, a_ctrl=a_ctrl, c_reuse=c_reuse, strip_forks=strip_forks) super().__init__(circuit, c_caps=c_caps, c_caps_min=4, a_ctrl=a_ctrl, c_reuse=c_reuse, strip_forks=strip_forks)
@ -58,8 +63,8 @@ class WaveSim(sim.SimOps):
self.s = np.zeros((11, self.s_len, sims), dtype=np.float32) self.s = np.zeros((11, self.s_len, sims), dtype=np.float32)
"""Information about the logic values and transitions around the sequential elements (flip-flops) and ports. """Information about the logic values and transitions around the sequential elements (flip-flops) and ports.
The first 3 values are read by ``s_to_c()``. The first 3 values are read by :py:func:`s_to_c`.
The remaining values are written by ``c_to_s()``. The remaining values are written by :py:func:`c_to_s`.
The elements are as follows: The elements are as follows:
@ -82,10 +87,13 @@ class WaveSim(sim.SimOps):
self.abuf = np.zeros((self.abuf_len, sims), dtype=np.int32) if self.abuf_len > 0 else np.zeros((1, 1), dtype=np.int32) self.abuf = np.zeros((self.abuf_len, sims), dtype=np.int32) if self.abuf_len > 0 else np.zeros((1, 1), dtype=np.int32)
self.simctl_int = np.zeros((2, sims), dtype=np.int32) self.simctl_int = np.zeros((2, sims), dtype=np.int32)
"""Per-simulation delay configuration. """Integer array for per-simulation delay configuration.
* ``simctl_int[0]`` delay dataset or random seed for picking a delay * ``simctl_int[0]`` delay dataset or random seed for picking a delay. By default, each sim has a unique seed.
* ``simctl_int[1]`` 0: seed parameter specifies dataset, 1: simctl_int[0] specifies dataset, 2: simctl_int[0] + seed param. is a random seed for picking a delay dataset * ``simctl_int[1]`` Method for picking a delay:
* 0: seed parameter of :py:func:`c_prop` directly specifies dataset for all simulations
* 1: ``simctl_int[0]`` specifies dataset on a per-simulation basis
* 2 (default): ``simctl_int[0]`` and seed parameter of :py:func:`c_prop` together are a random seed for picking a delay dataset.
""" """
self.simctl_int[0] = range(sims) # unique seed for each sim by default, zero this to pick same delays for all sims. self.simctl_int[0] = range(sims) # unique seed for each sim by default, zero this to pick same delays for all sims.
self.simctl_int[1] = 2 # random picking by default. self.simctl_int[1] = 2 # random picking by default.
@ -100,8 +108,7 @@ class WaveSim(sim.SimOps):
def s_to_c(self): def s_to_c(self):
"""Transfers values of sequential elements and primary inputs to the combinational portion. """Transfers values of sequential elements and primary inputs to the combinational portion.
Based on the data in ``self.s``, waveforms are generated on the input lines of the circuit. Waveforms are generated on the input lines of the combinational circuit based on the data in :py:attr:`s`.
It modifies ``self.c``.
""" """
sins = self.s[:, self.pippi_s_locs] sins = self.s[:, self.pippi_s_locs]
cond = (sins[2] != 0) + 2*(sins[0] != 0) # choices order: 0 R F 1 cond = (sins[2] != 0) + 2*(sins[0] != 0) # choices order: 0 R F 1
@ -113,7 +120,7 @@ class WaveSim(sim.SimOps):
"""Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs. """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs.
:param sims: Number of parallel simulations to execute. If None, all available simulations are performed. :param sims: Number of parallel simulations to execute. If None, all available simulations are performed.
:param seed: Random seed for delay variations. :param seed: Seed for picking delays. See also: :py:attr:`simctl_int`.
""" """
sims = min(sims or self.sims, self.sims) sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops): for op_start, op_stop in zip(self.level_starts, self.level_stops):
@ -122,8 +129,8 @@ class WaveSim(sim.SimOps):
def c_to_s(self, time=TMAX, sd=0.0, seed=1): def c_to_s(self, time=TMAX, sd=0.0, seed=1):
"""Simulates a capture operation at all sequential elements and primary outputs. """Simulates a capture operation at all sequential elements and primary outputs.
Propagated waveforms in ``self.c`` at and around the given capture time are analyzed and Propagated waveforms at the outputs of the combinational circuit at and around the given capture time are analyzed and
the results are stored in ``self.s``. the results are stored in :py:attr:`s`.
:param time: The desired capture time. By default, a capture of the settled value is performed. :param time: The desired capture time. By default, a capture of the settled value is performed.
:param sd: A standard deviation for uncertainty in the actual capture time. :param sd: A standard deviation for uncertainty in the actual capture time.
@ -134,9 +141,9 @@ class WaveSim(sim.SimOps):
self.s[3:, s_loc, vector] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed) self.s[3:, s_loc, vector] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed)
def s_ppo_to_ppi(self, time=0.0): def s_ppo_to_ppi(self, time=0.0):
"""Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI). """Re-assigns the last sampled capture of the PPOs to the appropriate pseudo-primary inputs (PPIs).
Each PPI transition is constructed from its previous final value, the Each PPI transition is constructed from the final value of the previous assignment, the
given time, and the sampled captured value of its PPO. Reads and modifies ``self.s``. given time, and the sampled captured value of its PPO. Reads and modifies :py:attr:`s`.
:param time: The transition time at the inputs (usually 0.0). :param time: The transition time at the inputs (usually 0.0).
""" """
@ -322,7 +329,8 @@ def wave_capture_cpu(c, c_loc, c_len, vector, time=TMAX, sd=0.0, seed=1):
class WaveSimCuda(WaveSim): class WaveSimCuda(WaveSim):
"""A GPU-accelerated waveform-based combinational logic timing simulator. """A GPU-accelerated waveform-based combinational logic timing simulator.
The API is the same as for :py:class:`WaveSim`. The API is identical to :py:class:`WaveSim`. See there for complete documentation.
All internal memories are mirrored into GPU memory upon construction. All internal memories are mirrored into GPU memory upon construction.
Some operations like access to single waveforms can involve large communication overheads. Some operations like access to single waveforms can involve large communication overheads.
""" """
@ -431,7 +439,6 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start,
# accumulate WSA into abuf # accumulate WSA into abuf
if a_loc >= 0: if a_loc >= 0:
#abuf[a_loc, sim] += nrise*a_wr + nfall*a_wf
cuda.atomic.add(abuf, (a_loc, sim), nrise*a_wr + nfall*a_wf) cuda.atomic.add(abuf, (a_loc, sim), nrise*a_wr + nfall*a_wf)

8
tests/conftest.py

@ -8,13 +8,13 @@ def mydir():
return Path(os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))) return Path(os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))))
@pytest.fixture(scope='session') @pytest.fixture(scope='session')
def b14_circuit(mydir): def b15_2ig_circuit(mydir):
from kyupy import verilog from kyupy import verilog
from kyupy.techlib import SAED32 from kyupy.techlib import SAED32
return verilog.load(mydir / 'b14.v.gz', branchforks=True, tlib=SAED32) return verilog.load(mydir / 'b15_2ig.v.gz', branchforks=True, tlib=SAED32)
@pytest.fixture(scope='session') @pytest.fixture(scope='session')
def b14_delays(mydir, b14_circuit): def b15_2ig_delays(mydir, b15_2ig_circuit):
from kyupy import sdf from kyupy import sdf
from kyupy.techlib import SAED32 from kyupy.techlib import SAED32
return sdf.load(mydir / 'b14.sdf.gz').iopaths(b14_circuit, tlib=SAED32)[1:2] return sdf.load(mydir / 'b15_2ig.sdf.gz').iopaths(b15_2ig_circuit, tlib=SAED32)[1:2]

335
tests/rng_haltonBase2.synth_yosys.v

@ -0,0 +1,335 @@
/* Generated by Yosys 0.9 (git sha1 UNKNOWN, gcc 4.8.5 -fPIC -Os) */
/*
 * rng1: gate-level netlist built around a 12-bit state register
 * "halton.counter" whose bits are driven out in reversed order on o_output.
 * Judging by the source file name (rng_haltonBase2) this is a base-2 Halton
 * sequence generator; that is inferred from the name, not stated here.
 *
 * Ports:
 *   clk      - input, wired to the CLK pin of every DFFX1 instance.
 *   reset    - input, feeds the NOR gate that drives each next-state bit
 *              _00_[i] (presumably a synchronous, active-high clear).
 *   o_output - 12-bit output, { counter[0], ..., counter[11] }.
 *
 * NOTE(review): the cells used below (AND2X1, AND3X1, AND4X1, NOR2X0,
 * NOR3X0, AOI21X1, DFFX1) are defined by an external technology library.
 * The functional remarks in this file assume each cell does what its name
 * suggests; confirm against the library in use.
 */
(* top = 1 *)
(* src = "rng_haltonBase2.v:1" *)
module rng1(clk, reset, o_output);
/*
 * Internal nets:
 *   _00_        - next-state value, connected to the flip-flop D pins.
 *   _01_.._22_  - intermediate gate outputs of the next-state logic.
 *   _23_.._34_  - connected to the flip-flop QN pins only; nothing else in
 *                 this module reads them.
 */
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
wire [11:0] _00_;
wire _01_;
wire _02_;
wire _03_;
wire _04_;
wire _05_;
wire _06_;
wire _07_;
wire _08_;
wire _09_;
wire _10_;
wire _11_;
wire _12_;
wire _13_;
wire _14_;
wire _15_;
wire _16_;
wire _17_;
wire _18_;
wire _19_;
wire _20_;
wire _21_;
wire _22_;
wire _23_;
wire _24_;
wire _25_;
wire _26_;
wire _27_;
wire _28_;
wire _29_;
wire _30_;
wire _31_;
wire _32_;
wire _33_;
wire _34_;
(* src = "rng_haltonBase2.v:2" *)
input clk;
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:12" *)
wire \halton.clk ;
(* init = 12'h000 *)
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:17" *)
wire [11:0] \halton.counter ;
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:14" *)
wire [11:0] \halton.o_output ;
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:13" *)
wire \halton.reset ;
(* src = "rng_haltonBase2.v:4" *)
output [11:0] o_output;
(* src = "rng_haltonBase2.v:3" *)
input reset;
/*
 * Next-state logic for bits 1..11. Each bit i is built from three gates:
 *   - an AND of counter[i] with lower counter bits or an earlier AND output,
 *   - a NOR (AOI21 for bit 11) over counter[i] and the lower-bit term,
 *   - a NOR3 of reset and the two results above, driving _00_[i].
 * Assuming the cell names reflect their functions, this evaluates to
 *   _00_[i] = ~reset & (counter[i] ^ carry_i)
 * where carry_i is the AND of all lower counter bits, i.e. counter + 1.
 */
AND2X1 _35_ (
.IN1(\halton.counter [1]),
.IN2(\halton.counter [0]),
.Q(_01_)
);
NOR2X0 _36_ (
.IN1(\halton.counter [1]),
.IN2(\halton.counter [0]),
.QN(_02_)
);
NOR3X0 _37_ (
.IN1(reset),
.IN2(_01_),
.IN3(_02_),
.QN(_00_[1])
);
AND2X1 _38_ (
.IN1(\halton.counter [2]),
.IN2(_01_),
.Q(_03_)
);
NOR2X0 _39_ (
.IN1(\halton.counter [2]),
.IN2(_01_),
.QN(_04_)
);
NOR3X0 _40_ (
.IN1(reset),
.IN2(_03_),
.IN3(_04_),
.QN(_00_[2])
);
AND4X1 _41_ (
.IN1(\halton.counter [1]),
.IN2(\halton.counter [0]),
.IN3(\halton.counter [2]),
.IN4(\halton.counter [3]),
.Q(_05_)
);
NOR2X0 _42_ (
.IN1(\halton.counter [3]),
.IN2(_03_),
.QN(_06_)
);
NOR3X0 _43_ (
.IN1(reset),
.IN2(_05_),
.IN3(_06_),
.QN(_00_[3])
);
AND2X1 _44_ (
.IN1(\halton.counter [4]),
.IN2(_05_),
.Q(_07_)
);
NOR2X0 _45_ (
.IN1(\halton.counter [4]),
.IN2(_05_),
.QN(_08_)
);
NOR3X0 _46_ (
.IN1(reset),
.IN2(_07_),
.IN3(_08_),
.QN(_00_[4])
);
AND2X1 _47_ (
.IN1(\halton.counter [5]),
.IN2(_07_),
.Q(_09_)
);
NOR2X0 _48_ (
.IN1(\halton.counter [5]),
.IN2(_07_),
.QN(_10_)
);
NOR3X0 _49_ (
.IN1(reset),
.IN2(_09_),
.IN3(_10_),
.QN(_00_[5])
);
AND4X1 _50_ (
.IN1(\halton.counter [4]),
.IN2(\halton.counter [5]),
.IN3(\halton.counter [6]),
.IN4(_05_),
.Q(_11_)
);
NOR2X0 _51_ (
.IN1(\halton.counter [6]),
.IN2(_09_),
.QN(_12_)
);
NOR3X0 _52_ (
.IN1(reset),
.IN2(_11_),
.IN3(_12_),
.QN(_00_[6])
);
AND2X1 _53_ (
.IN1(\halton.counter [7]),
.IN2(_11_),
.Q(_13_)
);
NOR2X0 _54_ (
.IN1(\halton.counter [7]),
.IN2(_11_),
.QN(_14_)
);
NOR3X0 _55_ (
.IN1(reset),
.IN2(_13_),
.IN3(_14_),
.QN(_00_[7])
);
AND3X1 _56_ (
.IN1(\halton.counter [7]),
.IN2(\halton.counter [8]),
.IN3(_11_),
.Q(_15_)
);
NOR2X0 _57_ (
.IN1(\halton.counter [8]),
.IN2(_13_),
.QN(_16_)
);
NOR3X0 _58_ (
.IN1(reset),
.IN2(_15_),
.IN3(_16_),
.QN(_00_[8])
);
AND4X1 _59_ (
.IN1(\halton.counter [7]),
.IN2(\halton.counter [8]),
.IN3(\halton.counter [9]),
.IN4(_11_),
.Q(_17_)
);
NOR2X0 _60_ (
.IN1(\halton.counter [9]),
.IN2(_15_),
.QN(_18_)
);
NOR3X0 _61_ (
.IN1(reset),
.IN2(_17_),
.IN3(_18_),
.QN(_00_[9])
);
AND2X1 _62_ (
.IN1(\halton.counter [10]),
.IN2(_17_),
.Q(_19_)
);
NOR2X0 _63_ (
.IN1(\halton.counter [10]),
.IN2(_17_),
.QN(_20_)
);
NOR3X0 _64_ (
.IN1(reset),
.IN2(_19_),
.IN3(_20_),
.QN(_00_[10])
);
AND3X1 _65_ (
.IN1(\halton.counter [10]),
.IN2(\halton.counter [11]),
.IN3(_17_),
.Q(_21_)
);
/*
 * Bit 11 uses an AOI21 where the other bits use a NOR2.
 * NOTE(review): presumably QN = ~((IN1 & IN2) | IN3), which folds the
 * counter[10] & _17_ carry term into the gate; confirm against the library.
 */
AOI21X1 _66_ (
.IN1(\halton.counter [10]),
.IN2(_17_),
.IN3(\halton.counter [11]),
.QN(_22_)
);
NOR3X0 _67_ (
.IN1(reset),
.IN2(_21_),
.IN3(_22_),
.QN(_00_[11])
);
/* Bit 0 has no carry input: _00_[0] is a NOR of reset and counter[0]. */
NOR2X0 _68_ (
.IN1(reset),
.IN2(\halton.counter [0]),
.QN(_00_[0])
);
/*
 * State registers: one DFFX1 per counter bit, all clocked by clk.
 * D takes _00_[i], Q drives counter[i], QN goes to an otherwise unused net.
 */
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _69_ (
.CLK(clk),
.D(_00_[0]),
.Q(\halton.counter [0]),
.QN(_23_)
);
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _70_ (
.CLK(clk),
.D(_00_[1]),
.Q(\halton.counter [1]),
.QN(_24_)
);
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _71_ (
.CLK(clk),
.D(_00_[2]),
.Q(\halton.counter [2]),
.QN(_25_)
);
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _72_ (
.CLK(clk),
.D(_00_[3]),
.Q(\halton.counter [3]),
.QN(_26_)
);
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _73_ (
.CLK(clk),
.D(_00_[4]),
.Q(\halton.counter [4]),
.QN(_27_)
);
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _74_ (
.CLK(clk),
.D(_00_[5]),
.Q(\halton.counter [5]),
.QN(_28_)
);
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _75_ (
.CLK(clk),
.D(_00_[6]),
.Q(\halton.counter [6]),
.QN(_29_)
);
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _76_ (
.CLK(clk),
.D(_00_[7]),
.Q(\halton.counter [7]),
.QN(_30_)
);
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _77_ (
.CLK(clk),
.D(_00_[8]),
.Q(\halton.counter [8]),
.QN(_31_)
);
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _78_ (
.CLK(clk),
.D(_00_[9]),
.Q(\halton.counter [9]),
.QN(_32_)
);
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _79_ (
.CLK(clk),
.D(_00_[10]),
.Q(\halton.counter [10]),
.QN(_33_)
);
(* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
DFFX1 _80_ (
.CLK(clk),
.D(_00_[11]),
.Q(\halton.counter [11]),
.QN(_34_)
);
/*
 * Continuous assignments. The "halton."-prefixed nets alias the top-level
 * ports (presumably left over from flattening a sub-instance named halton).
 * Both output concatenations list counter[0] first, so counter[0] ends up in
 * the most significant position: o_output is the counter with its bit order
 * reversed.
 */
assign \halton.clk = clk;
assign \halton.o_output = { \halton.counter [0], \halton.counter [1], \halton.counter [2], \halton.counter [3], \halton.counter [4], \halton.counter [5], \halton.counter [6], \halton.counter [7], \halton.counter [8], \halton.counter [9], \halton.counter [10], \halton.counter [11] };
assign \halton.reset = reset;
assign o_output = { \halton.counter [0], \halton.counter [1], \halton.counter [2], \halton.counter [3], \halton.counter [4], \halton.counter [5], \halton.counter [6], \halton.counter [7], \halton.counter [8], \halton.counter [9], \halton.counter [10], \halton.counter [11] };
endmodule

7
tests/test_sdf.py

@ -72,12 +72,11 @@ def test_parse():
''' '''
df = sdf.parse(test) df = sdf.parse(test)
assert df.name == 'test' assert df.name == 'test'
# print(f'DelayFile(name={df.name}, interconnects={len(df.interconnects)}, iopaths={len(df.iopaths)})')
def test_b14(mydir): def test_b15(mydir):
df = sdf.load(mydir / 'b14.sdf.gz') df = sdf.load(mydir / 'b15_2ig.sdf.gz')
assert df.name == 'b14' assert df.name == 'b15'
def test_gates(mydir): def test_gates(mydir):

20
tests/test_stil.py

@ -1,21 +1,21 @@
from kyupy import stil, verilog from kyupy import stil, verilog
from kyupy.techlib import SAED32
def test_b15(mydir):
b15 = verilog.load(mydir / 'b15_2ig.v.gz', tlib=SAED32)
def test_b14(mydir): s = stil.load(mydir / 'b15_2ig.sa_nf.stil.gz')
b14 = verilog.load(mydir / 'b14.v.gz')
s = stil.load(mydir / 'b14.stuck.stil.gz')
assert len(s.signal_groups) == 10 assert len(s.signal_groups) == 10
assert len(s.scan_chains) == 1 assert len(s.scan_chains) == 1
assert len(s.calls) == 2163 assert len(s.calls) == 1357
tests = s.tests(b14) tests = s.tests(b15)
resp = s.responses(b14) resp = s.responses(b15)
assert len(tests) > 0 assert len(tests) > 0
assert len(resp) > 0 assert len(resp) > 0
s2 = stil.load(mydir / 'b14.transition.stil.gz') s2 = stil.load(mydir / 'b15_2ig.tf_nf.stil.gz')
tests = s2.tests_loc(b14) tests = s2.tests_loc(b15)
resp = s2.responses(b14) resp = s2.responses(b15)
assert len(tests) > 0 assert len(tests) > 0
assert len(resp) > 0 assert len(resp) > 0

12
tests/test_verilog.py

@ -1,11 +1,11 @@
from kyupy import verilog from kyupy import verilog
from kyupy.techlib import SAED90, SAED32
def test_b01(mydir): def test_b01(mydir):
with open(mydir / 'b01.v', 'r') as f: with open(mydir / 'b01.v', 'r') as f:
c = verilog.parse(f.read()) c = verilog.parse(f.read(), tlib=SAED90)
assert c is not None assert c is not None
assert verilog.load(mydir / 'b01.v') is not None assert verilog.load(mydir / 'b01.v', tlib=SAED90) is not None
assert len(c.nodes) == 139 assert len(c.nodes) == 139
assert len(c.lines) == 203 assert len(c.lines) == 203
@ -16,7 +16,7 @@ def test_b01(mydir):
def test_b15(mydir): def test_b15(mydir):
c = verilog.load(mydir / 'b15_4ig.v.gz') c = verilog.load(mydir / 'b15_4ig.v.gz', tlib=SAED32)
assert len(c.nodes) == 12067 assert len(c.nodes) == 12067
assert len(c.lines) == 20731 assert len(c.lines) == 20731
stats = c.stats stats = c.stats
@ -26,7 +26,7 @@ def test_b15(mydir):
def test_gates(mydir): def test_gates(mydir):
c = verilog.load(mydir / 'gates.v') c = verilog.load(mydir / 'gates.v', tlib=SAED90)
assert len(c.nodes) == 10 assert len(c.nodes) == 10
assert len(c.lines) == 10 assert len(c.lines) == 10
stats = c.stats stats = c.stats
@ -36,7 +36,7 @@ def test_gates(mydir):
def test_halton2(mydir): def test_halton2(mydir):
c = verilog.load(mydir / 'rng_haltonBase2.synth_yosys.v') c = verilog.load(mydir / 'rng_haltonBase2.synth_yosys.v', tlib=SAED90)
assert len(c.nodes) == 146 assert len(c.nodes) == 146
assert len(c.lines) == 210 assert len(c.lines) == 210
stats = c.stats stats = c.stats

15
tests/test_wave_sim.py

@ -156,16 +156,13 @@ def compare_to_logic_sim(wsim: WaveSim):
np.testing.assert_allclose(resp, exp) np.testing.assert_allclose(resp, exp)
def test_b14(b14_circuit, b14_delays): def test_b15(b15_2ig_circuit, b15_2ig_delays):
compare_to_logic_sim(WaveSim(b14_circuit, b14_delays, 8)) compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8))
def test_b14_strip_forks(b14_circuit, b14_delays): def test_b15_strip_forks(b15_2ig_circuit, b15_2ig_delays):
compare_to_logic_sim(WaveSim(b14_circuit, b14_delays, 8, strip_forks=True)) compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))
def test_b14_cuda(b14_circuit, b14_delays): def test_b15_cuda(b15_2ig_circuit, b15_2ig_delays):
compare_to_logic_sim(WaveSimCuda(b14_circuit, b14_delays, 8, strip_forks=True)) compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))
if __name__ == '__main__':
test_nand_delays()

Loading…
Cancel
Save