Make Node and Line indexable, documentation.

5 years ago · 35cf63cf38
6 changed files with 142 additions and 42 deletions
--- a/src/kyupy/__init__.py
+++ b/src/kyupy/__init__.py
@ -10,6 +10,24 @@ import gzip
				@@ -10,6 +10,24 @@ import gzip
 import numpy as np


+_pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)])
+
+
+def popcount(a):
+    return np.sum(_pop_count_lut[a])
+
+
+def readtext(file):
+    if hasattr(file, 'read'):
+        return file.read()
+    if str(file).endswith('.gz'):
+        with gzip.open(file, 'rt') as f:
+            return f.read()
+    else:
+        with open(file, 'rt') as f:
+            return f.read()
+
+
 def hr_sci(value):
    multiplier = 0
    while abs(value) >= 1000:
@ -18,7 +36,7 @@ def hr_sci(value):
				@@ -18,7 +36,7 @@ def hr_sci(value):
    while abs(value) < 1:
        value *= 1000
        multiplier -= 1
-    return f'{value:.3f}{" kMGTafpnµm"[multiplier]}'
+    return f'{value:.3f}{" kMGTPEafpnµm"[multiplier]}'


 def hr_bytes(nbytes):
@ -89,6 +107,10 @@ class Log:
				@@ -89,6 +107,10 @@ class Log:
 log = Log()


+#
+# Code below mocks basic numba and cuda functions for pure-python fallback.
+#
+
 class MockNumba:
    @staticmethod
    def njit(func):
@ -113,12 +135,10 @@ class MockCuda:
				@@ -113,12 +135,10 @@ class MockCuda:
                    self.func = funcc

                def __call__(self, *args, **kwargs):
-                    # print(f'device func call {self.func.__name__}')
                    return self.func(*args, **kwargs)

                def __getitem__(self, item):
                    grid_dim, block_dim = item
-                    # print(f'kernel call {self.func.__name__} grid_dim:{grid_dim} block_dim:{block_dim}')

                    def inner(*args, **kwargs):
                        for grid_x in range(grid_dim[0]):
@ -162,21 +182,3 @@ else:
				@@ -162,21 +182,3 @@ else:
    numba = MockNumba()
    cuda = MockCuda()
    log.warn('Numba unavailable. Falling back to pure Python.')
-
-
-_pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)])
-
-
-def popcount(a):
-    return np.sum(_pop_count_lut[a])
-
-
-def readtext(file):
-    if hasattr(file, 'read'):
-        return file.read()
-    if str(file).endswith('.gz'):
-        with gzip.open(file, 'rt') as f:
-            return f.read()
-    else:
-        with open(file, 'rt') as f:
-            return f.read()
--- a/src/kyupy/circuit.py
+++ b/src/kyupy/circuit.py
@ -75,6 +75,9 @@ class Node:
				@@ -75,6 +75,9 @@ class Node:
        """A list of output connections (:class:`Line` objects).
        """

+    def __index__(self):
+        return self.index
+
    def __repr__(self):
        ins = ' '.join([f'<{line.index}' if line is not None else '<None' for line in self.ins])
        outs = ' '.join([f'>{line.index}' if line is not None else '>None' for line in self.outs])
@ -160,6 +163,9 @@ class Line:
				@@ -160,6 +163,9 @@ class Line:
        self.reader = None
        self.circuit = None

+    def __index__(self):
+        return self.index
+
    def __repr__(self):
        return f'{self.index}'

--- a/src/kyupy/logic_sim.py
+++ b/src/kyupy/logic_sim.py
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-"""A High-Throughput combinational logic simulator.
+"""A high-throughput combinational logic simulator.

 The class :py:class:`~kyupy.logic_sim.LogicSim` performs parallel simulations of the combinational part of a circuit.
 The logic operations are performed bit-parallel on packed numpy arrays.
@ -31,6 +31,8 @@ class LogicSim:
				@@ -31,6 +31,8 @@ class LogicSim:
        self.sims = sims
        nbytes = (sims - 1) // 8 + 1
        self.interface = list(circuit.interface) + [n for n in circuit.nodes if 'dff' in n.kind.lower()]
+        self.width = len(self.interface)
+        """The number of bits in the circuit state (number of ports + number of state-elements)."""
        self.state = np.zeros((len(circuit.lines), mdim, nbytes), dtype='uint8')
        self.state_epoch = np.zeros(len(circuit.nodes), dtype='int8') - 1
        self.tmp = np.zeros((5, mdim, nbytes), dtype='uint8')
--- a/src/kyupy/sdf.py
+++ b/src/kyupy/sdf.py
@ -43,19 +43,29 @@ class DelayFile:
				@@ -43,19 +43,29 @@ class DelayFile:
        Currently, only ABSOLUTE IOPATH and INTERCONNECT delays are supported.
        Pulse rejection limits are derived from absolute delays, explicit declarations (PATHPULSE etc.) are ignored.

-        :param circuit:
-        :param pin_index_f:
-        :param ffdelays:
-        :param interconnect:
-        :type dataset: int or tuple
+        :param circuit: The circuit to annotate. Names from the SDF file are matched to the node names.
+        :type circuit: :class:`~kyupy.circuit.Circuit`
+        :param pin_index_f: A function that returns a pin position by node type and pin name.
+        :param dataset: SDFs store multiple values for each delay (e.g. minimum, typical, maximum).
+            An integer selects the dataset to use (default is 1 for 'typical').
+            If a tuple is given, the annotator will calculate the average of multiple datasets.
+        :type dataset: ``int`` or ``tuple``
+        :param interconnect: Whether or not to include the delays of interconnects in the annotation.
+            To properly annotate interconnect delays, the circuit model has to include a '__fork__' node on
+            every signal and every fanout-branch. The Verilog parser aids in this by setting the parameter
+            `branchforks=True` in :py:func:`kyupy.verilog.parse`.
+        :type interconnect: ``bool``
+        :param ffdelays: Whether or not to include the delays of flip-flops in the annotation.
+        :type ffdelays: ``bool``
        :return: A 3-dimensional ndarray with timing data.

            * Axis 0: line index.
-            * Axis 1: type of timing data: 0=`delay`, 1=`pulse rejection limit`.
-            * Axis 2: The polarity of the output transition of the reading node: 0=`rising`, 1=`falling`.
+            * Axis 1: type of timing data: 0='delay', 1='pulse rejection limit'.
+            * Axis 2: The polarity of the output transition of the reading node: 0='rising', 1='falling'.

            The polarity for pulse rejection is determined by the latter transition of the pulse.
-            E.g., timing[42,1,0] is the rejection limit of a negative pulse at the output of the reader of line 42.
+            E.g., ``timing[42, 1, 0]`` is the rejection limit of a negative pulse at the output
+            of the reader of line 42.
        """
        def select_del(_delvals, idx):
            if isinstance(dataset, tuple):
--- a/src/kyupy/stil.py
+++ b/src/kyupy/stil.py
@ -4,7 +4,7 @@ The main purpose of this parser is to load scan pattern sets from STIL files.
				@@ -4,7 +4,7 @@ The main purpose of this parser is to load scan pattern sets from STIL files.
 It supports only a very limited subset of STIL.

 The functions :py:func:`load` and :py:func:`read` return an intermediate representation (:class:`StilFile` object).
-Call :py:func:`StilFile.tests4v`, :py:func:`StilFile.tests8v`, or :py:func:`StilFile.responses4v` to
+Call :py:func:`StilFile.tests`, :py:func:`StilFile.tests_loc`, or :py:func:`StilFile.responses` to
 obtain the appropriate vector sets.
 """

--- a/src/kyupy/wave_sim.py
+++ b/src/kyupy/wave_sim.py
@ -1,10 +1,10 @@
				@@ -1,10 +1,10 @@
-"""High-Throughput combinational logic timing simulators.
+"""High-throughput combinational logic timing simulators.

 These simulators work similarly to :py:class:`~kyupy.logic_sim.LogicSim`.
 They propagate values through the combinational circuit from (pseudo) primary inputs to (pseudo) primary outputs.
 Instead of propagating logic values, these simulators propagate signal histories (waveforms).
-They are designed to run many simulations in parallel and while their latencies are quite high, they achieve
-high throughput performance.
+They are designed to run many simulations in parallel and while their latencies are quite high, they can achieve
+high throughput.

 The simulators are not event-based and are not capable of simulating sequential circuits directly.

@ -20,9 +20,13 @@ import numpy as np
				@@ -20,9 +20,13 @@ import numpy as np
 from . import numba, cuda, hr_bytes


-TMAX = np.float32(2 ** 127)  # almost np.PINF for 32-bit floating point values
-TMAX_OVL = np.float32(1.1 * 2 ** 127)  # almost np.PINF with overflow mark
-TMIN = np.float32(-2 ** 127)  # almost np.NINF for 32-bit floating point values
+TMAX = np.float32(2 ** 127)
+"""A large 32-bit floating point value used to mark the end of a waveform."""
+TMAX_OVL = np.float32(1.1 * 2 ** 127)
+"""A large 32-bit floating point value used to mark the end of a waveform that
+may be incomplete due to an overflow."""
+TMIN = np.float32(-2 ** 127)
+"""A large negative 32-bit floating point value used at the beginning of waveforms that start with logic-1."""


 class Heap:
@ -92,7 +96,23 @@ class Heap:
				@@ -92,7 +96,23 @@ class Heap:


 class WaveSim:
-    """A waveform-based combinational logic timing simulator."""
+    """A waveform-based combinational logic timing simulator running on CPU.
+
+    :param circuit: The circuit to simulate.
+    :param timing: The timing annotation of the circuit (see :py:func:`kyupy.sdf.DelayFile.annotation` for details).
+    :param sims: The number of parallel simulations.
+    :param wavecaps: The number of floats available in each waveform. Waveforms encode the signal switching
+        history by storing transition times. The waveform capacity roughly corresponds to the number of transitions
+        that can be stored. A capacity of ``n`` can store at least ``n-2`` transitions. If more transitions are
+        generated during simulation, the latest glitch is removed (freeing up two transition times) and an overflow
+        flag is set. If an integer is given, all waveforms are set to that same capacity. With an array of length
+        ``len(circuit.lines)`` the capacity can be controlled for each intermediate waveform individually.
+    :param strip_forks: If enabled, the simulator will not evaluate fork nodes explicitly. This saves simulation time
+        by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes
+        are ignored.
+    :param keep_waveforms: If disabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
+        memory footprint, but intermediate signal waveforms become inaccessible after a propagation.
+    """
    def __init__(self, circuit, timing, sims=8, wavecaps=16, strip_forks=False, keep_waveforms=True):
        self.circuit = circuit
        self.sims = sims
@ -258,12 +278,24 @@ class WaveSim:
				@@ -258,12 +278,24 @@ class WaveSim:
               f'levels={len(self.level_starts)} mem={hr_bytes(total_mem)}>'

    def get_line_delay(self, line, polarity):
+        """Returns the current delay of the given ``line`` and ``polarity`` in the simulation model."""
        return self.timing[line, 0, polarity]

    def set_line_delay(self, line, polarity, delay):
+        """Sets a new ``delay`` for the given ``line`` and ``polarity`` in the simulation model."""
        self.timing[line, 0, polarity] = delay

    def assign(self, vectors, time=0.0, offset=0):
+        """Assigns new values to the primary inputs and state-elements.
+
+        :param vectors: The values to assign, preferably in 8-valued logic. The values are converted to
+            appropriate waveforms with either one transition (``RISE``, ``FALL``) or no transitions
+            (``ZERO``, ``ONE``, and others).
+        :type vectors: :py:class:`~kyupy.logic.BPArray`
+        :param time: The transition time of the generated waveforms.
+        :param offset: The offset into the vector set. The vector assigned to the first simulator is
+            ``vectors[offset]``.
+        """
        nvectors = min(len(vectors) - offset, self.sims)
        for i in range(len(self.interface)):
            ppi_loc = self.sat[self.ppi_offset + i, 0]
@ -287,6 +319,12 @@ class WaveSim:
				@@ -287,6 +319,12 @@ class WaveSim:
                self.state[ppi_loc + toggle, p] = TMAX

    def propagate(self, sims=None, sd=0.0, seed=1):
+        """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs.
+
+        :param sims: Number of parallel simulations to execute. If None, all available simulations are performed.
+        :param sd: Standard deviation for injection of random delay variation. Active if the value is positive.
+        :param seed: Random seed for delay variations.
+        """
        sims = min(sims or self.sims, self.sims)
        for op_start, op_stop in zip(self.level_starts, self.level_stops):
            self.overflows += level_eval(self.ops, op_start, op_stop, self.state, self.sat, 0, sims,
@ -294,6 +332,8 @@ class WaveSim:
				@@ -294,6 +332,8 @@ class WaveSim:
        self.lst_eat_valid = False

    def wave(self, line, vector):
+        """Returns the desired waveform from the simulation state. Only valid if the simulator was
+        instantiated with ``keep_waveforms=True``."""
        if line < 0:
            return [TMAX]
        mem, wcap, _ = self.sat[line]
@ -307,7 +347,34 @@ class WaveSim:
				@@ -307,7 +347,34 @@ class WaveSim:
    def wave_ppo(self, o, vector):
        return self.wave(self.ppo_offset + o, vector)

-    def capture(self, time=TMAX, sd=0, seed=1, cdata=None, offset=0):
+    def capture(self, time=TMAX, sd=0.0, seed=1, cdata=None, offset=0):
+        """Simulates a capture operation at all state-elements and primary outputs.
+
+        The capture analyzes the propagated waveforms at and around the given capture time and returns
+        various results for each capture operation.
+
+        :param time: The desired capture time. By default, a capture of the settled value is performed.
+        :param sd: A standard deviation for uncertainty in the actual capture time.
+        :param seed: The random seed for a capture with uncertainty.
+        :param cdata: An array to copy capture data into (optional). See the return value for details.
+        :param offset: An offset into the supplied capture data array.
+        :return: The capture data as numpy array.
+
+            The 3-dimensional capture data array contains for each interface node (axis 0),
+            and each test (axis 1), seven values:
+
+            0. Probability of capturing a 1 at the given capture time (same as next value, if no
+               standard deviation given).
+            1. A capture value decided by random sampling according to above probability and given seed.
+            2. The final value (assume a very late capture time).
+            3. True, if there was a premature capture (capture error), i.e. final value is different
+               from captured value.
+            4. Earliest arrival time. The time at which the output transitioned from its initial value.
+            5. Latest stabilization time. The time at which the output transitioned to its final value.
+            6. Overflow indicator. If non-zero, some signals in the input cone of this output had more
+               transitions than specified in ``wavecaps``. Some transitions have been discarded, the
+               final values in the waveforms are still valid.
+        """
        for i, node in enumerate(self.interface):
            if len(node.ins) == 0: continue
            for p in range(self.sims):
@ -320,6 +387,14 @@ class WaveSim:
				@@ -320,6 +387,14 @@ class WaveSim:
        return self.cdata

    def reassign(self, time=0.0):
+        """Re-assigns the last capture to the appropriate pseudo-primary inputs. Generates a new set of
+        waveforms at the PPIs, each starting with the previous final value of its PPI and transitioning at the
+        given time to the value captured in a previous simulation. :py:func:`~WaveSim.capture` must be called
+        prior to this function. The final value of each PPI is taken from the randomly sampled concrete logic
+        values in the capture data.
+
+        :param time: The transition time at the inputs (usually 0.0).
+        """
        for i in range(len(self.interface)):
            ppi_loc = self.sat[self.ppi_offset + i, 0]
            ppo_loc = self.sat[self.ppo_offset + i, 0]
@ -544,7 +619,12 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):
				@@ -544,7 +619,12 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):


 class WaveSimCuda(WaveSim):
-    """A GPU-accelerated waveform-based combinational logic timing simulator."""
+    """A GPU-accelerated waveform-based combinational logic timing simulator.
+
+    The API is the same as for :py:class:`WaveSim`.
+    All internal memories are mirrored into GPU memory upon construction.
+    Some operations like access to single waveforms can involve large communication overheads.
+    """
    def __init__(self, circuit, timing, sims=8, wavecaps=16, strip_forks=False, keep_waveforms=True):
        super().__init__(circuit, timing, sims, wavecaps, strip_forks, keep_waveforms)

@ -602,10 +682,10 @@ class WaveSimCuda(WaveSim):
				@@ -602,10 +682,10 @@ class WaveSimCuda(WaveSim):

    def wave(self, line, vector):
        if line < 0:
-            return None
+            return [TMAX]
        mem, wcap, _ = self.sat[line]
        if mem < 0:
-            return None
+            return [TMAX]
        return self.d_state[mem:mem + wcap, vector]

    def capture(self, time=TMAX, sd=0, seed=1, cdata=None, offset=0):