diff --git a/src/kyupy/__init__.py b/src/kyupy/__init__.py index 5b461bf..1dd07d5 100644 --- a/src/kyupy/__init__.py +++ b/src/kyupy/__init__.py @@ -10,6 +10,24 @@ import gzip import numpy as np +_pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)]) + + +def popcount(a): + return np.sum(_pop_count_lut[a]) + + +def readtext(file): + if hasattr(file, 'read'): + return file.read() + if str(file).endswith('.gz'): + with gzip.open(file, 'rt') as f: + return f.read() + else: + with open(file, 'rt') as f: + return f.read() + + def hr_sci(value): multiplier = 0 while abs(value) >= 1000: @@ -18,7 +36,7 @@ def hr_sci(value): while abs(value) < 1: value *= 1000 multiplier -= 1 - return f'{value:.3f}{" kMGTafpnµm"[multiplier]}' + return f'{value:.3f}{" kMGTPEafpnµm"[multiplier]}' def hr_bytes(nbytes): @@ -89,6 +107,10 @@ class Log: log = Log() +# +# Code below mocks basic numba and cuda functions for pure-python fallback. +# + class MockNumba: @staticmethod def njit(func): @@ -113,12 +135,10 @@ class MockCuda: self.func = funcc def __call__(self, *args, **kwargs): - # print(f'device func call {self.func.__name__}') return self.func(*args, **kwargs) def __getitem__(self, item): grid_dim, block_dim = item - # print(f'kernel call {self.func.__name__} grid_dim:{grid_dim} block_dim:{block_dim}') def inner(*args, **kwargs): for grid_x in range(grid_dim[0]): @@ -162,21 +182,3 @@ else: numba = MockNumba() cuda = MockCuda() log.warn('Numba unavailable. 
Falling back to pure Python.') - - -_pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)]) - - -def popcount(a): - return np.sum(_pop_count_lut[a]) - - -def readtext(file): - if hasattr(file, 'read'): - return file.read() - if str(file).endswith('.gz'): - with gzip.open(file, 'rt') as f: - return f.read() - else: - with open(file, 'rt') as f: - return f.read() diff --git a/src/kyupy/circuit.py b/src/kyupy/circuit.py index 31828b1..63b132a 100644 --- a/src/kyupy/circuit.py +++ b/src/kyupy/circuit.py @@ -75,6 +75,9 @@ class Node: """A list of output connections (:class:`Line` objects). """ + def __index__(self): + return self.index + def __repr__(self): ins = ' '.join([f'<{line.index}' if line is not None else '{line.index}' if line is not None else '>None' for line in self.outs]) @@ -160,6 +163,9 @@ class Line: self.reader = None self.circuit = None + def __index__(self): + return self.index + def __repr__(self): return f'{self.index}' diff --git a/src/kyupy/logic_sim.py b/src/kyupy/logic_sim.py index 484456c..1c27f42 100644 --- a/src/kyupy/logic_sim.py +++ b/src/kyupy/logic_sim.py @@ -1,4 +1,4 @@ -"""A High-Throughput combinational logic simulator. +"""A high-throughput combinational logic simulator. The class :py:class:`~kyupy.logic_sim.LogicSim` performs parallel simulations of the combinational part of a circuit. The logic operations are performed bit-parallel on packed numpy arrays. 
@@ -31,6 +31,8 @@ class LogicSim: self.sims = sims nbytes = (sims - 1) // 8 + 1 self.interface = list(circuit.interface) + [n for n in circuit.nodes if 'dff' in n.kind.lower()] + self.width = len(self.interface) + """The number of bits in the circuit state (number of ports + number of state-elements).""" self.state = np.zeros((len(circuit.lines), mdim, nbytes), dtype='uint8') self.state_epoch = np.zeros(len(circuit.nodes), dtype='int8') - 1 self.tmp = np.zeros((5, mdim, nbytes), dtype='uint8') diff --git a/src/kyupy/sdf.py b/src/kyupy/sdf.py index 9c4ecfd..d26c6ee 100644 --- a/src/kyupy/sdf.py +++ b/src/kyupy/sdf.py @@ -43,19 +43,29 @@ class DelayFile: Currently, only ABSOLUTE IOPATH and INTERCONNECT delays are supported. Pulse rejection limits are derived from absolute delays, explicit declarations (PATHPULSE etc.) are ignored. - :param circuit: - :param pin_index_f: - :param ffdelays: - :param interconnect: - :type dataset: int or tuple + :param circuit: The circuit to annotate. Names from the SDF file are matched to the node names. + :type circuit: :class:`~kyupy.circuit.Circuit` + :param pin_index_f: A function that returns a pin position by node type and pin name. + :param dataset: SDFs store multiple values for each delay (e.g. minimum, typical, maximum). + An integer selects the dataset to use (default is 1 for 'typical'). + If a tuple is given, the annotator will calculate the average of multiple datasets. + :type dataset: ``int`` or ``tuple`` + :param interconnect: Whether or not to include the delays of interconnects in the annotation. + To properly annotate interconnect delays, the circuit model has to include a '__fork__' node on + every signal and every fanout-branch. The Verilog parser aids in this by setting the parameter + `branchforks=True` in :py:func:`kyupy.verilog.parse`. + :type interconnect: ``bool`` + :param ffdelays: Whether or not to include the delays of flip-flops in the annotation. 
+ :type ffdelays: ``bool`` :return: A 3-dimensional ndarray with timing data. * Axis 0: line index. - * Axis 1: type of timing data: 0=`delay`, 1=`pulse rejection limit`. - * Axis 2: The polarity of the output transition of the reading node: 0=`rising`, 1=`falling`. + * Axis 1: type of timing data: 0='delay', 1='pulse rejection limit'. + * Axis 2: The polarity of the output transition of the reading node: 0='rising', 1='falling'. The polarity for pulse rejection is determined by the latter transition of the pulse. - E.g., timing[42,1,0] is the rejection limit of a negative pulse at the output of the reader of line 42. + E.g., ``timing[42, 1, 0]`` is the rejection limit of a negative pulse at the output + of the reader of line 42. """ def select_del(_delvals, idx): if isinstance(dataset, tuple): diff --git a/src/kyupy/stil.py b/src/kyupy/stil.py index 5faf56b..75bffc2 100644 --- a/src/kyupy/stil.py +++ b/src/kyupy/stil.py @@ -4,7 +4,7 @@ The main purpose of this parser is to load scan pattern sets from STIL files. It supports only a very limited subset of STIL. The functions :py:func:`load` and :py:func:`read` return an intermediate representation (:class:`StilFile` object). -Call :py:func:`StilFile.tests4v`, :py:func:`StilFile.tests8v`, or :py:func:`StilFile.responses4v` to +Call :py:func:`StilFile.tests`, :py:func:`StilFile.tests_loc`, or :py:func:`StilFile.responses` to obtain the appropriate vector sets. """ diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py index d9e95cf..4902f1a 100644 --- a/src/kyupy/wave_sim.py +++ b/src/kyupy/wave_sim.py @@ -1,10 +1,10 @@ -"""High-Throughput combinational logic timing simulators. +"""High-throughput combinational logic timing simulators. These simulators work similarly to :py:class:`~kyupy.logic_sim.LogicSim`. They propagate values through the combinational circuit from (pseudo) primary inputs to (pseudo) primary outputs. Instead of propagating logic values, these simulators propagate signal histories (waveforms). 
-They are designed to run many simulations in parallel and while their latencies are quite high, they achieve -high throughput performance. +They are designed to run many simulations in parallel and while their latencies are quite high, they can achieve +high throughput. The simulators are not event-based and are not capable of simulating sequential circuits directly. @@ -20,9 +20,13 @@ import numpy as np from . import numba, cuda, hr_bytes -TMAX = np.float32(2 ** 127) # almost np.PINF for 32-bit floating point values -TMAX_OVL = np.float32(1.1 * 2 ** 127) # almost np.PINF with overflow mark -TMIN = np.float32(-2 ** 127) # almost np.NINF for 32-bit floating point values +TMAX = np.float32(2 ** 127) +"""A large 32-bit floating point value used to mark the end of a waveform.""" +TMAX_OVL = np.float32(1.1 * 2 ** 127) +"""A large 32-bit floating point value used to mark the end of a waveform that +may be incomplete due to an overflow.""" +TMIN = np.float32(-2 ** 127) +"""A large negative 32-bit floating point value used at the beginning of waveforms that start with logic-1.""" class Heap: @@ -92,7 +96,23 @@ class Heap: class WaveSim: - """A waveform-based combinational logic timing simulator.""" + """A waveform-based combinational logic timing simulator running on CPU. + + :param circuit: The circuit to simulate. + :param timing: The timing annotation of the circuit (see :py:func:`kyupy.sdf.DelayFile.annotation` for details) + :param sims: The number of parallel simulations. + :param wavecaps: The number of floats available in each waveform. Waveforms are encoding the signal switching + history by storing transition times. The waveform capacity roughly corresponds to the number of transitions + that can be stored. A capacity of ``n`` can store at least ``n-2`` transitions. If more transitions are + generated during simulation, the latest glitch is removed (freeing up two transition times) and an overflow + flag is set. 
If an integer is given, all waveforms are set to that same capacity. With an array of length + ``len(circuit.lines)`` the capacity can be controlled for each intermediate waveform individually. + :param strip_forks: If enabled, the simulator will not evaluate fork nodes explicitly. This saves simulation time + by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes + are ignored. + :param keep_waveforms: If disabled, memory of intermediate signal waveforms will be re-used. This greatly reduces + memory footprint, but intermediate signal waveforms become inaccessible after a propagation. + """ def __init__(self, circuit, timing, sims=8, wavecaps=16, strip_forks=False, keep_waveforms=True): self.circuit = circuit self.sims = sims @@ -258,12 +278,24 @@ class WaveSim: f'levels={len(self.level_starts)} mem={hr_bytes(total_mem)}>' def get_line_delay(self, line, polarity): + """Returns the current delay of the given ``line`` and ``polarity`` in the simulation model.""" return self.timing[line, 0, polarity] def set_line_delay(self, line, polarity, delay): + """Sets a new ``delay`` for the given ``line`` and ``polarity`` in the simulation model.""" self.timing[line, 0, polarity] = delay def assign(self, vectors, time=0.0, offset=0): + """Assigns new values to the primary inputs and state-elements. + + :param vectors: The values to assign preferably in 8-valued logic. The values are converted to + appropriate waveforms with one transition (``RISE``, ``FALL``) or no transitions + (``ZERO``, ``ONE``, and others). + :type vectors: :py:class:`~kyupy.logic.BPArray` + :param time: The transition time of the generated waveforms. + :param offset: The offset into the vector set. The vector assigned to the first simulator is + ``vectors[offset]``. 
+ """ nvectors = min(len(vectors) - offset, self.sims) for i in range(len(self.interface)): ppi_loc = self.sat[self.ppi_offset + i, 0] @@ -287,6 +319,12 @@ class WaveSim: self.state[ppi_loc + toggle, p] = TMAX def propagate(self, sims=None, sd=0.0, seed=1): + """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs. + + :param sims: Number of parallel simulations to execute. If None, all available simulations are performed. + :param sd: Standard deviation for injection of random delay variation. Active if the value is positive. + :param seed: Random seed for delay variations. + """ sims = min(sims or self.sims, self.sims) for op_start, op_stop in zip(self.level_starts, self.level_stops): self.overflows += level_eval(self.ops, op_start, op_stop, self.state, self.sat, 0, sims, @@ -294,6 +332,8 @@ class WaveSim: self.lst_eat_valid = False def wave(self, line, vector): + """Returns the desired waveform from the simulation state. Only valid if the simulator was + instantiated with ``keep_waveforms=True``.""" if line < 0: return [TMAX] mem, wcap, _ = self.sat[line] @@ -307,7 +347,34 @@ class WaveSim: def wave_ppo(self, o, vector): return self.wave(self.ppo_offset + o, vector) - def capture(self, time=TMAX, sd=0, seed=1, cdata=None, offset=0): + def capture(self, time=TMAX, sd=0.0, seed=1, cdata=None, offset=0): + """Simulates a capture operation at all state-elements and primary outputs. + + The capture analyzes the propagated waveforms at and around the given capture time and returns + various results for each capture operation. + + :param time: The desired capture time. By default, a capture of the settled value is performed. + :param sd: A standard deviation for uncertainty in the actual capture time. + :param seed: The random seed for a capture with uncertainty. + :param cdata: An array to copy capture data into (optional). See the return value for details. + :param offset: An offset into the supplied capture data array. 
+ :return: The capture data as numpy array. + + The 3-dimensional capture data array contains for each interface node (axis 0), + and each test (axis 1), seven values: + + 0. Probability of capturing a 1 at the given capture time (same as next value, if no + standard deviation given). + 1. A capture value decided by random sampling according to above probability and given seed. + 2. The final value (assume a very late capture time). + 3. True, if there was a premature capture (capture error), i.e. final value is different + from captured value. + 4. Earliest arrival time. The time at which the output transitioned from its initial value. + 5. Latest stabilization time. The time at which the output transitioned to its final value. + 6. Overflow indicator. If non-zero, some signals in the input cone of this output had more + transitions than specified in ``wavecaps``. Some transitions have been discarded, the + final values in the waveforms are still valid. + """ for i, node in enumerate(self.interface): if len(node.ins) == 0: continue for p in range(self.sims): @@ -320,6 +387,14 @@ class WaveSim: return self.cdata def reassign(self, time=0.0): + """Re-assigns the last capture to the appropriate pseudo-primary inputs. Generates a new set of + waveforms at the PPIs that start with the previous final value of that PPI, and transitions at the + given time to the value captured in a previous simulation. :py:func:`~WaveSim.capture` must be called + prior to this function. The final value of each PPI is taken from the randomly sampled concrete logic + values in the capture data. + + :param time: The transition time at the inputs (usually 0.0). 
+ """ for i in range(len(self.interface)): ppi_loc = self.sat[self.ppi_offset + i, 0] ppo_loc = self.sat[self.ppo_offset + i, 0] @@ -544,7 +619,12 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0): class WaveSimCuda(WaveSim): - """A GPU-accelerated waveform-based combinational logic timing simulator.""" + """A GPU-accelerated waveform-based combinational logic timing simulator. + + The API is the same as for :py:class:`WaveSim`. + All internal memories are mirrored into GPU memory upon construction. + Some operations like access to single waveforms can involve large communication overheads. + """ def __init__(self, circuit, timing, sims=8, wavecaps=16, strip_forks=False, keep_waveforms=True): super().__init__(circuit, timing, sims, wavecaps, strip_forks, keep_waveforms) @@ -602,10 +682,10 @@ class WaveSimCuda(WaveSim): def wave(self, line, vector): if line < 0: - return None + return [TMAX] mem, wcap, _ = self.sat[line] if mem < 0: - return None + return [TMAX] return self.d_state[mem:mem + wcap, vector] def capture(self, time=TMAX, sd=0, seed=1, cdata=None, offset=0):