From c9445f2d79d81f404688c68f71fb63802ca64f31 Mon Sep 17 00:00:00 2001 From: Stefan Holst Date: Sat, 16 Jan 2021 14:48:27 +0900 Subject: [PATCH] Docs, __index__, fault injection and TechLib - Documentation improvements - Node and Line objects now provide __index__ - LogicSim cleanup and improvements (inject_cb, cycle, ...) - Introduce TechLib class to organize tech-specific info - More human-readable output - De-linting --- Demo.ipynb | 24 ++-- LICENSE.txt | 2 +- docs/conf.py | 4 +- docs/index.rst | 1 + docs/miscellaneous.rst | 10 ++ docs/simulators.rst | 3 + setup.py | 3 +- src/kyupy/__init__.py | 141 +++++++++++++++---- src/kyupy/bench.py | 14 +- src/kyupy/circuit.py | 41 +++--- src/kyupy/logic.py | 244 ++++++++++++++++++++------------ src/kyupy/logic_sim.py | 292 +++++++++++++++++--------------------- src/kyupy/saed.py | 289 -------------------------------------- src/kyupy/sdf.py | 76 +++++----- src/kyupy/stil.py | 36 ++--- src/kyupy/techlib.py | 301 ++++++++++++++++++++++++++++++++++++++++ src/kyupy/verilog.py | 52 ++++--- src/kyupy/wave_sim.py | 157 ++++++++++++++++----- tests/test_bench.py | 4 +- tests/test_logic_sim.py | 4 +- tests/test_sdf.py | 19 ++- tests/test_stil.py | 7 +- tests/test_wave_sim.py | 49 ++++--- 23 files changed, 1002 insertions(+), 771 deletions(-) create mode 100644 docs/miscellaneous.rst delete mode 100644 src/kyupy/saed.py create mode 100644 src/kyupy/techlib.py diff --git a/Demo.ipynb b/Demo.ipynb index 288f1bd..805c60c 100644 --- a/Demo.ipynb +++ b/Demo.ipynb @@ -44,7 +44,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 2, @@ -64,7 +64,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -362,7 +362,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 13, @@ -445,7 +445,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 15, @@ -489,11 +489,11 @@ "\n", "for cell in b14.topological_order():\n", " if 'DFF' in cell.kind or 'input' == cell.kind:\n", - " levels[cell.index] 
= 0\n", + " levels[cell] = 0\n", " elif '__fork__' == cell.kind:\n", - " levels[cell.index] = levels[cell.ins[0].driver.index] # forks only have exactly one driver\n", + " levels[cell] = levels[cell.ins[0].driver] # forks only have exactly one driver\n", " else:\n", - " levels[cell.index] = max([levels[line.driver.index] for line in cell.ins]) + 1\n", + " levels[cell] = max([levels[line.driver] for line in cell.ins]) + 1\n", " \n", "print(f'Maximum logic depth: {np.max(levels)}')" ] @@ -591,7 +591,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 19, @@ -697,7 +697,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 23, @@ -829,7 +829,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 29, @@ -962,10 +962,9 @@ "outputs": [], "source": [ "from kyupy import sdf\n", - "from kyupy.saed import pin_index\n", "\n", "df = sdf.load('tests/b14.sdf.gz')\n", - "lt = df.annotation(b14, pin_index, dataset=0, interconnect=False)" + "lt = df.annotation(b14, dataset=0, interconnect=False)" ] }, { @@ -1118,6 +1117,7 @@ "metadata": {}, "source": [ "The capture data contains for each PI, PO, and scan flip-flop (axis 0), and each test (axis 1) seven values:\n", + "\n", "0. Probability of capturing a 1 at the given capture time (same as next value, if no standard deviation given).\n", "1. A capture value decided by random sampling according to above probability.\n", "2. 
The final value (assume a very late capture time).\n", diff --git a/LICENSE.txt b/LICENSE.txt index 1e4a002..293fa79 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 s-holst +Copyright (c) 2020-2021 Stefan Holst Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/docs/conf.py b/docs/conf.py index cb2e436..540783b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,11 +20,11 @@ sys.path.insert(0, os.path.abspath('../src')) # -- Project information ----------------------------------------------------- project = 'KyuPy' -copyright = '2020, Stefan Holst' +copyright = '2020-2021, Stefan Holst' author = 'Stefan Holst' # The full version, including alpha/beta/rc tags -release = '0.0.2' +release = '0.0.3' # -- General configuration --------------------------------------------------- diff --git a/docs/index.rst b/docs/index.rst index 3caa343..3fc74e8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,4 +9,5 @@ API Reference datastructures parsers simulators + miscellaneous diff --git a/docs/miscellaneous.rst b/docs/miscellaneous.rst new file mode 100644 index 0000000..fff469f --- /dev/null +++ b/docs/miscellaneous.rst @@ -0,0 +1,10 @@ +Miscellaneous +============= + +.. automodule:: kyupy + :members: + +.. automodule:: kyupy.techlib + :members: + + diff --git a/docs/simulators.rst b/docs/simulators.rst index 8d5f6b6..bcc0ea4 100644 --- a/docs/simulators.rst +++ b/docs/simulators.rst @@ -4,6 +4,8 @@ Simulators Logic Simulation - :mod:`kyupy.logic_sim` ----------------------------------------- +.. automodule:: kyupy.logic_sim + .. autoclass:: kyupy.logic_sim.LogicSim :members: @@ -12,6 +14,7 @@ Timing Simulation - :mod:`kyupy.wave_sim` ----------------------------------------- .. automodule:: kyupy.wave_sim + :members: TMAX, TMAX_OVL, TMIN .. 
autoclass:: kyupy.wave_sim.WaveSim :members: diff --git a/setup.py b/setup.py index 9a0bb1b..efb49ec 100644 --- a/setup.py +++ b/setup.py @@ -5,9 +5,10 @@ with open('README.rst', 'r') as f: setup( name='kyupy', - version='0.0.2', + version='0.0.3', description='High-performance processing and analysis of non-hierarchical VLSI designs', long_description=long_description, + long_description_content_type='text/x-rst', packages=find_packages(where='src'), package_dir={'': 'src'}, url='https://github.com/s-holst/kyupy', diff --git a/src/kyupy/__init__.py b/src/kyupy/__init__.py index d1bb8db..8bbfc9f 100644 --- a/src/kyupy/__init__.py +++ b/src/kyupy/__init__.py @@ -1,6 +1,8 @@ """A package for processing and analysis of non-hierarchical gate-level VLSI designs. -It contains fundamental building blocks for research software in the fields of VLSI test, diagnosis and reliability. +The kyupy package itself contains a logger and other simple utility functions. +In addition, it defines a ``numba`` and a ``cuda`` objects that point to the actual packages +if they are available and otherwise point to mocks. """ import time @@ -10,10 +12,78 @@ import gzip import numpy as np +_pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)]) + + +def popcount(a): + """Returns the number of 1-bits in a given packed numpy array.""" + return np.sum(_pop_count_lut[a]) + + +def readtext(file): + """Reads and returns the text in a given file. 
Transparently decompresses \\*.gz files.""" + if hasattr(file, 'read'): + return file.read() + if str(file).endswith('.gz'): + with gzip.open(file, 'rt') as f: + return f.read() + else: + with open(file, 'rt') as f: + return f.read() + + +def hr_sci(value): + """Formats a value in a human-readible scientific notation.""" + multiplier = 0 + while abs(value) >= 1000: + value /= 1000 + multiplier += 1 + while abs(value) < 1: + value *= 1000 + multiplier -= 1 + return f'{value:.3f}{" kMGTPEafpnµm"[multiplier]}' + + +def hr_bytes(nbytes): + """Formats a given number of bytes for human readability.""" + multiplier = 0 + while abs(nbytes) >= 1000: + nbytes /= 1024 + multiplier += 1 + return f'{nbytes:.1f}{["", "ki", "Mi", "Gi", "Ti", "Pi"][multiplier]}B' + + +def hr_time(seconds): + """Formats a given time interval for human readability.""" + s = '' + if seconds >= 86400: + d = seconds // 86400 + seconds -= d * 86400 + s += f'{int(d)}d' + if seconds >= 3600: + h = seconds // 3600 + seconds -= h * 3600 + s += f'{int(h)}h' + if seconds >= 60: + m = seconds // 60 + seconds -= m * 60 + if 'd' not in s: + s += f'{int(m)}m' + if 'h' not in s and 'd' not in s: + s += f'{int(seconds)}s' + return s + + class Log: + """A very simple logger that formats the messages with the number of seconds since + program start. + """ def __init__(self): self.start = time.perf_counter() self.logfile = None + """When set to a file handle, log messages are written to it instead to standard output. + After each write, ``flush()`` is called as well. 
+ """ def log(self, level, message): t = time.perf_counter() - self.start @@ -23,15 +93,45 @@ class Log: self.logfile.write(f'{t:011.3f} {level} {message}\n') self.logfile.flush() - def info(self, message): self.log('-', message) - - def warn(self, message): self.log('W', message) - - def error(self, message): self.log('E', message) + def info(self, message): + """Log an informational message.""" + self.log('-', message) + + def warn(self, message): + """Log a warning message.""" + self.log('W', message) + + def error(self, message): + """Log an error message.""" + self.log('E', message) + + def range(self, *args): + """A generator that operates just like the ``range()`` built-in, and also occasionally logs the progress + and compute time estimates.""" + elems = len(range(*args)) + start_time = time.perf_counter() + lastlog_time = start_time + log_interval = 5 + for elem, i in enumerate(range(*args)): + yield i + current_time = time.perf_counter() + if current_time > lastlog_time + log_interval: + done = (elem + 1) / elems + elapsed_time = current_time - start_time + total_time = elapsed_time / done + rem_time = total_time - elapsed_time + self.log(':', f'{done*100:.0f}% done {hr_time(elapsed_time)} elapsed {hr_time(rem_time)} remaining') + log_interval = min(600, int(log_interval*1.5)) + lastlog_time = current_time log = Log() +"""The standard logger instance.""" + +# +# Code below mocks basic numba and cuda functions for pure-python fallback. 
+# class MockNumba: @staticmethod @@ -52,17 +152,15 @@ class MockCuda: outer = self def make_launcher(func): - class Launcher(object): + class Launcher: def __init__(self, funcc): self.func = funcc def __call__(self, *args, **kwargs): - # print(f'device func call {self.func.__name__}') return self.func(*args, **kwargs) def __getitem__(self, item): grid_dim, block_dim = item - # print(f'kernel call {self.func.__name__} grid_dim:{grid_dim} block_dim:{block_dim}') def inner(*args, **kwargs): for grid_x in range(grid_dim[0]): @@ -104,23 +202,12 @@ if importlib.util.find_spec('numba') is not None: cuda = MockCuda() else: numba = MockNumba() + """If Numba is available on the system, it is the actual ``numba`` package. + Otherwise, it simply defines an ``njit`` decorator that does nothing. + """ cuda = MockCuda() + """If Numba is installed and Cuda GPUs are available, it is the actual ``numba.cuda`` package. + Otherwise, it is an object that defines basic methods and decorators so that cuda-code can still + run in the Python interpreter. + """ log.warn('Numba unavailable. Falling back to pure Python.') - - -_pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)]) - - -def popcount(a): - return np.sum(_pop_count_lut[a]) - - -def readtext(file): - if hasattr(file, 'read'): - return file.read() - if str(file).endswith('.gz'): - with gzip.open(file, 'rt') as f: - return f.read() - else: - with open(file, 'rt') as f: - return f.read() diff --git a/src/kyupy/bench.py b/src/kyupy/bench.py index 7ec1e1e..21310d5 100644 --- a/src/kyupy/bench.py +++ b/src/kyupy/bench.py @@ -14,25 +14,25 @@ from . 
import readtext class BenchTransformer(Transformer): - + def __init__(self, name): super().__init__() self.c = Circuit(name) - + def start(self, _): return self.c - + def parameters(self, args): return [self.c.get_or_add_fork(name) for name in args] - + def interface(self, args): self.c.interface.extend(args[0]) def assignment(self, args): name, cell_type, drivers = args cell = Node(self.c, str(name), str(cell_type)) Line(self.c, cell, self.c.get_or_add_fork(str(name))) - [Line(self.c, d, cell) for d in drivers] + for d in drivers: Line(self.c, d, cell) -grammar = r""" +GRAMMAR = r""" start: (statement)* statement: input | output | assignment input: ("INPUT" | "input") parameters -> interface @@ -51,7 +51,7 @@ def parse(text, name=None): :param name: The name of the circuit. Circuit names are not included in bench descriptions. :return: A :class:`Circuit` object. """ - return Lark(grammar, parser="lalr", transformer=BenchTransformer(name)).parse(text) + return Lark(GRAMMAR, parser="lalr", transformer=BenchTransformer(name)).parse(text) def load(file, name=None): diff --git a/src/kyupy/circuit.py b/src/kyupy/circuit.py index 84cc96c..5801f0c 100644 --- a/src/kyupy/circuit.py +++ b/src/kyupy/circuit.py @@ -53,7 +53,7 @@ class Node: """ self.kind = kind """A string describing the type of the node. - + Common types are the names from a standard cell library or general gate names like 'AND' or 'NOR'. If :py:attr:`kind` is set to '__fork__', it receives special treatment. A `fork` describes a named signal or a fan-out point in the circuit and not a physical `cell` like a gate. @@ -75,6 +75,9 @@ class Node: """A list of output connections (:class:`Line` objects). 
""" + def __index__(self): + return self.index + def __repr__(self): ins = ' '.join([f'<{line.index}' if line is not None else '{line.index}' if line is not None else '>None' for line in self.outs]) @@ -130,7 +133,7 @@ class Line: """ self.driver_pin = driver[1] """The output pin position of the driver node this line is connected to. - + This is the position in the outs-list of the driving node this line referenced from: :code:`self.driver.outs[self.driver_pin] == self`. """ @@ -160,6 +163,9 @@ class Line: self.reader = None self.circuit = None + def __index__(self): + return self.index + def __repr__(self): return f'{self.index}' @@ -187,17 +193,17 @@ class Circuit: """ self.nodes = IndexList() """A list of all :class:`Node` objects contained in the circuit. - + The position of a node in this list equals its index :code:`self.nodes[42].index == 42`. """ self.lines = IndexList() """A list of all :class:`Line` objects contained in the circuit. - + The position of a line in this list equals its index :code:`self.lines[42].index == 42`. """ self.interface = GrowingList() """A list of nodes that are designated as primary input- or output-ports. - + Port-nodes are contained in :py:attr:`nodes` as well as :py:attr:`interface`. The position of a node in the interface list corresponds to positions of logic values in test vectors. The port direction is not stored explicitly. @@ -213,7 +219,7 @@ class Circuit: def get_or_add_fork(self, name): return self.forks[name] if name in self.forks else Node(self, name) - + def copy(self): """Returns a deep copy of the circuit. """ @@ -231,7 +237,7 @@ class Circuit: n = c.cells[node.name] c.interface.append(n) return c - + def dump(self): """Returns a string representation of the circuit and all its nodes. 
""" @@ -239,8 +245,9 @@ class Circuit: return header + '\n'.join([str(n) for n in self.nodes]) def __repr__(self): - name = f" '{self.name}'" if self.name else '' - return f'' + name = f' {self.name}' if self.name else '' + return f'' def topological_order(self): """Generator function to iterate over all nodes in topological order. @@ -255,8 +262,8 @@ class Circuit: for line in n.outs: if line is None: continue succ = line.reader - visit_count[succ.index] += 1 - if visit_count[succ.index] == len(succ.ins) and 'DFF' not in succ.kind: + visit_count[succ] += 1 + if visit_count[succ] == len(succ.ins) and 'DFF' not in succ.kind: queue.append(succ) yield n @@ -280,8 +287,8 @@ class Circuit: n = queue.popleft() for line in n.ins: pred = line.driver - visit_count[pred.index] += 1 - if visit_count[pred.index] == len(pred.outs) and 'DFF' not in pred.kind: + visit_count[pred] += 1 + if visit_count[pred] == len(pred.outs) and 'DFF' not in pred.kind: queue.append(pred) yield n @@ -292,13 +299,13 @@ class Circuit: """ marks = [False] * len(self.nodes) for n in origin_nodes: - marks[n.index] = True + marks[n] = True for n in self.reversed_topological_order(): - if not marks[n.index]: + if not marks[n]: for line in n.outs: if line is not None: - marks[n.index] |= marks[line.reader.index] - if marks[n.index]: + marks[n] |= marks[line.reader] + if marks[n]: yield n def fanout_free_regions(self): diff --git a/src/kyupy/logic.py b/src/kyupy/logic.py index d30fd55..7b0c149 100644 --- a/src/kyupy/logic.py +++ b/src/kyupy/logic.py @@ -25,7 +25,7 @@ from collections.abc import Iterable import numpy as np -from . import numba +from . import numba, hr_bytes ZERO = 0b000 @@ -58,6 +58,12 @@ on a signal. ``'N'``, ``'n'``, and ``'v'`` are interpreted as ``NPULSE``. def interpret(value): + """Converts characters, strings, and lists of them to lists of logic constants defined above. + + :param value: A character (string of length 1), Boolean, Integer, None, or Iterable. 
+ Iterables (such as strings) are traversed and their individual characters are interpreted. + :return: A logic constant or a (possibly multi-dimensional) list of logic constants. + """ if isinstance(value, Iterable) and not (isinstance(value, str) and len(value) == 1): return list(map(interpret, value)) if value in [0, '0', False, 'L', 'l']: @@ -85,6 +91,79 @@ def bit_in(a, pos): return a[pos >> 3] & _bit_in_lut[pos & 7] +class MVArray: + """An n-dimensional array of m-valued logic values. + + This class wraps a numpy.ndarray of type uint8 and adds support for encoding and + interpreting 2-valued, 4-valued, and 8-valued logic values. + Each logic value is stored as an uint8, manipulations of individual values are cheaper than in + :py:class:`BPArray`. + + :param a: If a tuple is given, it is interpreted as desired shape. To make an array of ``n`` vectors + compatible with a simulator ``sim``, use ``(len(sim.interface), n)``. If a :py:class:`BPArray` or + :py:class:`MVArray` is given, a deep copy is made. If a string, a list of strings, a list of characters, + or a list of lists of characters are given, the data is interpreted best-effort and the array is + initialized accordingly. + :param m: The arity of the logic. Can be set to 2, 4, or 8. If None is given, the arity of a given + :py:class:`BPArray` or :py:class:`MVArray` is used, or, if the array is initialized differently, 8 is used. + """ + + def __init__(self, a, m=None): + self.m = m or 8 + assert self.m in [2, 4, 8] + + # Try our best to interpret given a. + if isinstance(a, MVArray): + self.data = a.data.copy() + """The wrapped 2-dimensional ndarray of logic values. + + * Axis 0 is PI/PO/FF position, the length of this axis is called "width". + * Axis 1 is vector/pattern, the length of this axis is called "length". + """ + self.m = m or a.m + elif hasattr(a, 'data'): # assume it is a BPArray. Can't use isinstance() because BPArray isn't declared yet. 
+ self.data = np.zeros((a.width, a.length), dtype=np.uint8) + self.m = m or a.m + for i in range(a.data.shape[-2]): + self.data[...] <<= 1 + self.data[...] |= np.unpackbits(a.data[..., -i-1, :], axis=1)[:, :a.length] + if a.data.shape[-2] == 1: + self.data *= 3 + elif isinstance(a, int): + self.data = np.full((a, 1), UNASSIGNED, dtype=np.uint8) + elif isinstance(a, tuple): + self.data = np.full(a, UNASSIGNED, dtype=np.uint8) + else: + if isinstance(a, str): a = [a] + self.data = np.asarray(interpret(a), dtype=np.uint8) + self.data = self.data[:, np.newaxis] if self.data.ndim == 1 else np.moveaxis(self.data, -2, -1) + + # Cast data to m-valued logic. + if self.m == 2: + self.data[...] = ((self.data & 0b001) & ((self.data >> 1) & 0b001) | (self.data == RISE)) * ONE + elif self.m == 4: + self.data[...] = (self.data & 0b011) & ((self.data != FALL) * ONE) | ((self.data == RISE) * ONE) + elif self.m == 8: + self.data[...] = self.data & 0b111 + + self.length = self.data.shape[-1] + self.width = self.data.shape[-2] + + def __repr__(self): + return f'' + + def __str__(self): + return str([self[idx] for idx in range(self.length)]) + + def __getitem__(self, vector_idx): + """Returns a string representing the desired vector.""" + chars = ["0", "X", "-", "1", "P", "R", "F", "N"] + return ''.join(chars[v] for v in self.data[:, vector_idx]) + + def __len__(self): + return self.length + + def mv_cast(*args, m=8): return [a if isinstance(a, MVArray) else MVArray(a, m=m) for a in args] @@ -100,6 +179,13 @@ def _mv_not(m, out, inp): def mv_not(x1, out=None): + """A multi-valued NOT operator. + + :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts. + :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray` + is returned. + :return: An :py:class:`MVArray` with the result. 
+ """ m = mv_getm(x1) x1 = mv_cast(x1, m=m)[0] out = out or MVArray(x1.data.shape, m=m) @@ -125,6 +211,14 @@ def _mv_or(m, out, *ins): def mv_or(x1, x2, out=None): + """A multi-valued OR operator. + + :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts. + :param x2: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts. + :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray` + is returned. + :return: An :py:class:`MVArray` with the result. + """ m = mv_getm(x1, x2) x1, x2 = mv_cast(x1, x2, m=m) out = out or MVArray(np.broadcast(x1.data, x2.data).shape, m=m) @@ -151,6 +245,14 @@ def _mv_and(m, out, *ins): def mv_and(x1, x2, out=None): + """A multi-valued AND operator. + + :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts. + :param x2: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts. + :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray` + is returned. + :return: An :py:class:`MVArray` with the result. + """ m = mv_getm(x1, x2) x1, x2 = mv_cast(x1, x2, m=m) out = out or MVArray(np.broadcast(x1.data, x2.data).shape, m=m) @@ -174,6 +276,14 @@ def _mv_xor(m, out, *ins): def mv_xor(x1, x2, out=None): + """A multi-valued XOR operator. + + :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts. + :param x2: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts. + :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray` + is returned. + :return: An :py:class:`MVArray` with the result. 
+ """ m = mv_getm(x1, x2) x1, x2 = mv_cast(x1, x2, m=m) out = out or MVArray(np.broadcast(x1.data, x2.data).shape, m=m) @@ -182,6 +292,16 @@ def mv_xor(x1, x2, out=None): def mv_transition(init, final, out=None): + """Computes the logic transitions from the initial values of ``init`` to the final values of ``final``. + Pulses in the input data are ignored. If any of the inputs are ``UNKNOWN``, the result is ``UNKNOWN``. + If both inputs are ``UNASSIGNED``, the result is ``UNASSIGNED``. + + :param init: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts. + :param final: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts. + :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray` + is returned. + :return: An :py:class:`MVArray` with the result. + """ m = mv_getm(init, final) init, final = mv_cast(init, final, m=m) init = init.data @@ -196,65 +316,46 @@ def mv_transition(init, final, out=None): return out -class MVArray: - """An n-dimensional array of m-valued logic values. - - This class wraps a numpy.ndarray of type uint8 and adds support for encoding and - interpreting 2-valued, 4-valued, and 8-valued logic values. - Each logic value is stored as an uint8, value manipulations are cheaper than in BPArray. - - An MVArray always has 2 axes: +class BPArray: + """An n-dimensional array of m-valued logic values that uses bit-parallel storage. - * Axis 0 is PI/PO/FF position, the length of this axis is called "width". - * Axis 1 is vector/pattern, the length of this axis is called "length". + The primary use of this format is in aiding efficient bit-parallel logic simulation. + The secondary benefit over :py:class:`MVArray` is its memory efficiency. + Accessing individual values is more expensive than with :py:class:`MVArray`. + Therefore it may be more efficient to unpack the data into an :py:class:`MVArray` and pack it again into a + :py:class:`BPArray` for simulation. 
+ See :py:class:`MVArray` for constructor parameters. """ def __init__(self, a, m=None): - self.m = m or 8 - assert self.m in [2, 4, 8] - - # Try our best to interpret given a. + if not isinstance(a, MVArray) and not isinstance(a, BPArray): + a = MVArray(a, m) + self.m = a.m if isinstance(a, MVArray): - self.data = a.data.copy() - self.m = m or a.m - elif hasattr(a, 'data'): # assume it is a BPArray. Can't use isinstance() because BPArray isn't declared yet. - self.data = np.zeros((a.width, a.length), dtype=np.uint8) - self.m = m or a.m - for i in range(a.data.shape[-2]): - self.data[...] <<= 1 - self.data[...] |= np.unpackbits(a.data[..., -i-1, :], axis=1)[:, :a.length] - if a.data.shape[-2] == 1: - self.data *= 3 - elif isinstance(a, int): - self.data = np.full((a, 1), UNASSIGNED, dtype=np.uint8) - elif isinstance(a, tuple): - self.data = np.full(a, UNASSIGNED, dtype=np.uint8) - else: - if isinstance(a, str): a = [a] - self.data = np.asarray(interpret(a), dtype=np.uint8) - self.data = self.data[:, np.newaxis] if self.data.ndim == 1 else np.moveaxis(self.data, -2, -1) - - # Cast data to m-valued logic. - if self.m == 2: - self.data[...] = ((self.data & 0b001) & ((self.data >> 1) & 0b001) | (self.data == RISE)) * ONE - elif self.m == 4: - self.data[...] = (self.data & 0b011) & ((self.data != FALL) * ONE) | ((self.data == RISE) * ONE) - elif self.m == 8: - self.data[...] = self.data & 0b111 + if m is not None and m != a.m: + a = MVArray(a, m) # cast data + self.m = a.m + assert self.m in [2, 4, 8] + nwords = math.ceil(math.log2(self.m)) + nbytes = (a.data.shape[-1] - 1) // 8 + 1 + self.data = np.zeros(a.data.shape[:-1] + (nwords, nbytes), dtype=np.uint8) + """The wrapped 3-dimensional ndarray. - self.length = self.data.shape[-1] - self.width = self.data.shape[-2] + * Axis 0 is PI/PO/FF position, the length of this axis is called "width". + * Axis 1 has length ``ceil(log2(m))`` for storing all bits. + * Axis 2 are the vectors/patterns packed into uint8 words. 
+ """ + for i in range(self.data.shape[-2]): + self.data[..., i, :] = np.packbits((a.data >> i) & 1, axis=-1) + else: # we have a BPArray + self.data = a.data.copy() # TODO: support conversion to different m + self.m = a.m + self.length = a.length + self.width = a.width def __repr__(self): - return f'' - - def __str__(self): - return str([self[idx] for idx in range(self.length)]) - - def __getitem__(self, vector_idx): - chars = ["0", "X", "-", "1", "P", "R", "F", "N"] - return ''.join(chars[v] for v in self.data[:, vector_idx]) + return f'' def __len__(self): return self.length @@ -359,44 +460,3 @@ def bp_xor(out, *ins): out[..., 0, :] |= any_unknown out[..., 1, :] &= ~any_unknown out[..., 2, :] &= ~any_unknown - - -class BPArray: - """An n-dimensional array of m-valued logic values that uses bit-parallel storage. - - The primary use of this format is in aiding efficient bit-parallel logic simulation. - The secondary benefit over MVArray is its memory efficiency. - Accessing individual values is more expensive than with :py:class:`MVArray`. - It is advised to first construct a MVArray, pack it into a :py:class:`BPArray` for simulation and unpack the results - back into a :py:class:`MVArray` for value access. - - The values along the last axis (vectors/patterns) are packed into uint8 words. - The second-last axis has length ceil(log2(m)) for storing all bits. - All other axes stay the same as in MVArray. 
- """ - - def __init__(self, a, m=None): - if not isinstance(a, MVArray) and not isinstance(a, BPArray): - a = MVArray(a, m) - self.m = a.m - if isinstance(a, MVArray): - if m is not None and m != a.m: - a = MVArray(a, m) # cast data - self.m = a.m - assert self.m in [2, 4, 8] - nwords = math.ceil(math.log2(self.m)) - nbytes = (a.data.shape[-1] - 1) // 8 + 1 - self.data = np.zeros(a.data.shape[:-1] + (nwords, nbytes), dtype=np.uint8) - for i in range(self.data.shape[-2]): - self.data[..., i, :] = np.packbits((a.data >> i) & 1, axis=-1) - else: # we have a BPArray - self.data = a.data.copy() # TODO: support conversion to different m - self.m = a.m - self.length = a.length - self.width = a.width - - def __repr__(self): - return f'' - - def __len__(self): - return self.length diff --git a/src/kyupy/logic_sim.py b/src/kyupy/logic_sim.py index cddde47..993938a 100644 --- a/src/kyupy/logic_sim.py +++ b/src/kyupy/logic_sim.py @@ -1,14 +1,29 @@ +"""A high-throughput combinational logic simulator. + +The class :py:class:`~kyupy.logic_sim.LogicSim` performs parallel simulations of the combinational part of a circuit. +The logic operations are performed bit-parallel on packed numpy arrays. +Simple sequential circuits can be simulated by repeated assignments and propagations. +However, this simulator ignores the clock network and simply assumes that all state-elements are clocked all the time. +""" + import math import numpy as np -from . import logic +from . import logic, hr_bytes class LogicSim: """A bit-parallel naïve combinational simulator for 2-, 4-, or 8-valued logic. + + :param circuit: The circuit to simulate. + :type circuit: :py:class:`~kyupy.circuit.Circuit` + :param sims: The number of parallel logic simulations to perform. + :type sims: int + :param m: The arity of the logic, must be 2, 4, or 8. 
+ :type m: int """ - def __init__(self, circuit, sims=1, m=8): + def __init__(self, circuit, sims=8, m=8): assert m in [2, 4, 8] self.m = m mdim = math.ceil(math.log2(m)) @@ -16,216 +31,165 @@ class LogicSim: self.sims = sims nbytes = (sims - 1) // 8 + 1 self.interface = list(circuit.interface) + [n for n in circuit.nodes if 'dff' in n.kind.lower()] + self.width = len(self.interface) + """The number of bits in the circuit state (number of ports + number of state-elements).""" self.state = np.zeros((len(circuit.lines), mdim, nbytes), dtype='uint8') self.state_epoch = np.zeros(len(circuit.nodes), dtype='int8') - 1 self.tmp = np.zeros((5, mdim, nbytes), dtype='uint8') self.zero = np.zeros((mdim, nbytes), dtype='uint8') self.epoch = 0 - self.fork_vd1 = self.fork_vdx - self.const0_vd1 = self.const0_vdx - self.input_vd1 = self.fork_vd1 - self.output_vd1 = self.fork_vd1 - self.inv_vd1 = self.not_vd1 - self.ibuff_vd1 = self.not_vd1 - self.nbuff_vd1 = self.fork_vd1 - self.xor2_vd1 = self.xor_vd1 - - self.fork_vd2 = self.fork_vdx - self.const0_vd2 = self.const0_vdx - self.input_vd2 = self.fork_vd2 - self.output_vd2 = self.fork_vd2 - self.inv_vd2 = self.not_vd2 - self.ibuff_vd2 = self.not_vd2 - self.nbuff_vd2 = self.fork_vd2 - self.xor2_vd2 = self.xor_vd2 - - self.fork_vd3 = self.fork_vdx - self.const0_vd3 = self.const0_vdx - self.input_vd3 = self.fork_vd3 - self.output_vd3 = self.fork_vd3 - self.inv_vd3 = self.not_vd3 - self.ibuff_vd3 = self.not_vd3 - self.nbuff_vd3 = self.fork_vd3 - self.xor2_vd3 = self.xor_vd3 - - known_fct = [(f[:-4], getattr(self, f)) for f in dir(self) if f.endswith(f'_vd{mdim}')] + known_fct = [(f[:-4], getattr(self, f)) for f in dir(self) if f.endswith('_fct')] self.node_fct = [] for n in circuit.nodes: t = n.kind.lower().replace('__fork__', 'fork') + t = t.replace('nbuff', 'fork') + t = t.replace('input', 'fork') + t = t.replace('output', 'fork') t = t.replace('__const0__', 'const0') t = t.replace('__const1__', 'const1') t = t.replace('tieh', 
'const1') + t = t.replace('ibuff', 'not') + t = t.replace('inv', 'not') + fcts = [f for n, f in known_fct if t.startswith(n)] if len(fcts) < 1: raise ValueError(f'Unknown node kind {n.kind}') self.node_fct.append(fcts[0]) + def __repr__(self): + return f'' + def assign(self, stimuli): - """Assign stimuli to the primary inputs and state-elements (flip-flops).""" - if hasattr(stimuli, 'data'): - stimuli = stimuli.data - for stim, node in zip(stimuli, self.interface): + """Assign stimuli to the primary inputs and state-elements (flip-flops). + + :param stimuli: The input data to assign. Must be in bit-parallel storage format and in a compatible shape. + :type stimuli: :py:class:`~kyupy.logic.BPArray` + :returns: The given stimuli object. + """ + for node, stim in zip(self.interface, stimuli.data if hasattr(stimuli, 'data') else stimuli): if len(node.outs) == 0: continue - outputs = [self.state[line.index] if line else self.tmp[3] for line in node.outs] - self.node_fct[node.index]([stim], outputs) + outputs = [self.state[line] if line else self.tmp[3] for line in node.outs] + self.node_fct[node]([stim], outputs) for line in node.outs: - if line: - self.state_epoch[line.reader.index] = self.epoch + if line is not None: self.state_epoch[line.reader] = self.epoch for n in self.circuit.nodes: - if (n.kind == '__const1__') or (n.kind == '__const0__'): - outputs = [self.state[line.index] if line else self.tmp[3] for line in n.outs] - self.node_fct[n.index]([], outputs) - # print('assign const') + if n.kind in ('__const1__', '__const0__'): + outputs = [self.state[line] if line else self.tmp[3] for line in n.outs] + self.node_fct[n]([], outputs) for line in n.outs: - if line: - self.state_epoch[line.reader.index] = self.epoch + if line is not None: self.state_epoch[line.reader] = self.epoch + return stimuli def capture(self, responses): - """Capture the current values at the primary outputs and in the state-elements (flip-flops).""" - if hasattr(responses, 'data'): - responses 
= responses.data - for resp, node in zip(responses, self.interface): - if len(node.ins) == 0: continue - resp[...] = self.state[node.ins[0].index] - # print(responses) - - def propagate(self): - """Propagate the input values towards the outputs (Perform all logic operations in topological order).""" + """Capture the current values at the primary outputs and in the state-elements (flip-flops). + + :param responses: A bit-parallel storage target for the responses in a compatible shape. + :type responses: :py:class:`~kyupy.logic.BPArray` + :returns: The given responses object. + """ + for node, resp in zip(self.interface, responses.data if hasattr(responses, 'data') else responses): + if len(node.ins) > 0: resp[...] = self.state[node.ins[0]] + return responses + + def propagate(self, inject_cb=None): + """Propagate the input values towards the outputs (Perform all logic operations in topological order). + + If the circuit is sequential (it contains flip-flops), one call simulates one clock cycle. + Multiple clock cycles are simulated by a assign-propagate-capture loop: + + .. code-block:: python + + # initial state in state_bp + for cycle in range(10): # simulate 10 clock cycles + sim.assign(state_bp) + sim.propagate() + sim.capture(state_bp) + + :param inject_cb: A callback function for manipulating intermediate signal values. + This function is called with a line index and its new logic values (in bit-parallel format) after + evaluation of a node. The callback may manipulate the given values in-place, the simulation + resumes with the manipulated values after the callback returns. 
+ :type inject_cb: ``f(int, ndarray)`` + """ for node in self.circuit.topological_order(): - if self.state_epoch[node.index] != self.epoch: continue - inputs = [self.state[line.index] if line else self.zero for line in node.ins] - outputs = [self.state[line.index] if line else self.tmp[3] for line in node.outs] + if self.state_epoch[node] != self.epoch: continue + inputs = [self.state[line] if line else self.zero for line in node.ins] + outputs = [self.state[line] if line else self.tmp[3] for line in node.outs] # print('sim', node) - self.node_fct[node.index](inputs, outputs) + self.node_fct[node](inputs, outputs) for line in node.outs: - self.state_epoch[line.reader.index] = self.epoch + if inject_cb is not None: inject_cb(line, self.state[line]) + self.state_epoch[line.reader] = self.epoch self.epoch = (self.epoch + 1) % 128 - def fork_vdx(self, inputs, outputs): + def cycle(self, state, inject_cb=None): + """Assigns the given state, propagates it and captures the new state. + + :param state: A bit-parallel array in a compatible shape holding the current circuit state. + The contained data is assigned to the PI and PPI and overwritten by data at the PO and PPO after + propagation. + :type state: :py:class:`~kyupy.logic.BPArray` + :param inject_cb: A callback function for manipulating intermediate signal values. See :py:func:`propagate`. + :returns: The given state object. + """ + self.assign(state) + self.propagate(inject_cb) + return self.capture(state) + + @staticmethod + def fork_fct(inputs, outputs): for o in outputs: o[...] = inputs[0] - - def const0_vdx(self, _, outputs): - for o in outputs: o[...] = self.zero - - # 2-valued simulation - - def not_vd1(self, inputs, outputs): - outputs[0][0] = ~inputs[0][0] - - def const1_vd1(self, _, outputs): - for o in outputs: o[...] 
= self.zero - self.not_vd1(outputs, outputs) - - def and_vd1(self, inputs, outputs): - o = outputs[0] - o[0] = inputs[0][0] - for i in inputs[1:]: o[0] &= i[0] - - def or_vd1(self, inputs, outputs): - o = outputs[0] - o[0] = inputs[0][0] - for i in inputs[1:]: o[0] |= i[0] - - def xor_vd1(self, inputs, outputs): - o = outputs[0] - o[0] = inputs[0][0] - for i in inputs[1:]: o[0] ^= i[0] - - def sdff_vd1(self, inputs, outputs): - outputs[0][0] = inputs[0][0] - if len(outputs) > 1: - outputs[1][0] = ~inputs[0][0] - - def dff_vd1(self, inputs, outputs): - outputs[0][0] = inputs[0][0] - if len(outputs) > 1: - outputs[1][0] = ~inputs[0][0] - - def nand_vd1(self, inputs, outputs): - self.and_vd1(inputs, outputs) - self.not_vd1(outputs, outputs) - def nor_vd1(self, inputs, outputs): - self.or_vd1(inputs, outputs) - self.not_vd1(outputs, outputs) + @staticmethod + def const0_fct(_, outputs): + for o in outputs: o[...] = 0 - def xnor_vd1(self, inputs, outputs): - self.xor_vd1(inputs, outputs) - self.not_vd1(outputs, outputs) + @staticmethod + def const1_fct(_, outputs): + for o in outputs: + o[...] 
= 0 + logic.bp_not(o, o) - # 4-valued simulation - - def not_vd2(self, inputs, outputs): + @staticmethod + def not_fct(inputs, outputs): logic.bp_not(outputs[0], inputs[0]) - def and_vd2(self, inputs, outputs): + @staticmethod + def and_fct(inputs, outputs): logic.bp_and(outputs[0], *inputs) - def or_vd2(self, inputs, outputs): + @staticmethod + def or_fct(inputs, outputs): logic.bp_or(outputs[0], *inputs) - def xor_vd2(self, inputs, outputs): + @staticmethod + def xor_fct(inputs, outputs): logic.bp_xor(outputs[0], *inputs) - def sdff_vd2(self, inputs, outputs): - self.dff_vd2(inputs, outputs) + @staticmethod + def sdff_fct(inputs, outputs): + logic.bp_buf(outputs[0], inputs[0]) if len(outputs) > 1: logic.bp_not(outputs[1], inputs[0]) - def dff_vd2(self, inputs, outputs): + @staticmethod + def dff_fct(inputs, outputs): logic.bp_buf(outputs[0], inputs[0]) + if len(outputs) > 1: + logic.bp_not(outputs[1], inputs[0]) - def nand_vd2(self, inputs, outputs): - self.and_vd2(inputs, outputs) - self.not_vd2(outputs, outputs) - - def nor_vd2(self, inputs, outputs): - self.or_vd2(inputs, outputs) - self.not_vd2(outputs, outputs) - - def xnor_vd2(self, inputs, outputs): - self.xor_vd2(inputs, outputs) - self.not_vd2(outputs, outputs) - - def const1_vd2(self, _, outputs): - for o in outputs: o[...] 
= self.zero - self.not_vd2(outputs, outputs) - - # 8-valued simulation - - def not_vd3(self, inputs, outputs): - logic.bp_not(outputs[0], inputs[0]) - - def and_vd3(self, inputs, outputs): + @staticmethod + def nand_fct(inputs, outputs): logic.bp_and(outputs[0], *inputs) + logic.bp_not(outputs[0], outputs[0]) - def or_vd3(self, inputs, outputs): + @staticmethod + def nor_fct(inputs, outputs): logic.bp_or(outputs[0], *inputs) + logic.bp_not(outputs[0], outputs[0]) - def xor_vd3(self, inputs, outputs): + @staticmethod + def xnor_fct(inputs, outputs): logic.bp_xor(outputs[0], *inputs) - - def sdff_vd3(self, inputs, outputs): - self.dff_vd3(inputs, outputs) - if len(outputs) > 1: - logic.bp_not(outputs[1], inputs[0]) - - def dff_vd3(self, inputs, outputs): - logic.bp_buf(outputs[0], inputs[0]) - - def nand_vd3(self, inputs, outputs): - self.and_vd3(inputs, outputs) - self.not_vd3(outputs, outputs) - - def nor_vd3(self, inputs, outputs): - self.or_vd3(inputs, outputs) - self.not_vd3(outputs, outputs) - - def xnor_vd3(self, inputs, outputs): - self.xor_vd3(inputs, outputs) - self.not_vd3(outputs, outputs) - - def const1_vd3(self, _, outputs): - for o in outputs: o[...] 
= self.zero - self.not_vd3(outputs, outputs) + logic.bp_not(outputs[0], outputs[0]) diff --git a/src/kyupy/saed.py b/src/kyupy/saed.py deleted file mode 100644 index 21771fd..0000000 --- a/src/kyupy/saed.py +++ /dev/null @@ -1,289 +0,0 @@ -from kyupy.circuit import Node, Line - - -def pin_index(cell_type, pin): - if cell_type.startswith('HADD') and pin == 'B0': return 1 - if cell_type.startswith('HADD') and pin == 'SO': return 1 - if cell_type.startswith('MUX21') and pin == 'S': return 2 - if cell_type.startswith('SDFF') and pin == 'QN': return 1 - if cell_type.startswith('DFF') and pin == 'QN': return 1 - if cell_type.startswith('DFF') and pin == 'CLK': return 1 - if cell_type.startswith('DFF') and pin == 'RSTB': return 2 - if cell_type.startswith('DFF') and pin == 'SETB': return 3 - if pin in ['A2', 'IN2', 'SE', 'B', 'CO']: return 1 - if pin in ['A3', 'IN3', 'SI', 'CI']: return 2 - if pin == 'A4' or pin == 'IN4' or pin == 'CLK': return 3 # CLK for scan cells SDFF - if pin == 'A5' or pin == 'IN5' or pin == 'RSTB': return 4 - if pin == 'A6' or pin == 'IN6' or pin == 'SETB': return 5 - return 0 - - -def pin_is_output(kind, pin): - if 'MUX' in kind and pin == 'S': - return False - return pin in ['Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1'] - - -def add_and_connect(circuit, name, kind, in1=None, in2=None, out=None): - n = Node(circuit, name, kind) - if in1 is not None: - n.ins[0] = in1 - in1.reader = n - in1.reader_pin = 0 - if in2 is not None: - n.ins[1] = in2 - in2.reader = n - in2.reader_pin = 1 - if out is not None: - n.outs[0] = out - out.driver = n - out.driver_pin = 0 - return n - - -def split_complex_gates(circuit): - node_list = circuit.nodes - for n in node_list: - name = n.name - ins = n.ins - outs = n.outs - if n.kind.startswith('AO21X'): - n.remove() - n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None) - n_or = add_and_connect(circuit, name+'~or', 'OR2', None, ins[2], outs[0]) - Line(circuit, n_and, n_or) - elif 
n.kind.startswith('AOI21X'): - n.remove() - n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None) - n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0]) - Line(circuit, n_and, n_nor) - elif n.kind.startswith('OA21X'): - n.remove() - n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None) - n_and = add_and_connect(circuit, name+'~and', 'AND2', None, ins[2], outs[0]) - Line(circuit, n_or, n_and) - elif n.kind.startswith('OAI21X'): - n.remove() - n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None) - n_nand = add_and_connect(circuit, name+'~nand', 'NAND2', None, ins[2], outs[0]) - Line(circuit, n_or, n_nand) - elif n.kind.startswith('OA22X'): - n.remove() - n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) - n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) - n_and = add_and_connect(circuit, name+'~and', 'AND2', None, None, outs[0]) - Line(circuit, n_or0, n_and) - Line(circuit, n_or1, n_and) - elif n.kind.startswith('OAI22X'): - n.remove() - n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) - n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) - n_nand = add_and_connect(circuit, name+'~nand', 'NAND2', None, None, outs[0]) - Line(circuit, n_or0, n_nand) - Line(circuit, n_or1, n_nand) - elif n.kind.startswith('AO22X'): - n.remove() - n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) - n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) - n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, outs[0]) - Line(circuit, n_and0, n_or) - Line(circuit, n_and1, n_or) - elif n.kind.startswith('AOI22X'): - n.remove() - n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) - n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) - n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, None, 
outs[0]) - Line(circuit, n_and0, n_nor) - Line(circuit, n_and1, n_nor) - elif n.kind.startswith('AO221X'): - n.remove() - n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) - n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) - n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None) - n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, ins[4], outs[0]) - Line(circuit, n_and0, n_or0) - Line(circuit, n_and1, n_or0) - Line(circuit, n_or0, n_or1) - elif n.kind.startswith('AOI221X'): - n.remove() - n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) - n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) - n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, None) - n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[4], outs[0]) - Line(circuit, n_and0, n_or) - Line(circuit, n_and1, n_or) - Line(circuit, n_or, n_nor) - elif n.kind.startswith('OA221X'): - n.remove() - n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) - n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) - n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None) - n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, ins[4], outs[0]) - Line(circuit, n_or0, n_and0) - Line(circuit, n_or1, n_and0) - Line(circuit, n_and0, n_and1) - elif n.kind.startswith('OAI221X'): - n.remove() - n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) - n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) - n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None) - n_nand1 = add_and_connect(circuit, name+'~nand1', 'NAND2', None, ins[4], outs[0]) - Line(circuit, n_or0, n_and0) - Line(circuit, n_or1, n_and0) - Line(circuit, n_and0, n_nand1) - elif n.kind.startswith('AO222X'): - n.remove() - n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], 
ins[1], None) - n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) - n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None) - n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None) - n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, None, outs[0]) - Line(circuit, n_and0, n_or0) - Line(circuit, n_and1, n_or0) - Line(circuit, n_and2, n_or1) - Line(circuit, n_or0, n_or1) - elif n.kind.startswith('AOI222X'): - n.remove() - n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) - n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) - n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None) - n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None) - n_nor1 = add_and_connect(circuit, name+'~nor1', 'NOR2', None, None, outs[0]) - Line(circuit, n_and0, n_or0) - Line(circuit, n_and1, n_or0) - Line(circuit, n_and2, n_nor1) - Line(circuit, n_or0, n_nor1) - elif n.kind.startswith('OA222X'): - n.remove() - n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) - n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) - n_or2 = add_and_connect(circuit, name+'~or2', 'OR2', ins[4], ins[5], None) - n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None) - n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, None, outs[0]) - Line(circuit, n_or0, n_and0) - Line(circuit, n_or1, n_and0) - Line(circuit, n_or2, n_and1) - Line(circuit, n_and0, n_and1) - elif n.kind.startswith('OAI222X'): - n.remove() - n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) - n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) - n2 = add_and_connect(circuit, name+'~or2', 'OR2', ins[4], ins[5], None) - n3 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None) - n4 = add_and_connect(circuit, name+'~nand1', 'NAND2', None, None, outs[0]) - 
Line(circuit, n0, n3) - Line(circuit, n1, n3) - Line(circuit, n2, n4) - Line(circuit, n3, n4) - elif n.kind.startswith('AND3X'): - n.remove() - n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) - n1 = add_and_connect(circuit, name+'~and1', 'AND2', None, ins[2], outs[0]) - Line(circuit, n0, n1) - elif n.kind.startswith('OR3X'): - n.remove() - n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) - n1 = add_and_connect(circuit, name+'~or1', 'OR2', None, ins[2], outs[0]) - Line(circuit, n0, n1) - elif n.kind.startswith('XOR3X'): - n.remove() - n0 = add_and_connect(circuit, name+'~xor0', 'XOR2', ins[0], ins[1], None) - n1 = add_and_connect(circuit, name+'~xor1', 'XOR2', None, ins[2], outs[0]) - Line(circuit, n0, n1) - elif n.kind.startswith('NAND3X'): - n.remove() - n0 = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None) - n1 = add_and_connect(circuit, name+'~nand', 'NAND2', None, ins[2], outs[0]) - Line(circuit, n0, n1) - elif n.kind.startswith('NOR3X'): - n.remove() - n0 = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None) - n1 = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0]) - Line(circuit, n0, n1) - elif n.kind.startswith('XNOR3X'): - n.remove() - n0 = add_and_connect(circuit, name+'~xor', 'XOR2', ins[0], ins[1], None) - n1 = add_and_connect(circuit, name+'~xnor', 'XNOR2', None, ins[2], outs[0]) - Line(circuit, n0, n1) - elif n.kind.startswith('AND4X'): - n.remove() - n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) - n1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) - n2 = add_and_connect(circuit, name+'~and2', 'AND2', None, None, outs[0]) - Line(circuit, n0, n2) - Line(circuit, n1, n2) - elif n.kind.startswith('OR4X'): - n.remove() - n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) - n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) - n2 = add_and_connect(circuit, name+'~or2', 
'OR2', None, None, outs[0]) - Line(circuit, n0, n2) - Line(circuit, n1, n2) - elif n.kind.startswith('NAND4X'): - n.remove() - n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) - n1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) - n2 = add_and_connect(circuit, name+'~nand2', 'NAND2', None, None, outs[0]) - Line(circuit, n0, n2) - Line(circuit, n1, n2) - elif n.kind.startswith('NOR4X'): - n.remove() - n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) - n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) - n2 = add_and_connect(circuit, name+'~nor2', 'NOR2', None, None, outs[0]) - Line(circuit, n0, n2) - Line(circuit, n1, n2) - elif n.kind.startswith('FADDX'): - n.remove() - # forks for fan-outs - f_a = add_and_connect(circuit, name + '~fork0', '__fork__', ins[0]) - f_b = add_and_connect(circuit, name + '~fork1', '__fork__', ins[1]) - f_ci = add_and_connect(circuit, name + '~fork2', '__fork__', ins[2]) - f_ab = Node(circuit, name + '~fork3') - # sum-block - n_xor0 = Node(circuit, name + '~xor0', 'XOR2') - Line(circuit, f_a, n_xor0) - Line(circuit, f_b, n_xor0) - Line(circuit, n_xor0, f_ab) - if len(outs) > 0 and outs[0] is not None: - n_xor1 = add_and_connect(circuit, name + '~xor1', 'XOR2', None, None, outs[0]) - Line(circuit, f_ab, n_xor1) - Line(circuit, f_ci, n_xor1) - # carry-block - if len(outs) > 1 and outs[1] is not None: - n_and0 = Node(circuit, name + '~and0', 'AND2') - Line(circuit, f_ab, n_and0) - Line(circuit, f_ci, n_and0) - n_and1 = Node(circuit, name + '~and1', 'AND2') - Line(circuit, f_a, n_and1) - Line(circuit, f_b, n_and1) - n_or = add_and_connect(circuit, name + '~or0', 'OR2', None, None, outs[1]) - Line(circuit, n_and0, n_or) - Line(circuit, n_and1, n_or) - elif n.kind.startswith('HADDX'): - n.remove() - # forks for fan-outs - f_a = add_and_connect(circuit, name + '~fork0', '__fork__', ins[0]) - f_b = add_and_connect(circuit, name + '~fork1', '__fork__', 
ins[1]) - n_xor0 = add_and_connect(circuit, name + '~xor0', 'XOR2', None, None, outs[1]) - Line(circuit, f_a, n_xor0) - Line(circuit, f_b, n_xor0) - n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', None, None, outs[0]) - Line(circuit, f_a, n_and0) - Line(circuit, f_b, n_and0) - elif n.kind.startswith('MUX21X'): - n.remove() - f_s = add_and_connect(circuit, name + '~fork0', '__fork__', ins[2]) - n_not = Node(circuit, name + '~not', 'INV') - Line(circuit, f_s, n_not) - n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', ins[0]) - n_and1 = add_and_connect(circuit, name + '~and1', 'AND2', ins[1]) - n_or0 = add_and_connect(circuit, name + '~or0', 'OR2', None, None, outs[0]) - Line(circuit, n_not, n_and0) - Line(circuit, f_s, n_and1) - Line(circuit, n_and0, n_or0) - Line(circuit, n_and1, n_or0) - elif n.kind.startswith('DFFSSR'): - n.kind = 'DFFX1' - n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', ins[0], ins[2], None) - Line(circuit, n_and0, (n, 0)) diff --git a/src/kyupy/sdf.py b/src/kyupy/sdf.py index beb58c0..f89d7b3 100644 --- a/src/kyupy/sdf.py +++ b/src/kyupy/sdf.py @@ -14,6 +14,7 @@ import numpy as np from lark import Lark, Transformer from . import log, readtext +from .techlib import TechLib Interconnect = namedtuple('Interconnect', ['orig', 'dest', 'r', 'f']) @@ -35,7 +36,7 @@ class DelayFile: return '\n'.join(f'{n}: {l}' for n, l in self.cells.items()) + '\n' + \ '\n'.join(str(i) for i in self.interconnects) - def annotation(self, circuit, pin_index_f, dataset=1, interconnect=True, ffdelays=True): + def annotation(self, circuit, tlib=TechLib(), dataset=1, interconnect=True, ffdelays=True): """Constructs an 3-dimensional ndarray with timing data for each line in ``circuit``. An IOPATH delay for a node is annotated to the line connected to the input pin specified in the IOPATH. @@ -43,29 +44,36 @@ class DelayFile: Currently, only ABSOLUTE IOPATH and INTERCONNECT delays are supported. 
Pulse rejection limits are derived from absolute delays, explicit declarations (PATHPULSE etc.) are ignored. - :param circuit: - :param pin_index_f: - :param ffdelays: - :param interconnect: - :type dataset: int or tuple + :param circuit: The circuit to annotate. Names from the STIL file are matched to the node names. + :type circuit: :class:`~kyupy.circuit.Circuit` + :param tlib: A technology library object that provides pin name mappings. + :type tlib: :py:class:`~kyupy.techlib.TechLib` + :param dataset: SDFs store multiple values for each delay (e.g. minimum, typical, maximum). + An integer selects the dataset to use (default is 1 for 'typical'). + If a tuple is given, the annotator will calculate the average of multiple datasets. + :type dataset: ``int`` or ``tuple`` + :param interconnect: Whether or not to include the delays of interconnects in the annotation. + To properly annotate interconnect delays, the circuit model has to include a '__fork__' node on + every signal and every fanout-branch. The Verilog parser aids in this by setting the parameter + `branchforks=True` in :py:func:`kyupy.verilog.parse`. + :type interconnect: ``bool`` + :param ffdelays: Whether or not to include the delays of flip-flops in the annotation. + :type ffdelays: ``bool`` :return: A 3-dimensional ndarray with timing data. * Axis 0: line index. - * Axis 1: type of timing data: 0=`delay`, 1=`pulse rejection limit`. - * Axis 2: The polarity of the output transition of the reading node: 0=`rising`, 1=`falling`. + * Axis 1: type of timing data: 0='delay', 1='pulse rejection limit'. + * Axis 2: The polarity of the output transition of the reading node: 0='rising', 1='falling'. The polarity for pulse rejection is determined by the latter transition of the pulse. - E.g., timing[42,1,0] is the rejection limit of a negative pulse at the output of the reader of line 42. + E.g., ``timing[42, 1, 0]`` is the rejection limit of a negative pulse at the output + of the reader of line 42. 
""" def select_del(_delvals, idx): - if type(dataset) is tuple: - s = 0 - for d in dataset: - s += _delvals[idx][d] - return s / len(dataset) - else: - return _delvals[idx][dataset] - + if isinstance(dataset, tuple): + return sum(_delvals[idx][d] for d in dataset) / len(dataset) + return _delvals[idx][dataset] + def find_cell(name): if name not in circuit.cells: name = name.replace('\\', '') @@ -74,7 +82,7 @@ class DelayFile: if name not in circuit.cells: return None return circuit.cells[name] - + timing = np.zeros((len(circuit.lines), 2, 2)) for cn, iopaths in self.cells.items(): for ipn, opn, *delvals in iopaths: @@ -85,17 +93,17 @@ class DelayFile: if cell is None: log.warn(f'Cell from SDF not found in circuit: {cn}') continue - ipin = pin_index_f(cell.kind, ipn) - opin = pin_index_f(cell.kind, opn) + ipin = tlib.pin_index(cell.kind, ipn) + opin = tlib.pin_index(cell.kind, opn) kind = cell.kind.lower() ipn2 = ipn.replace('(posedge A1)', 'A1').replace('(negedge A1)', 'A1')\ .replace('(posedge A2)', 'A2').replace('(negedge A2)', 'A2') - + def add_delays(_line): if _line is not None: - timing[_line.index, :, 0] += select_del(delvals, 0) - timing[_line.index, :, 1] += select_del(delvals, 1) + timing[_line, :, 0] += select_del(delvals, 0) + timing[_line, :, 1] += select_del(delvals, 1) take_avg = False if kind.startswith('sdff'): @@ -105,16 +113,16 @@ class DelayFile: add_delays(cell.outs[opin]) else: if kind.startswith(('xor', 'xnor')): - ipin = pin_index_f(cell.kind, ipn2) - # print(ipn, ipin, times[cell.i_lines[ipin].index, 0, 0]) - take_avg = timing[cell.ins[ipin].index].sum() > 0 + ipin = tlib.pin_index(cell.kind, ipn2) + # print(ipn, ipin, times[cell.i_lines[ipin], 0, 0]) + take_avg = timing[cell.ins[ipin]].sum() > 0 add_delays(cell.ins[ipin]) if take_avg: - timing[cell.ins[ipin].index] /= 2 - + timing[cell.ins[ipin]] /= 2 + if not interconnect or self.interconnects is None: return timing - + for n1, n2, *delvals in self.interconnects: delvals = [d if len(d) > 
0 else [0, 0, 0] for d in delvals] if max(max(delvals)) == 0: @@ -139,7 +147,7 @@ class DelayFile: if c2 is None: log.warn(f'Cell from SDF not found in circuit: {cn2}') continue - p1, p2 = pin_index_f(c1.kind, pn1), pin_index_f(c2.kind, pn2) + p1, p2 = tlib.pin_index(c1.kind, pn1), tlib.pin_index(c2.kind, pn2) line = None f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver if f1 != f2: # possible branchfork @@ -149,8 +157,8 @@ class DelayFile: elif len(f2.outs) == 1: # no fanout? line = f2.ins[0] if line is not None: - timing[line.index, :, 0] += select_del(delvals, 0) - timing[line.index, :, 1] += select_del(delvals, 1) + timing[line, :, 0] += select_del(delvals, 0) + timing[line, :, 1] += select_del(delvals, 1) else: log.warn(f'No branchfork for annotating interconnect delay {c1.name}/{p1}->{c2.name}/{p2}') return timing @@ -184,7 +192,7 @@ class SdfTransformer(Transformer): return DelayFile(name, cells) -grammar = r""" +GRAMMAR = r""" start: "(DELAYFILE" ( "(SDFVERSION" _NOB ")" | "(DESIGN" "\"" NAME "\"" ")" | "(DATE" _NOB ")" @@ -218,7 +226,7 @@ grammar = r""" def parse(text): """Parses the given ``text`` and returns a :class:`DelayFile` object.""" - return Lark(grammar, parser="lalr", transformer=SdfTransformer()).parse(text) + return Lark(GRAMMAR, parser="lalr", transformer=SdfTransformer()).parse(text) def load(file): diff --git a/src/kyupy/stil.py b/src/kyupy/stil.py index 5c022ca..75bffc2 100644 --- a/src/kyupy/stil.py +++ b/src/kyupy/stil.py @@ -4,7 +4,7 @@ The main purpose of this parser is to load scan pattern sets from STIL files. It supports only a very limited subset of STIL. The functions :py:func:`load` and :py:func:`read` return an intermediate representation (:class:`StilFile` object). -Call :py:func:`StilFile.tests4v`, :py:func:`StilFile.tests8v`, or :py:func:`StilFile.responses4v` to +Call :py:func:`StilFile.tests`, :py:func:`StilFile.tests_loc`, or :py:func:`StilFile.responses` to obtain the appropriate vector sets. 
""" @@ -54,26 +54,26 @@ class StilFile: launch = dict((k, v.replace('\n', '')) for k, v in call.parameters.items()) else: capture = dict((k, v.replace('\n', '')) for k, v in call.parameters.items()) - + def _maps(self, c): interface = list(c.interface) + [n for n in c.nodes if 'DFF' in n.kind] - intf_pos = dict([(n.name, i) for i, n in enumerate(interface)]) + intf_pos = dict((n.name, i) for i, n in enumerate(interface)) pi_map = [intf_pos[n] for n in self.signal_groups['_pi']] po_map = [intf_pos[n] for n in self.signal_groups['_po']] scan_maps = {} scan_inversions = {} - for chain_name, chain in self.scan_chains.items(): + for chain in self.scan_chains.values(): scan_map = [] scan_in_inversion = [] scan_out_inversion = [] inversion = False for n in chain[1:-1]: - if n == '!': + if n == '!': inversion = not inversion else: scan_in_inversion.append(inversion) scan_in_inversion = list(reversed(scan_in_inversion)) - inversion = False + inversion = False for n in reversed(chain[1:-1]): if n == '!': inversion = not inversion @@ -85,13 +85,13 @@ class StilFile: scan_inversions[chain[0]] = scan_in_inversion scan_inversions[chain[-1]] = scan_out_inversion return interface, pi_map, po_map, scan_maps, scan_inversions - + def tests(self, circuit): """Assembles and returns a scan test pattern set for given circuit. This function assumes a static (stuck-at fault) test. 
""" - interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(circuit) + interface, pi_map, _, scan_maps, scan_inversions = self._maps(circuit) tests = logic.MVArray((len(interface), len(self.patterns))) for i, p in enumerate(self.patterns): for si_port in self.si_ports.keys(): @@ -133,10 +133,10 @@ class StilFile: launch.data[po_map, i] = logic.UNASSIGNED return logic.mv_transition(init, launch) - + def responses(self, circuit): """Assembles and returns a scan test response pattern set for given circuit.""" - interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(circuit) + interface, _, po_map, scan_maps, scan_inversions = self._maps(circuit) resp = logic.MVArray((len(interface), len(self.patterns))) # resp = PackedVectors(len(self.patterns), len(interface), 2) for i, p in enumerate(self.patterns): @@ -150,27 +150,27 @@ class StilFile: resp.data[scan_maps[so_port], i] = pattern.data[:, 0] # resp.set_values(i, p.unload[so_port], scan_maps[so_port], scan_inversions[so_port]) return resp - - + + class StilTransformer(Transformer): def __init__(self): super().__init__() self._signal_groups = None self._calls = None self._scan_chains = None - + @staticmethod def quoted(args): return args[0][1:-1] @staticmethod def call(args): return Call(args[0], dict(args[1:])) - + @staticmethod def call_parameter(args): return args[0], args[1].value @staticmethod def signal_group(args): return args[0], args[1:] - + @staticmethod def scan_chain(args): scan_in = None @@ -187,7 +187,7 @@ class StilTransformer(Transformer): return args[0], ([scan_in] + scan_cells + [scan_out]) def signal_groups(self, args): self._signal_groups = dict(args) - + def pattern(self, args): self._calls = [c for c in args if isinstance(c, Call)] def scan_structures(self, args): self._scan_chains = dict(args) @@ -196,7 +196,7 @@ class StilTransformer(Transformer): return StilFile(float(args[0]), self._signal_groups, self._scan_chains, self._calls) -grammar = r""" +GRAMMAR = r""" start: 
"STIL" FLOAT _ignore _block* _block: signal_groups | scan_structures | pattern | "Header" _ignore @@ -240,7 +240,7 @@ grammar = r""" def parse(text): """Parses the given ``text`` and returns a :class:`StilFile` object.""" - return Lark(grammar, parser="lalr", transformer=StilTransformer()).parse(text) + return Lark(GRAMMAR, parser="lalr", transformer=StilTransformer()).parse(text) def load(file): diff --git a/src/kyupy/techlib.py b/src/kyupy/techlib.py new file mode 100644 index 0000000..5a5a01b --- /dev/null +++ b/src/kyupy/techlib.py @@ -0,0 +1,301 @@ +from .circuit import Node, Line + + +def add_and_connect(circuit, name, kind, in1=None, in2=None, out=None): + n = Node(circuit, name, kind) + if in1 is not None: + n.ins[0] = in1 + in1.reader = n + in1.reader_pin = 0 + if in2 is not None: + n.ins[1] = in2 + in2.reader = n + in2.reader_pin = 1 + if out is not None: + n.outs[0] = out + out.driver = n + out.driver_pin = 0 + return n + + +class TechLib: + """Provides some information specific to standard cell libraries necessary + for loading gate-level designs. :py:class:`~kyupy.circuit.Node` objects do not + have pin names. The methods defined here map pin names to pin directions and defined + positions in the ``node.ins`` and ``node.outs`` lists. The default implementation + provides mappings for SAED-inspired standard cell libraries. 
+ """ + + @staticmethod + def pin_index(kind, pin): + """Returns a pin list position for a given node kind and pin name.""" + for prefix, pins, index in [('HADD', ('B0', 'SO'), 1), + ('MUX21', ('S',), 2), + ('DFF', ('QN',), 1), + ('SDFF', ('QN',), 1), + ('SDFF', ('CLK',), 3), + ('SDFF', ('RSTB',), 4), + ('SDFF', ('SETB',), 5)]: + if kind.startswith(prefix) and pin in pins: return index + for index, pins in enumerate([('A1', 'IN1', 'D', 'S', 'INP', 'A', 'Q', 'QN', 'Y', 'Z', 'ZN'), + ('A2', 'IN2', 'CLK', 'CO', 'SE', 'B'), + ('A3', 'IN3', 'RSTB', 'CI', 'SI'), + ('A4', 'IN4', 'SETB'), + ('A5', 'IN5'), + ('A6', 'IN6')]): + if pin in pins: return index + raise ValueError(f'Unknown pin index for {kind}.{pin}') + + @staticmethod + def pin_is_output(kind, pin): + """Returns True, if given pin name of a node kind is an output.""" + if 'MUX' in kind and pin == 'S': return False + return pin in ('Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1') + + @staticmethod + def split_complex_gates(circuit): + node_list = circuit.nodes + for n in node_list: + name = n.name + ins = n.ins + outs = n.outs + if n.kind.startswith('AO21X'): + n.remove() + n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None) + n_or = add_and_connect(circuit, name+'~or', 'OR2', None, ins[2], outs[0]) + Line(circuit, n_and, n_or) + elif n.kind.startswith('AOI21X'): + n.remove() + n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None) + n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0]) + Line(circuit, n_and, n_nor) + elif n.kind.startswith('OA21X'): + n.remove() + n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None) + n_and = add_and_connect(circuit, name+'~and', 'AND2', None, ins[2], outs[0]) + Line(circuit, n_or, n_and) + elif n.kind.startswith('OAI21X'): + n.remove() + n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None) + n_nand = add_and_connect(circuit, name+'~nand', 'NAND2', None, ins[2], outs[0]) 
+ Line(circuit, n_or, n_nand) + elif n.kind.startswith('OA22X'): + n.remove() + n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) + n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) + n_and = add_and_connect(circuit, name+'~and', 'AND2', None, None, outs[0]) + Line(circuit, n_or0, n_and) + Line(circuit, n_or1, n_and) + elif n.kind.startswith('OAI22X'): + n.remove() + n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) + n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) + n_nand = add_and_connect(circuit, name+'~nand', 'NAND2', None, None, outs[0]) + Line(circuit, n_or0, n_nand) + Line(circuit, n_or1, n_nand) + elif n.kind.startswith('AO22X'): + n.remove() + n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) + n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) + n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, outs[0]) + Line(circuit, n_and0, n_or) + Line(circuit, n_and1, n_or) + elif n.kind.startswith('AOI22X'): + n.remove() + n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) + n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) + n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, None, outs[0]) + Line(circuit, n_and0, n_nor) + Line(circuit, n_and1, n_nor) + elif n.kind.startswith('AO221X'): + n.remove() + n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) + n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) + n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None) + n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, ins[4], outs[0]) + Line(circuit, n_and0, n_or0) + Line(circuit, n_and1, n_or0) + Line(circuit, n_or0, n_or1) + elif n.kind.startswith('AOI221X'): + n.remove() + n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) + n_and1 = 
add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) + n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, None) + n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[4], outs[0]) + Line(circuit, n_and0, n_or) + Line(circuit, n_and1, n_or) + Line(circuit, n_or, n_nor) + elif n.kind.startswith('OA221X'): + n.remove() + n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) + n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) + n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None) + n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, ins[4], outs[0]) + Line(circuit, n_or0, n_and0) + Line(circuit, n_or1, n_and0) + Line(circuit, n_and0, n_and1) + elif n.kind.startswith('OAI221X'): + n.remove() + n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) + n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) + n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None) + n_nand1 = add_and_connect(circuit, name+'~nand1', 'NAND2', None, ins[4], outs[0]) + Line(circuit, n_or0, n_and0) + Line(circuit, n_or1, n_and0) + Line(circuit, n_and0, n_nand1) + elif n.kind.startswith('AO222X'): + n.remove() + n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) + n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) + n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None) + n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None) + n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, None, outs[0]) + Line(circuit, n_and0, n_or0) + Line(circuit, n_and1, n_or0) + Line(circuit, n_and2, n_or1) + Line(circuit, n_or0, n_or1) + elif n.kind.startswith('AOI222X'): + n.remove() + n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) + n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) + 
n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None) + n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None) + n_nor1 = add_and_connect(circuit, name+'~nor1', 'NOR2', None, None, outs[0]) + Line(circuit, n_and0, n_or0) + Line(circuit, n_and1, n_or0) + Line(circuit, n_and2, n_nor1) + Line(circuit, n_or0, n_nor1) + elif n.kind.startswith('OA222X'): + n.remove() + n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) + n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) + n_or2 = add_and_connect(circuit, name+'~or2', 'OR2', ins[4], ins[5], None) + n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None) + n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, None, outs[0]) + Line(circuit, n_or0, n_and0) + Line(circuit, n_or1, n_and0) + Line(circuit, n_or2, n_and1) + Line(circuit, n_and0, n_and1) + elif n.kind.startswith('OAI222X'): + n.remove() + n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) + n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) + n2 = add_and_connect(circuit, name+'~or2', 'OR2', ins[4], ins[5], None) + n3 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None) + n4 = add_and_connect(circuit, name+'~nand1', 'NAND2', None, None, outs[0]) + Line(circuit, n0, n3) + Line(circuit, n1, n3) + Line(circuit, n2, n4) + Line(circuit, n3, n4) + elif n.kind.startswith('AND3X'): + n.remove() + n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) + n1 = add_and_connect(circuit, name+'~and1', 'AND2', None, ins[2], outs[0]) + Line(circuit, n0, n1) + elif n.kind.startswith('OR3X'): + n.remove() + n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) + n1 = add_and_connect(circuit, name+'~or1', 'OR2', None, ins[2], outs[0]) + Line(circuit, n0, n1) + elif n.kind.startswith('XOR3X'): + n.remove() + n0 = add_and_connect(circuit, name+'~xor0', 'XOR2', ins[0], ins[1], 
None) + n1 = add_and_connect(circuit, name+'~xor1', 'XOR2', None, ins[2], outs[0]) + Line(circuit, n0, n1) + elif n.kind.startswith('NAND3X'): + n.remove() + n0 = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None) + n1 = add_and_connect(circuit, name+'~nand', 'NAND2', None, ins[2], outs[0]) + Line(circuit, n0, n1) + elif n.kind.startswith('NOR3X'): + n.remove() + n0 = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None) + n1 = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0]) + Line(circuit, n0, n1) + elif n.kind.startswith('XNOR3X'): + n.remove() + n0 = add_and_connect(circuit, name+'~xor', 'XOR2', ins[0], ins[1], None) + n1 = add_and_connect(circuit, name+'~xnor', 'XNOR2', None, ins[2], outs[0]) + Line(circuit, n0, n1) + elif n.kind.startswith('AND4X'): + n.remove() + n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) + n1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) + n2 = add_and_connect(circuit, name+'~and2', 'AND2', None, None, outs[0]) + Line(circuit, n0, n2) + Line(circuit, n1, n2) + elif n.kind.startswith('OR4X'): + n.remove() + n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) + n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) + n2 = add_and_connect(circuit, name+'~or2', 'OR2', None, None, outs[0]) + Line(circuit, n0, n2) + Line(circuit, n1, n2) + elif n.kind.startswith('NAND4X'): + n.remove() + n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None) + n1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None) + n2 = add_and_connect(circuit, name+'~nand2', 'NAND2', None, None, outs[0]) + Line(circuit, n0, n2) + Line(circuit, n1, n2) + elif n.kind.startswith('NOR4X'): + n.remove() + n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None) + n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None) + n2 = add_and_connect(circuit, name+'~nor2', 'NOR2', 
None, None, outs[0]) + Line(circuit, n0, n2) + Line(circuit, n1, n2) + elif n.kind.startswith('FADDX'): + n.remove() + # forks for fan-outs + f_a = add_and_connect(circuit, name + '~fork0', '__fork__', ins[0]) + f_b = add_and_connect(circuit, name + '~fork1', '__fork__', ins[1]) + f_ci = add_and_connect(circuit, name + '~fork2', '__fork__', ins[2]) + f_ab = Node(circuit, name + '~fork3') + # sum-block + n_xor0 = Node(circuit, name + '~xor0', 'XOR2') + Line(circuit, f_a, n_xor0) + Line(circuit, f_b, n_xor0) + Line(circuit, n_xor0, f_ab) + if len(outs) > 0 and outs[0] is not None: + n_xor1 = add_and_connect(circuit, name + '~xor1', 'XOR2', None, None, outs[0]) + Line(circuit, f_ab, n_xor1) + Line(circuit, f_ci, n_xor1) + # carry-block + if len(outs) > 1 and outs[1] is not None: + n_and0 = Node(circuit, name + '~and0', 'AND2') + Line(circuit, f_ab, n_and0) + Line(circuit, f_ci, n_and0) + n_and1 = Node(circuit, name + '~and1', 'AND2') + Line(circuit, f_a, n_and1) + Line(circuit, f_b, n_and1) + n_or = add_and_connect(circuit, name + '~or0', 'OR2', None, None, outs[1]) + Line(circuit, n_and0, n_or) + Line(circuit, n_and1, n_or) + elif n.kind.startswith('HADDX'): + n.remove() + # forks for fan-outs + f_a = add_and_connect(circuit, name + '~fork0', '__fork__', ins[0]) + f_b = add_and_connect(circuit, name + '~fork1', '__fork__', ins[1]) + n_xor0 = add_and_connect(circuit, name + '~xor0', 'XOR2', None, None, outs[1]) + Line(circuit, f_a, n_xor0) + Line(circuit, f_b, n_xor0) + n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', None, None, outs[0]) + Line(circuit, f_a, n_and0) + Line(circuit, f_b, n_and0) + elif n.kind.startswith('MUX21X'): + n.remove() + f_s = add_and_connect(circuit, name + '~fork0', '__fork__', ins[2]) + n_not = Node(circuit, name + '~not', 'INV') + Line(circuit, f_s, n_not) + n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', ins[0]) + n_and1 = add_and_connect(circuit, name + '~and1', 'AND2', ins[1]) + n_or0 = add_and_connect(circuit, name + 
'~or0', 'OR2', None, None, outs[0]) + Line(circuit, n_not, n_and0) + Line(circuit, f_s, n_and1) + Line(circuit, n_and0, n_or0) + Line(circuit, n_and1, n_or0) + elif n.kind.startswith('DFFSSR'): + n.kind = 'DFFX1' + n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', ins[0], ins[2], None) + Line(circuit, n_and0, (n, 0)) diff --git a/src/kyupy/verilog.py b/src/kyupy/verilog.py index 61e76ee..c6b5ab0 100644 --- a/src/kyupy/verilog.py +++ b/src/kyupy/verilog.py @@ -10,13 +10,13 @@ from lark import Lark, Transformer from . import readtext from .circuit import Circuit, Node, Line -from .saed import pin_index, pin_is_output +from .techlib import TechLib Instantiation = namedtuple('Instantiation', ['type', 'name', 'pins']) class SignalDeclaration: - + def __init__(self, kind, tokens): self.left = None self.right = None @@ -27,25 +27,25 @@ class SignalDeclaration: self.basename = tokens.children[2] self.left = int(tokens.children[0].value) self.right = int(tokens.children[1].value) - + @property def names(self): if self.left is None: return [self.basename] if self.left <= self.right: return [f'{self.basename}[{i}]' for i in range(self.left, self.right + 1)] - else: - return [f'{self.basename}[{i}]' for i in range(self.left, self.right - 1, -1)] - + return [f'{self.basename}[{i}]' for i in range(self.left, self.right - 1, -1)] + def __repr__(self): return f"{self.kind}:{self.basename}[{self.left}:{self.right}]" class VerilogTransformer(Transformer): - def __init__(self, branchforks=False): + def __init__(self, branchforks=False, tlib=TechLib()): super().__init__() self._signal_declarations = {} self.branchforks = branchforks + self.tlib = tlib @staticmethod def name(args): @@ -57,24 +57,24 @@ class VerilogTransformer(Transformer): @staticmethod def instantiation(args): return Instantiation(args[0], args[1], - dict([(pin.children[0], pin.children[1]) for pin in args[2:]])) - + dict((pin.children[0], pin.children[1]) for pin in args[2:])) + def input(self, args): for sd 
in [SignalDeclaration('input', signal) for signal in args]: self._signal_declarations[sd.basename] = sd - + def inout(self, args): for sd in [SignalDeclaration('input', signal) for signal in args]: # just treat as input self._signal_declarations[sd.basename] = sd - + def output(self, args): for sd in [SignalDeclaration('output', signal) for signal in args]: self._signal_declarations[sd.basename] = sd - + def wire(self, args): for sd in [SignalDeclaration('wire', signal) for signal in args]: self._signal_declarations[sd.basename] = sd - + def module(self, args): c = Circuit(args[0]) positions = {} @@ -85,11 +85,11 @@ class VerilogTransformer(Transformer): pos += 1 assignments = [] for stmt in args[2:]: # pass 1: instantiate cells and driven signals - if type(stmt) is Instantiation: + if isinstance(stmt, Instantiation): n = Node(c, stmt.name, kind=stmt.type) for p, s in stmt.pins.items(): - if pin_is_output(n.kind, p): - Line(c, (n, pin_index(stmt.type, p)), Node(c, s)) + if self.tlib.pin_is_output(n.kind, p): + Line(c, (n, self.tlib.pin_index(stmt.type, p)), Node(c, s)) elif stmt is not None and stmt.data == 'assign': assignments.append((stmt.children[0], stmt.children[1])) for sd in self._signal_declarations.values(): @@ -108,10 +108,10 @@ class VerilogTransformer(Transformer): assert s1 not in c.forks, 'assignment between two driven signals' Line(c, c.forks[s2], Node(c, s1)) for stmt in args[2:]: # pass 2: connect signals to readers - if type(stmt) is Instantiation: + if isinstance(stmt, Instantiation): for p, s in stmt.pins.items(): n = c.cells[stmt.name] - if pin_is_output(n.kind, p): continue + if self.tlib.pin_is_output(n.kind, p): continue if s.startswith("1'b"): const = f'__const{s[3]}__' if const not in c.cells: @@ -121,7 +121,7 @@ class VerilogTransformer(Transformer): branchfork = Node(c, fork.name + "~" + n.name + "/" + p) Line(c, fork, branchfork) fork = branchfork - Line(c, fork, (n, pin_index(stmt.type, p))) + Line(c, fork, (n, 
self.tlib.pin_index(stmt.type, p))) for sd in self._signal_declarations.values(): if sd.kind == 'output': for name in sd.names: @@ -129,14 +129,10 @@ class VerilogTransformer(Transformer): return c @staticmethod - def start(args): - if len(args) == 1: - return args[0] - else: - return args + def start(args): return args[0] if len(args) == 1 else args -grammar = """ +GRAMMAR = """ start: (module)* module: "module" name parameters ";" (_statement)* "endmodule" parameters: "(" [ name ( "," name )* ] ")" @@ -158,16 +154,18 @@ grammar = """ """ -def parse(text, *, branchforks=False): +def parse(text, *, branchforks=False, tlib=TechLib()): """Parses the given ``text`` as Verilog code. :param text: A string with Verilog code. :param branchforks: If set to ``True``, the returned circuit will include additional `forks` on each fanout branch. These forks are needed to correctly annotate interconnect delays (see :py:func:`kyupy.sdf.DelayFile.annotation`). + :param tlib: A technology library object that provides pin name mappings. + :type tlib: :py:class:`~kyupy.techlib.TechLib` :return: A :class:`~kyupy.circuit.Circuit` object. """ - return Lark(grammar, parser="lalr", transformer=VerilogTransformer(branchforks)).parse(text) + return Lark(GRAMMAR, parser="lalr", transformer=VerilogTransformer(branchforks, tlib)).parse(text) def load(file, *args, **kwargs): diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py index 2766997..bd04f10 100644 --- a/src/kyupy/wave_sim.py +++ b/src/kyupy/wave_sim.py @@ -1,10 +1,10 @@ -"""High-Throughput combinational logic timing simulators. +"""High-throughput combinational logic timing simulators. -These simulators work similarly to :py:class:`kyupy.logic_sim.LogicSim`. +These simulators work similarly to :py:class:`~kyupy.logic_sim.LogicSim`. They propagate values through the combinational circuit from (pseudo) primary inputs to (pseudo) primary outputs. 
Instead of propagating logic values, these simulators propagate signal histories (waveforms). -They are designed to run many simulations in parallel and while their latencies are quite high, they achieve -high throughput performance. +They are designed to run many simulations in parallel and while their latencies are quite high, they can achieve +high throughput. The simulators are not event-based and are not capable of simulating sequential circuits directly. @@ -17,13 +17,16 @@ from bisect import bisect, insort_left import numpy as np -from . import numba -from . import cuda +from . import numba, cuda, hr_bytes -TMAX = np.float32(2 ** 127) # almost np.PINF for 32-bit floating point values -TMAX_OVL = np.float32(1.1 * 2 ** 127) # almost np.PINF with overflow mark -TMIN = np.float32(-2 ** 127) # almost np.NINF for 32-bit floating point values +TMAX = np.float32(2 ** 127) +"""A large 32-bit floating point value used to mark the end of a waveform.""" +TMAX_OVL = np.float32(1.1 * 2 ** 127) +"""A large 32-bit floating point value used to mark the end of a waveform that +may be incomplete due to an overflow.""" +TMIN = np.float32(-2 ** 127) +"""A large negative 32-bit floating point value used at the beginning of waveforms that start with logic-1.""" class Heap: @@ -38,7 +41,7 @@ class Heap: if self.chunks[loc] == size: del self.released[idx] return loc - elif self.chunks[loc] > size: # split chunk + if self.chunks[loc] > size: # split chunk chunksize = self.chunks[loc] self.chunks[loc] = size self.chunks[loc + size] = chunksize - size @@ -93,7 +96,23 @@ class Heap: class WaveSim: - """A waveform-based combinational logic timing simulator.""" + """A waveform-based combinational logic timing simulator running on CPU. + + :param circuit: The circuit to simulate. + :param timing: The timing annotation of the circuit (see :py:func:`kyupy.sdf.DelayFile.annotation` for details) + :param sims: The number of parallel simulations. 
+ :param wavecaps: The number of floats available in each waveform. Waveforms encode the signal switching + history by storing transition times. The waveform capacity roughly corresponds to the number of transitions + that can be stored. A capacity of ``n`` can store at least ``n-2`` transitions. If more transitions are + generated during simulation, the latest glitch is removed (freeing up two transition times) and an overflow + flag is set. If an integer is given, all waveforms are set to that same capacity. With an array of length + ``len(circuit.lines)`` the capacity can be controlled for each intermediate waveform individually. + :param strip_forks: If enabled, the simulator will not evaluate fork nodes explicitly. This saves simulation time + by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes + are ignored. + :param keep_waveforms: If disabled, memory of intermediate signal waveforms will be re-used. This greatly reduces + memory footprint, but intermediate signal waveforms become inaccessible after a propagation. + """ def __init__(self, circuit, timing, sims=8, wavecaps=16, strip_forks=False, keep_waveforms=True): self.circuit = circuit self.sims = sims @@ -104,7 +123,7 @@ class WaveSim: self.cdata = np.zeros((len(self.interface), sims, 7), dtype='float32') - if type(wavecaps) is int: + if isinstance(wavecaps, int): wavecaps = [wavecaps] * len(circuit.lines) intf_wavecap = 4 # sufficient for storing only 1 transition. 
@@ -118,7 +137,7 @@ class WaveSim: # translate circuit structure into self.ops ops = [] - interface_dict = dict([(n, i) for i, n in enumerate(self.interface)]) + interface_dict = dict((n, i) for i, n in enumerate(self.interface)) for n in circuit.topological_order(): if n in interface_dict: inp_idx = self.ppi_offset + interface_dict[n] @@ -152,7 +171,7 @@ class WaveSim: ops.append((0b0110, o0_idx, i0_idx, i1_idx)) elif kind.startswith('xnor'): ops.append((0b1001, o0_idx, i0_idx, i1_idx)) - elif kind.startswith('not') or kind.startswith('inv'): + elif kind.startswith('not') or kind.startswith('inv') or kind.startswith('ibuf'): ops.append((0b0101, o0_idx, i0_idx, i1_idx)) elif kind.startswith('buf') or kind.startswith('nbuf'): ops.append((0b1010, o0_idx, i0_idx, i1_idx)) @@ -173,7 +192,7 @@ class WaveSim: prev_line = prev_line.driver.ins[0] stem_idx = prev_line.index for ol in f.outs: - stems[ol.index] = stem_idx + stems[ol] = stem_idx # calculate level (distance from PI/PPI) and reference count for each line levels = np.zeros(self.sat_length, dtype='int32') @@ -211,7 +230,7 @@ class WaveSim: self.sat[self.ppi_offset + i] = h.alloc(intf_wavecap), intf_wavecap, 0 ref_count[self.ppi_offset + i] += 1 if len(n.ins) > 0: - i0_idx = stems[n.ins[0].index] if stems[n.ins[0].index] >= 0 else n.ins[0].index + i0_idx = stems[n.ins[0]] if stems[n.ins[0]] >= 0 else n.ins[0] ref_count[i0_idx] += 1 # allocate memory for the rest of the circuit @@ -240,7 +259,7 @@ class WaveSim: # copy memory location to PO/PPO area for i, n in enumerate(self.interface): if len(n.ins) > 0: - self.sat[self.ppo_offset + i] = self.sat[n.ins[0].index] + self.sat[self.ppo_offset + i] = self.sat[n.ins[0]] # pad timing self.timing = np.zeros((self.sat_length, 2, 2)) @@ -253,15 +272,32 @@ class WaveSim: m0 = ~m1 self.mask = np.rollaxis(np.vstack((m0, m1)), 1) + def __repr__(self): + total_mem = self.state.nbytes + self.sat.nbytes + self.ops.nbytes + self.cdata.nbytes + return f'' + def get_line_delay(self, 
line, polarity): + """Returns the current delay of the given ``line`` and ``polarity`` in the simulation model.""" return self.timing[line, 0, polarity] def set_line_delay(self, line, polarity, delay): + """Sets a new ``delay`` for the given ``line`` and ``polarity`` in the simulation model.""" self.timing[line, 0, polarity] = delay def assign(self, vectors, time=0.0, offset=0): + """Assigns new values to the primary inputs and state-elements. + + :param vectors: The values to assign, preferably in 8-valued logic. The values are converted to + appropriate waveforms with one transition (``RISE``, ``FALL``) or no transitions + (``ZERO``, ``ONE``, and others). + :type vectors: :py:class:`~kyupy.logic.BPArray` + :param time: The transition time of the generated waveforms. + :param offset: The offset into the vector set. The vector assigned to the first simulator is + ``vectors[offset]``. + """ nvectors = min(len(vectors) - offset, self.sims) - for i, node in enumerate(self.interface): + for i in range(len(self.interface)): ppi_loc = self.sat[self.ppi_offset + i, 0] if ppi_loc < 0: continue for p in range(nvectors): @@ -283,16 +319,21 @@ class WaveSim: self.state[ppi_loc + toggle, p] = TMAX def propagate(self, sims=None, sd=0.0, seed=1): - if sims is None: - sims = self.sims - else: - sims = min(sims, self.sims) + """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs. + + :param sims: Number of parallel simulations to execute. If None, all available simulations are performed. + :param sd: Standard deviation for injection of random delay variation. Active, if value is positive. + :param seed: Random seed for delay variations. 
+ """ + sims = min(sims or self.sims, self.sims) for op_start, op_stop in zip(self.level_starts, self.level_stops): self.overflows += level_eval(self.ops, op_start, op_stop, self.state, self.sat, 0, sims, self.timing, sd, seed) self.lst_eat_valid = False def wave(self, line, vector): + # """Returns the desired waveform from the simulation state. Only valid, if simulator was + # instantiated with ``keep_waveforms=True``.""" if line < 0: return [TMAX] mem, wcap, _ = self.sat[line] @@ -306,7 +347,34 @@ class WaveSim: def wave_ppo(self, o, vector): return self.wave(self.ppo_offset + o, vector) - def capture(self, time=TMAX, sd=0, seed=1, cdata=None, offset=0): + def capture(self, time=TMAX, sd=0.0, seed=1, cdata=None, offset=0): + """Simulates a capture operation at all state-elements and primary outputs. + + The capture analyzes the propagated waveforms at and around the given capture time and returns + various results for each capture operation. + + :param time: The desired capture time. By default, a capture of the settled value is performed. + :param sd: A standard deviation for uncertainty in the actual capture time. + :param seed: The random seed for a capture with uncertainty. + :param cdata: An array to copy capture data into (optional). See the return value for details. + :param offset: An offset into the supplied capture data array. + :return: The capture data as numpy array. + + The 3-dimensional capture data array contains for each interface node (axis 0), + and each test (axis 1), seven values: + + 0. Probability of capturing a 1 at the given capture time (same as next value, if no + standard deviation given). + 1. A capture value decided by random sampling according to above probability and given seed. + 2. The final value (assume a very late capture time). + 3. True, if there was a premature capture (capture error), i.e. final value is different + from captured value. + 4. Earliest arrival time. 
The time at which the output transitioned from its initial value. + 5. Latest stabilization time. The time at which the output transitioned to its final value. + 6. Overflow indicator. If non-zero, some signals in the input cone of this output had more + transitions than specified in ``wavecaps``. Some transitions have been discarded, the + final values in the waveforms are still valid. + """ for i, node in enumerate(self.interface): if len(node.ins) == 0: continue for p in range(self.sims): @@ -319,7 +387,15 @@ class WaveSim: return self.cdata def reassign(self, time=0.0): - for i, node in enumerate(self.interface): + """Re-assigns the last capture to the appropriate pseudo-primary inputs. Generates a new set of + waveforms at the PPIs that start with the previous final value of that PPI, and transitions at the + given time to the value captured in a previous simulation. :py:func:`~WaveSim.capture` must be called + prior to this function. The final value of each PPI is taken from the randomly sampled concrete logic + values in the capture data. + + :param time: The transition time at the inputs (usually 0.0). 
+ """ + for i in range(len(self.interface)): ppi_loc = self.sat[self.ppi_offset + i, 0] ppo_loc = self.sat[self.ppo_offset + i, 0] if ppi_loc < 0 or ppo_loc < 0: continue @@ -384,8 +460,7 @@ class WaveSim: accs[idx] += 1 if s_sqrt2 == 0: return values - else: - return accs + return accs def vals(self, line, vector, times, sd=0): return self._vals(line, vector, times, sd) @@ -462,7 +537,7 @@ def rand_gauss(seed, sd): return 1.0 while True: x = -6.0 - for i in range(12): + for _ in range(12): seed = int(0xDEECE66D) * seed + 0xB x += float((seed >> 8) & 0xffffff) / float(1 << 24) x *= sd @@ -539,12 +614,17 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0): state[z_mem + z_cur, st_idx] = TMAX_OVL else: state[z_mem + z_cur, st_idx] = a if a > b else b # propagate overflow flags by storing biggest TMAX from input - + return overflows class WaveSimCuda(WaveSim): - """A GPU-accelerated waveform-based combinational logic timing simulator.""" + """A GPU-accelerated waveform-based combinational logic timing simulator. + + The API is the same as for :py:class:`WaveSim`. + All internal memories are mirrored into GPU memory upon construction. + Some operations like access to single waveforms can involve large communication overheads. 
+ """ def __init__(self, circuit, timing, sims=8, wavecaps=16, strip_forks=False, keep_waveforms=True): super().__init__(circuit, timing, sims, wavecaps, strip_forks, keep_waveforms) @@ -559,6 +639,12 @@ class WaveSimCuda(WaveSim): self._block_dim = (32, 16) + def __repr__(self): + total_mem = self.state.nbytes + self.sat.nbytes + self.ops.nbytes + self.timing.nbytes + \ + self.tdata.nbytes + self.cdata.nbytes + return f'' + def get_line_delay(self, line, polarity): return self.d_timing[line, 0, polarity] @@ -586,10 +672,7 @@ class WaveSimCuda(WaveSim): return gx, gy def propagate(self, sims=None, sd=0.0, seed=1): - if sims is None: - sims = self.sims - else: - sims = min(sims, self.sims) + sims = min(sims or self.sims, self.sims) for op_start, op_stop in zip(self.level_starts, self.level_stops): grid_dim = self._grid_dim(sims, op_stop - op_start) wave_kernel[grid_dim, self._block_dim](self.d_ops, op_start, op_stop, self.d_state, self.sat, int(0), @@ -599,10 +682,10 @@ class WaveSimCuda(WaveSim): def wave(self, line, vector): if line < 0: - return None + return [TMAX] mem, wcap, _ = self.sat[line] if mem < 0: - return None + return [TMAX] return self.d_state[mem:mem + wcap, vector] def capture(self, time=TMAX, sd=0, seed=1, cdata=None, offset=0): @@ -655,7 +738,7 @@ def reassign_kernel(state, sat, ppi_offset, ppo_offset, cdata, ppi_time): if vector >= state.shape[-1]: return if ppo_offset + y >= len(sat): return - ppo, ppo_cap, _ = sat[ppo_offset + y] + ppo, _, _ = sat[ppo_offset + y] ppi, ppi_cap, _ = sat[ppi_offset + y] if ppo < 0: return if ppi < 0: return @@ -765,7 +848,7 @@ def rand_gauss_dev(seed, sd): return 1.0 while True: x = -6.0 - for i in range(12): + for _ in range(12): seed = int(0xDEECE66D) * seed + 0xB x += float((seed >> 8) & 0xffffff) / float(1 << 24) x *= sd diff --git a/tests/test_bench.py b/tests/test_bench.py index 25b9b1b..44ddf7c 100644 --- a/tests/test_bench.py +++ b/tests/test_bench.py @@ -4,9 +4,9 @@ from kyupy import bench def 
test_b01(mydir): with open(mydir / 'b01.bench', 'r') as f: c = bench.parse(f.read()) - assert 92 == len(c.nodes) + assert len(c.nodes) == 92 c = bench.load(mydir / 'b01.bench') - assert 92 == len(c.nodes) + assert len(c.nodes) == 92 def test_simple(): diff --git a/tests/test_logic_sim.py b/tests/test_logic_sim.py index 990eec7..76edb95 100644 --- a/tests/test_logic_sim.py +++ b/tests/test_logic_sim.py @@ -49,7 +49,7 @@ def test_4v(): assert mva[14] == 'X-XXX' assert mva[15] == 'XXXXX' - + def test_8v(): c = bench.parse('input(x, y) output(a, o, n, xo) a=and(x,y) o=or(x,y) n=not(x) xo=xor(x,y)') s = LogicSim(c, 64, m=8) @@ -71,7 +71,7 @@ def test_8v(): for i in range(64): assert resp[i] == mva[i] - + def test_b01(mydir): c = bench.load(mydir / 'b01.bench') diff --git a/tests/test_sdf.py b/tests/test_sdf.py index 8b30b68..b09469e 100644 --- a/tests/test_sdf.py +++ b/tests/test_sdf.py @@ -1,5 +1,4 @@ from kyupy import sdf, verilog -from kyupy.saed import pin_index def test_parse(): @@ -81,20 +80,20 @@ def test_b14(mydir): def test_gates(mydir): c = verilog.load(mydir / 'gates.v') df = sdf.load(mydir / 'gates.sdf') - lt = df.annotation(c, pin_index, dataset=1) + lt = df.annotation(c, dataset=1) nand_a = c.cells['nandgate'].ins[0] nand_b = c.cells['nandgate'].ins[1] and_a = c.cells['andgate'].ins[0] and_b = c.cells['andgate'].ins[1] - assert lt[nand_a.index, 0, 0] == 0.103 - assert lt[nand_a.index, 0, 1] == 0.127 + assert lt[nand_a, 0, 0] == 0.103 + assert lt[nand_a, 0, 1] == 0.127 - assert lt[nand_b.index, 0, 0] == 0.086 - assert lt[nand_b.index, 0, 1] == 0.104 + assert lt[nand_b, 0, 0] == 0.086 + assert lt[nand_b, 0, 1] == 0.104 - assert lt[and_a.index, 0, 0] == 0.378 - assert lt[and_a.index, 0, 1] == 0.377 + assert lt[and_a, 0, 0] == 0.378 + assert lt[and_a, 0, 1] == 0.377 - assert lt[and_b.index, 0, 0] == 0.375 - assert lt[and_b.index, 0, 1] == 0.370 + assert lt[and_b, 0, 0] == 0.375 + assert lt[and_b, 0, 1] == 0.370 diff --git a/tests/test_stil.py 
b/tests/test_stil.py index 1f0d89b..63f19e4 100644 --- a/tests/test_stil.py +++ b/tests/test_stil.py @@ -3,7 +3,6 @@ from kyupy import stil def test_b14(mydir): s = stil.load(mydir / 'b14.stuck.stil.gz') - assert 10 == len(s.signal_groups) - assert 1 == len(s.scan_chains) - assert 2163 == len(s.calls) - + assert len(s.signal_groups) == 10 + assert len(s.scan_chains) == 1 + assert len(s.calls) == 2163 diff --git a/tests/test_wave_sim.py b/tests/test_wave_sim.py index bea26d3..8ddb94d 100644 --- a/tests/test_wave_sim.py +++ b/tests/test_wave_sim.py @@ -3,7 +3,6 @@ import numpy as np from kyupy.wave_sim import WaveSim, WaveSimCuda, wave_eval, TMIN, TMAX from kyupy.logic_sim import LogicSim from kyupy import verilog, sdf, logic -from kyupy.saed import pin_index from kyupy.logic import MVArray, BPArray @@ -19,7 +18,7 @@ def test_wave_eval(): line_times[1, 0, 1] = 0.4 line_times[1, 1, 0] = 0.3 line_times[1, 1, 1] = 0.4 - + state = np.zeros((3*16, 1)) + TMAX # 3 waveforms of capacity 16 state[::16, 0] = 16 # first entry is capacity a = state[0:16, 0] @@ -31,29 +30,29 @@ def test_wave_eval(): sat[2] = 32, 16, 0 wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) - assert TMIN == z[0] + assert z[0] == TMIN a[0] = TMIN wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) - assert TMIN == z[0] - + assert z[0] == TMIN + b[0] = TMIN wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) - assert TMAX == z[0] + assert z[0] == TMAX a[0] = 1 # A _/^^^ b[0] = 2 # B __/^^ wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) - assert TMIN == z[0] # ^^^\___ B -> Z fall delay - assert 2.4 == z[1] - assert TMAX == z[2] + assert z[0] == TMIN # ^^^\___ B -> Z fall delay + assert z[1] == 2.4 + assert z[2] == TMAX a[0] = TMIN # A ^^^^^^ b[0] = TMIN # B ^^^\__ b[1] = 2 wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) - assert 2.3 == z[0] # ___/^^^ B -> Z rise delay - assert TMAX == z[1] + assert z[0] == 2.3 # ___/^^^ B -> Z rise delay + assert z[1] == TMAX # pos pulse of 
0.35 at B -> 0.45 after delays a[0] = TMIN # A ^^^^^^^^ @@ -61,9 +60,9 @@ def test_wave_eval(): b[1] = 2 # B ^^\__/^^ b[2] = 2.35 wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) - assert 2.3 == z[0] # __/^^\__ - assert 2.75 == z[1] - assert TMAX == z[2] + assert z[0] == 2.3 # __/^^\__ + assert z[1] == 2.75 + assert z[2] == TMAX # neg pulse of 0.45 at B -> 0.35 after delays a[0] = TMIN # A ^^^^^^^^ @@ -71,10 +70,10 @@ def test_wave_eval(): b[1] = 2.45 b[2] = TMAX wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) - assert TMIN == z[0] # ^^\__/^^ - assert 2.4 == z[1] - assert 2.75 == z[2] - assert TMAX == z[3] + assert z[0] == TMIN # ^^\__/^^ + assert z[1] == 2.4 + assert z[2] == 2.75 + assert z[3] == TMAX # neg pulse of 0.35 at B -> 0.25 after delays (filtered) a[0] = TMIN # A ^^^^^^^^ @@ -82,8 +81,8 @@ def test_wave_eval(): b[1] = 2.35 b[2] = TMAX wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) - assert TMIN == z[0] # ^^^^^^ - assert TMAX == z[1] + assert z[0] == TMIN # ^^^^^^ + assert z[1] == TMAX # pos pulse of 0.25 at B -> 0.35 after delays (filtered) a[0] = TMIN # A ^^^^^^^^ @@ -91,7 +90,7 @@ def test_wave_eval(): b[1] = 2 # B ^^\__/^^ b[2] = 2.25 wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) - assert TMAX == z[0] # ______ + assert z[0] == TMAX # ______ def compare_to_logic_sim(wsim): @@ -118,7 +117,7 @@ def compare_to_logic_sim(wsim): exp_bp = BPArray(tests_bp) lsim.capture(exp_bp) exp = MVArray(exp_bp) - + for i in range(8): exp_str = exp[i].replace('R', '1').replace('F', '0').replace('P', '0').replace('N', '1') res_str = resp[i].replace('R', '1').replace('F', '0').replace('P', '0').replace('N', '1') @@ -128,7 +127,7 @@ def compare_to_logic_sim(wsim): def test_b14(mydir): c = verilog.load(mydir / 'b14.v.gz', branchforks=True) df = sdf.load(mydir / 'b14.sdf.gz') - lt = df.annotation(c, pin_index) + lt = df.annotation(c) wsim = WaveSim(c, lt, 8) compare_to_logic_sim(wsim) @@ -136,7 +135,7 @@ def test_b14(mydir): def 
test_b14_strip_forks(mydir): c = verilog.load(mydir / 'b14.v.gz', branchforks=True) df = sdf.load(mydir / 'b14.sdf.gz') - lt = df.annotation(c, pin_index) + lt = df.annotation(c) wsim = WaveSim(c, lt, 8, strip_forks=True) compare_to_logic_sim(wsim) @@ -144,6 +143,6 @@ def test_b14_strip_forks(mydir): def test_b14_cuda(mydir): c = verilog.load(mydir / 'b14.v.gz', branchforks=True) df = sdf.load(mydir / 'b14.sdf.gz') - lt = df.annotation(c, pin_index) + lt = df.annotation(c) wsim = WaveSimCuda(c, lt, 8) compare_to_logic_sim(wsim)