diff --git a/docs/conf.py b/docs/conf.py index 53f405e..83cafc9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,7 +24,7 @@ copyright = '2020-2023, Stefan Holst' author = 'Stefan Holst' # The full version, including alpha/beta/rc tags -release = '0.0.4' +release = '0.0.5' # -- General configuration --------------------------------------------------- diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b8aaecb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,32 @@ +[project] +name = "kyupy" +version = "0.0.5" +authors = [ + { name="Stefan Holst", email="mail@s-holst.de" }, +] +description = 'High-performance processing and analysis of non-hierarchical VLSI designs' +readme = "README.rst" +requires_python = ">=3.8" +dependencies = [ + "numpy>=1.17.0", + "lark-parser>=0.8.0", +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Environment :: GPU :: NVIDIA CUDA", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Electronic Design Automation (EDA)", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", +] +license = "MIT" +license-files = ["LICENSE.txt"] + +[project.urls] +homepage = "https://github.com/s-holst/kyupy" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + diff --git a/setup.py b/setup.py deleted file mode 100644 index 47ef1f7..0000000 --- a/setup.py +++ /dev/null @@ -1,40 +0,0 @@ -from setuptools import setup, find_packages - -with open('README.rst', 'r') as f: - long_description = f.read() - -setup( - name='kyupy', - version='0.0.4', - description='High-performance processing and analysis of non-hierarchical VLSI designs', - long_description=long_description, - long_description_content_type='text/x-rst', - packages=find_packages(where='src'), - package_dir={'': 'src'}, - url='https://github.com/s-holst/kyupy', - author='Stefan Holst', - author_email='mail@s-holst.de', - python_requires='>=3.8', - install_requires=[ - 'numpy>=1.17.0', - 'lark-parser>=0.8.0' - ], - extras_requires={ - 'dev': [ - 'pytest>=6.1', - ], - }, - classifiers=[ - 'Development Status :: 3 - Alpha', - 'Environment :: GPU :: NVIDIA CUDA', - 'Intended Audience :: Science/Research', - 'Topic :: Scientific/Engineering :: Electronic Design Automation (EDA)', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - ], -) diff --git a/src/kyupy/__init__.py b/src/kyupy/__init__.py index 123bd03..b473546 100644 --- a/src/kyupy/__init__.py +++ b/src/kyupy/__init__.py @@ -57,6 +57,18 @@ def hr_bytes(nbytes): multiplier += 1 return f'{nbytes:.1f}{["", "ki", "Mi", "Gi", "Ti", "Pi"][multiplier]}B' +def eng(number): + """Formats a given number using engineering notation.""" + exponent = 0 + if abs(number) < 1: + while abs(number) >= 1000: + number *= 1000 + exponent -= 3 + else: + while abs(number) >= 1000: + number /= 1000 + exponent += 3 + return f'{number:.0f}' + (f'e{exponent}' if exponent != 0 else '') def hr_time(seconds): """Formats a given time interval for human readability.""" @@ -138,10 +150,10 @@ class Log: self._limit = limit def stop_limit(self): + self._limit = -1 if self.filtered > 0: - log.info(f'{self.filtered} more messages (filtered).') + self.info(f'{self.filtered} more messages (filtered).') self.filtered = 0 - self._limit = -1 def __getstate__(self): return {'elapsed': time.perf_counter() - self.start} @@ -149,6 +161,8 @@ class Log: def __setstate__(self, state): self.logfile = sys.stdout self.indent = 0 + self._limit = -1 + self.filtered = 0 self.start = time.perf_counter() - state['elapsed'] def write(self, s, indent=0): @@ -169,7 +183,7 @@ class Log: return t = time.perf_counter() - self.start self.logfile.write(f'# {t:011.3f} {level} {message}\n') - self.logfile.flush() + #self.logfile.flush() self._limit -= 1 def info(self, message): diff --git a/src/kyupy/circuit.py b/src/kyupy/circuit.py index 46c9f38..e387ffc 100644 --- a/src/kyupy/circuit.py +++ b/src/kyupy/circuit.py @@ -10,20 +10,40 @@ Circuit graphs also define an ordering of inputs, outputs and other nodes to eas """ +from __future__ import annotations + from collections import deque, defaultdict import re +from typing import Union import numpy as np class GrowingList(list): def __setitem__(self, index, value): - if index >= len(self): - self.extend([None] * (index + 1 - len(self))) + if value is None: self.has_nones = True + if index == len(self): return super().append(value) + if index > len(self): + super().extend([None] * (index + 1 - len(self))) + self.has_nones = True super().__setitem__(index, value) - def free_index(self): - return next((i for i, x in enumerate(self) if x is None), len(self)) + def __getitem__(self, index): + if isinstance(index, slice): return super().__getitem__(index) + return super().__getitem__(index) if index < len(self) else None + + @property + def free_idx(self): + fi = len(self) + if hasattr(self, 'has_nones') and self.has_nones: + fi = next((i for i, x in enumerate(self) if x is None), len(self)) + self.has_nones = fi < len(self) + return fi + + def without_nones(self): + for item in self: + if item is not None: + yield item class IndexList(list): @@ -76,10 +96,10 @@ class Node: by allocating an array or list :code:`my_data` of length :code:`len(n.circuit.nodes)` and accessing it by :code:`my_data[n.index]` or simply by :code:`my_data[n]`. """ - self.ins = GrowingList() + self.ins: GrowingList[Line] = GrowingList() """A list of input connections (:class:`Line` objects). """ - self.outs = GrowingList() + self.outs: GrowingList[Line] = GrowingList() """A list of output connections (:class:`Line` objects). """ @@ -135,7 +155,7 @@ class Line: Use the explicit case only if connections to specific pins are required. It may overwrite any previous line references in the connection list of the nodes. """ - def __init__(self, circuit, driver, reader): + def __init__(self, circuit: Circuit, driver: Union[Node, tuple[Node, int]], reader: Union[Node, tuple[Node, int]]): self.circuit = circuit """The :class:`Circuit` object the line is part of. """ @@ -147,7 +167,7 @@ class Line: by allocating an array or list :code:`my_data` of length :code:`len(l.circuit.lines)` and accessing it by :code:`my_data[l.index]` or simply by :code:`my_data[l]`. """ - if not isinstance(driver, tuple): driver = (driver, driver.outs.free_index()) + if not isinstance(driver, tuple): driver = (driver, driver.outs.free_idx) self.driver = driver[0] """The :class:`Node` object that drives this line. """ @@ -157,7 +177,7 @@ class Line: This is the position in the list :py:attr:`Node.outs` of the driving node this line referenced from: :code:`self.driver.outs[self.driver_pin] == self`. """ - if not isinstance(reader, tuple): reader = (reader, reader.ins.free_index()) + if not isinstance(reader, tuple): reader = (reader, reader.ins.free_idx) self.reader = reader[0] """The :class:`Node` object that reads this line. """ @@ -292,7 +312,7 @@ class Circuit: def _locs(self, prefix, nodes): d_top = dict() for i, n in enumerate(nodes): - if m := re.match(fr'({prefix}.*?)((?:[\d_\[\]])*$)', n.name): + if m := re.match(fr'({re.escape(prefix)}.*?)((?:[\d_\[\]])*$)', n.name): path = [m[1]] + [int(v) for v in re.split(r'[_\[\]]+', m[2]) if len(v) > 0] d = d_top for j in path[:-1]: @@ -334,15 +354,16 @@ class Circuit: def get_or_add_fork(self, name): return self.forks[name] if name in self.forks else Node(self, name) - def remove_dangling_nodes(self, root_node:Node): + def remove_dangling_nodes(self, root_node:Node, keep=[]): if len([l for l in root_node.outs if l is not None]) > 0: return lines = [l for l in root_node.ins if l is not None] drivers = [l.driver for l in lines] + if root_node in keep: return root_node.remove() for l in lines: l.remove() for d in drivers: - self.remove_dangling_nodes(d) + self.remove_dangling_nodes(d, keep=keep) def eliminate_1to1_forks(self): """Removes all forks that drive only one node. @@ -370,6 +391,21 @@ class Circuit: in_line.reader_pin = out_reader_pin in_line.reader.ins[in_line.reader_pin] = in_line + def remove_forks(self): + ios = set(self.io_nodes) + for n in list(self.forks.values()): + if n in ios: continue + d = None + if (l := n.ins[0]) is not None: + d = l.driver + l.remove() + for l in list(n.outs): + if l is None: continue + r, rp = l.reader, l.reader_pin + l.remove() + if d is not None: Line(self, d, (r, rp)) + n.remove() + def substitute(self, node, impl): """Replaces a given node with the given implementation circuit. @@ -428,7 +464,7 @@ class Circuit: for l, ll in zip(impl_out_lines, node_out_lines): # connect outputs if ll is None: if l.driver in node_map: - self.remove_dangling_nodes(node_map[l.driver]) + self.remove_dangling_nodes(node_map[l.driver], keep=ios) continue if len(l.reader.outs) > 0: # output is also read by impl. circuit, connect to fork. ll.driver = node_map[l.reader] @@ -447,6 +483,21 @@ class Circuit: if n.kind in tlib.cells: self.substitute(n, tlib.cells[n.kind][0]) + def remove_constants(self): + c1gen = None + for n in self.nodes: + if n.kind == '__const0__': # just remove, unconnected inputs are defined 0. + for l in n.outs: + l.remove() + n.remove() + elif n.kind == '__const1__': + if c1gen is None: c1gen = Node(self, '__const1gen__', 'INV1') # one unique const 1 generator + for l in n.outs: + r, rp = l.reader, l.reader_pin + l.remove() + Line(self, c1gen, (r, rp)) + n.remove() + def copy(self): """Returns a deep copy of the circuit. """ @@ -501,14 +552,15 @@ class Circuit: substrings 'dff' or 'latch' are yielded first. """ visit_count = np.zeros(len(self.nodes), dtype=np.uint32) - queue = deque(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower() or 'latch' in n.kind.lower()) + start = set(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower() or 'latch' in n.kind.lower()) + queue = deque(start) while len(queue) > 0: n = queue.popleft() for line in n.outs: if line is None: continue succ = line.reader visit_count[succ] += 1 - if visit_count[succ] == len(succ.ins) and 'dff' not in succ.kind.lower() and 'latch' not in succ.kind.lower(): + if visit_count[succ] == len(succ.ins) and succ not in start: queue.append(succ) yield n @@ -563,6 +615,21 @@ class Circuit: if marks[n]: yield n + def fanout(self, origin_nodes): + """Generator function to iterate over the fan-out cone of a given list of origin nodes. + + Nodes are yielded in topological order. + """ + marks = [False] * len(self.nodes) + for n in origin_nodes: + marks[n] = True + for n in self.topological_order(): + if not marks[n]: + for line in n.ins.without_nones(): + marks[n] |= marks[line.driver] + if marks[n]: + yield n + def fanout_free_regions(self): for stem in self.reversed_topological_order(): if len(stem.outs) == 1 and 'dff' not in stem.kind.lower(): continue diff --git a/src/kyupy/logic.py b/src/kyupy/logic.py index ff49344..3e97f8e 100644 --- a/src/kyupy/logic.py +++ b/src/kyupy/logic.py @@ -241,6 +241,8 @@ def mv_latch(d, t, q_prev, out=None): def mv_transition(init, final, out=None): """Computes the logic transitions from the initial values of ``init`` to the final values of ``final``. Pulses in the input data are ignored. If any of the inputs are ``UNKNOWN``, the result is ``UNKNOWN``. + If init is ``UNASSIGNED``, the result is the final value of ``final``. + If final is ``UNASSIGNED``, the result is the initial value of ``init``. If both inputs are ``UNASSIGNED``, the result is ``UNASSIGNED``. :param init: A multi-valued array. @@ -251,7 +253,9 @@ def mv_transition(init, final, out=None): out = out or np.empty(np.broadcast(init, final).shape, dtype=np.uint8) out[...] = (init & 0b010) | (final & 0b001) out[...] |= ((out << 1) ^ (out << 2)) & 0b100 - unknown = (init == UNKNOWN) | (init == UNASSIGNED) | (final == UNKNOWN) | (final == UNASSIGNED) + out[...] = np.choose(init == UNASSIGNED, [out, (final & 0b001) * ONE]) + out[...] = np.choose(final == UNASSIGNED, [out, ((init & 0b010) >> 1) * ONE]) + unknown = (init == UNKNOWN) | (final == UNKNOWN) unassigned = (init == UNASSIGNED) & (final == UNASSIGNED) np.putmask(out, unknown, UNKNOWN) np.putmask(out, unassigned, UNASSIGNED) @@ -265,6 +269,18 @@ def mv_to_bp(mva): return np.packbits(unpackbits(mva)[...,:3], axis=-2, bitorder='little').swapaxes(-1,-2) +def mv_init(mva): + """Returns the initial binary values for mva. + """ + return (mva>>1) & ((mva>>2)|mva) & 1 + + +def mv_final(mva): + """Returns the final binary value of mva. + """ + return mva & ((mva>>2)|(mva>>1)) & 1 + + def bparray(*a): """Converts (lists of) Boolean values or strings into a bit-parallel array. diff --git a/src/kyupy/logic_sim.py b/src/kyupy/logic_sim.py index 81ef44b..3ac5233 100644 --- a/src/kyupy/logic_sim.py +++ b/src/kyupy/logic_sim.py @@ -10,9 +10,10 @@ import math import numpy as np -from . import numba, logic, hr_bytes, sim +from . import numba, logic, hr_bytes, sim, eng, cdiv from .circuit import Circuit + class LogicSim(sim.SimOps): """A bit-parallel naïve combinational simulator for 2-, 4-, or 8-valued logic. @@ -28,7 +29,7 @@ class LogicSim(sim.SimOps): self.m = m self.mdim = math.ceil(math.log2(m)) self.sims = sims - nbytes = (sims - 1) // 8 + 1 + nbytes = cdiv(sims, 8) self.c = np.zeros((self.c_len, self.mdim, nbytes), dtype=np.uint8) self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8) @@ -44,14 +45,14 @@ class LogicSim(sim.SimOps): self.s[:,:,1,:] = 255 # unassigned def __repr__(self): - return f'{{name: "{self.circuit.name}", sims: {self.sims}, m: {self.m}, c_bytes: {self.c.nbytes}}}' + return f'{{name: "{self.circuit.name}", sims: {self.sims}, m: {self.m}, c_bytes: {eng(self.c.nbytes)}}}' def s_to_c(self): """Copies the values from ``s[0]`` the inputs of the combinational portion. """ self.c[self.pippi_c_locs] = self.s[0, self.pippi_s_locs, :self.mdim] - def c_prop(self, inject_cb=None): + def c_prop(self, sims=None, inject_cb=None, fault_line=-1, fault_mask=None, fault_model=2): """Propagate the input values through the combinational circuit towards the outputs. Performs all logic operations in topological order. @@ -67,10 +68,17 @@ class LogicSim(sim.SimOps): t1 = self.c_locs[self.tmp2_idx] if self.m == 2: if inject_cb is None: - _prop_cpu(self.ops, self.c_locs, self.c) + if fault_mask is None: + fault_mask = np.full(self.c.shape[-1], 255, dtype=np.uint8) + else: + if len(fault_mask) < self.c.shape[-1]: + fault_mask2 = np.full(self.c.shape[-1], 0, dtype=np.uint8) + fault_mask2[:len(fault_mask)] = fault_mask + fault_mask = fault_mask2 + _prop_cpu(self.ops, self.c_locs, self.c, int(fault_line), fault_mask, int(fault_model)) else: - for op, o0, i0, i1, i2, i3 in self.ops[:,:6]: - o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)] + for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]: + o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)] if op == sim.BUF1: self.c[o0]=self.c[i0] elif op == sim.INV1: self.c[o0] = ~self.c[i0] elif op == sim.AND2: self.c[o0] = self.c[i0] & self.c[i1] @@ -105,10 +113,10 @@ class LogicSim(sim.SimOps): elif op == sim.OAI211:self.c[o0] = ~((self.c[i0] | self.c[i1]) & self.c[i2] & self.c[i3]) elif op == sim.MUX21: self.c[o0] = (self.c[i0] & ~self.c[i2]) | (self.c[i1] & self.c[i2]) else: print(f'unknown op {op}') - inject_cb(o0, self.s[o0]) + inject_cb(o0l, self.c[o0]) elif self.m == 4: - for op, o0, i0, i1, i2, i3 in self.ops[:,:6]: - o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)] + for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]: + o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)] if op == sim.BUF1: self.c[o0]=self.c[i0] elif op == sim.INV1: logic.bp4v_not(self.c[o0], self.c[i0]) elif op == sim.AND2: logic.bp4v_and(self.c[o0], self.c[i0], self.c[i1]) @@ -181,9 +189,10 @@ class LogicSim(sim.SimOps): logic.bp4v_and(self.c[t1], self.c[i1], self.c[i2]) logic.bp4v_or(self.c[o0], self.c[t0], self.c[t1]) else: print(f'unknown op {op}') + if inject_cb is not None: inject_cb(o0l, self.c[o0]) else: - for op, o0, i0, i1, i2, i3 in self.ops[:,:6]: - o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)] + for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]: + o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)] if op == sim.BUF1: self.c[o0]=self.c[i0] elif op == sim.INV1: logic.bp8v_not(self.c[o0], self.c[i0]) elif op == sim.AND2: logic.bp8v_and(self.c[o0], self.c[i0], self.c[i1]) @@ -256,7 +265,7 @@ class LogicSim(sim.SimOps): logic.bp8v_and(self.c[t1], self.c[i1], self.c[i2]) logic.bp8v_or(self.c[o0], self.c[t0], self.c[t1]) else: print(f'unknown op {op}') - if inject_cb is not None: inject_cb(o0, self.s[o0]) + if inject_cb is not None: inject_cb(o0l, self.c[o0]) def c_to_s(self): """Copies (captures) the results of the combinational portion to ``s[1]``. @@ -296,9 +305,9 @@ class LogicSim(sim.SimOps): @numba.njit -def _prop_cpu(ops, c_locs, c): - for op, o0, i0, i1, i2, i3 in ops[:,:6]: - o0, i0, i1, i2, i3 = [c_locs[x] for x in (o0, i0, i1, i2, i3)] +def _prop_cpu(ops, c_locs, c, fault_line, fault_mask, fault_model): + for op, o0l, i0l, i1l, i2l, i3l in ops[:,:6]: + o0, i0, i1, i2, i3 = [c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)] if op == sim.BUF1: c[o0]=c[i0] elif op == sim.INV1: c[o0] = ~c[i0] elif op == sim.AND2: c[o0] = c[i0] & c[i1] @@ -333,3 +342,129 @@ def _prop_cpu(ops, c_locs, c): elif op == sim.OAI211: c[o0] = ~((c[i0] | c[i1]) & c[i2] & c[i3]) elif op == sim.MUX21: c[o0] = (c[i0] & ~c[i2]) | (c[i1] & c[i2]) else: print(f'unknown op {op}') + if fault_line >= 0 and o0l == fault_line: + #n = len(fault_mask) + if fault_model == 0: + c[o0] = c[o0] & ~fault_mask + elif fault_model == 1: + c[o0] = c[o0] | fault_mask + else: + c[o0] = c[o0] ^ fault_mask + + +class LogicSim6V(sim.SimOps): + """A bit-parallel naïve combinational simulator for 6-valued logic. + + :param circuit: The circuit to simulate. + :param sims: The number of parallel logic simulations to perform. + :param c_reuse: If True, intermediate signal values may get overwritten when not needed anymore to save memory. + :param strip_forks: If True, forks are not included in the simulation model to save memory and simulation time. + """ + def __init__(self, circuit: Circuit, sims: int = 8, c_reuse: bool = False, strip_forks: bool = False): + super().__init__(circuit, c_reuse=c_reuse, strip_forks=strip_forks) + self.sims = sims + nbytes = cdiv(sims, 8) + + self.c = np.zeros((self.c_len, 3, nbytes), dtype=np.uint8) + self.s = np.zeros((2, self.s_len, self.sims), dtype=np.uint8) + """Logic values of the sequential elements (flip-flops) and ports. + + It is a pair of arrays in mv storage format: + + * ``s[0]`` Assigned values. Simulator will read (P)PI value from here. + * ``s[1]`` Result values. Simulator will write (P)PO values here. + + Access this array to assign new values to the (P)PIs or read values from the (P)POs. + """ + + def __repr__(self): + return f'{{name: "{self.circuit.name}", sims: {self.sims}, c_bytes: {eng(self.c.nbytes)}}}' + + def s_to_c(self): + """Assigns the values from ``s[0]`` to the inputs of the combinational portion. + """ + self.c[self.pippi_c_locs] = logic.mv_to_bp(self.s[0, self.pippi_s_locs]) + + def c_prop(self): + c_prop_cpu(self.ops, self.c, self.c_locs, self.tmp_idx, self.tmp2_idx) + + def c_to_s(self): + """Captures the results of the combinational portion into ``s[1]``. + """ + self.s[1, self.poppo_s_locs] = logic.bp_to_mv(self.c[self.poppo_c_locs])[:,:self.sims] + + +@numba.njit +def c_prop_cpu(ops, c, c_locs, tmp_idx, tmp2_idx): + t0 = c[c_locs[tmp_idx]] + t1 = c[c_locs[tmp2_idx]] + inv_op = np.array([255, 255, 0], dtype=np.uint8)[np.newaxis, :, np.newaxis] + for op, o0l, i0l, i1l, i2l, i3l in ops[:,:6]: + o0, i0, i1, i2, i3 = [c[c_locs[x]] for x in (o0l, i0l, i1l, i2l, i3l)] + if op == sim.BUF1 or op == sim.INV1: + o0[...] = i0 + elif op == sim.AND2 or op == sim.NAND2: + o0[0] = i0[0] & i1[0] + o0[1] = i0[1] & i1[1] + o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])| + i1[2]&(i0[0]|i0[1]|i0[2])) + elif op == sim.AND3 or op == sim.NAND3: + o0[0] = i0[0] & i1[0] & i2[0] + o0[1] = i0[1] & i1[1] & i2[1] + o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2])| + i1[2]&(i0[0]|i0[1]|i0[2])&(i2[0]|i2[1]|i2[2])| + i2[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2])) + elif op == sim.AND4 or op == sim.NAND4: + o0[0] = i0[0] & i1[0] & i2[0] & i3[0] + o0[1] = i0[1] & i1[1] & i2[1] & i3[1] + o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2])&(i3[0]|i3[1]|i3[2])| + i1[2]&(i0[0]|i0[1]|i0[2])&(i2[0]|i2[1]|i2[2])&(i3[0]|i3[1]|i3[2])| + i2[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2])&(i3[0]|i3[1]|i3[2])| + i3[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2])) + elif op == sim.OR2 or op == sim.NOR2: + o0[0] = i0[0] | i1[0] + o0[1] = i0[1] | i1[1] + o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])| + i1[2]&(~i0[0]|~i0[1]|i0[2])) + elif op == sim.OR3 or op == sim.NOR3: + o0[0] = i0[0] | i1[0] | i2[0] + o0[1] = i0[1] | i1[1] | i2[1] + o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2])| + i1[2]&(~i0[0]|~i0[1]|i0[2])&(~i2[0]|~i2[1]|i2[2])| + i2[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2])) + elif op == sim.OR4 or op == sim.NOR4: + o0[0] = i0[0] | i1[0] | i2[0] | i3[0] + o0[1] = i0[1] | i1[1] | i2[1] | i3[1] + o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2])&(~i3[0]|~i3[1]|i3[2])| + i1[2]&(~i0[0]|~i0[1]|i0[2])&(~i2[0]|~i2[1]|i2[2])&(~i3[0]|~i3[1]|i3[2])| + i2[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2])&(~i3[0]|~i3[1]|i3[2])| + i3[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2])) + elif op == sim.XOR2 or op == sim.XNOR2: + o0[0] = i0[0] ^ i1[0] + o0[1] = i0[1] ^ i1[1] + o0[2] = i0[2] | i1[2] + elif op == sim.MUX21: + # t1 = ~i2 + t1[...] = i2 ^ inv_op + # t0 = i0 & t1 + t0[0] = i0[0] & t1[0] + t0[1] = i0[1] & t1[1] + t0[2] = (i0[2]&(t1[0]|t1[1]|t1[2])| + t1[2]&(i0[0]|i0[1]|i0[2])) + # t1 = i1 & i2 + t1[0] = i1[0] & i2[0] + t1[1] = i1[1] & i2[1] + t1[2] = (i1[2]&(i2[0]|i2[1]|i2[2])| + i2[2]&(i1[0]|i1[1]|i1[2])) + # o0 = t0 | t1 + o0[0] = t0[0] | t1[0] + o0[1] = t0[1] | t1[1] + o0[2] = (t0[2]&(~t1[0]|~t1[1]|t1[2])| + t1[2]&(~t0[0]|~t0[1]|t0[2])) + else: print(f'unknown op {op}') + + if (op == sim.INV1 or + op == sim.NAND2 or op == sim.NAND3 or op == sim.NAND4 or + op == sim.NOR2 or op == sim.NOR3 or op == sim.NOR4 or + op == sim.XNOR2): + o0[...] = o0 ^ inv_op diff --git a/src/kyupy/sdf.py b/src/kyupy/sdf.py index 015f975..5314efb 100644 --- a/src/kyupy/sdf.py +++ b/src/kyupy/sdf.py @@ -61,20 +61,21 @@ class DelayFile: delays = np.zeros((len(circuit.lines), 2, 2, 3)) # dataset last during construction. - for name, iopaths in self.cells.items(): - name = name.replace('\\', '') - if cell := circuit.cells.get(name, None): - for i_pin_spec, o_pin_spec, *dels in iopaths: - if i_pin_spec.startswith('(posedge '): i_pol_idxs = [0] - elif i_pin_spec.startswith('(negedge '): i_pol_idxs = [1] - else: i_pol_idxs = [0, 1] - i_pin_spec = re.sub(r'\((neg|pos)edge ([^)]+)\)', r'\2', i_pin_spec) - if line := cell.ins[tlib.pin_index(cell.kind, i_pin_spec)]: - delays[line, i_pol_idxs] = [d if len(d) > 0 else [0, 0, 0] for d in dels] - else: - log.warn(f'No line to annotate in circuit: {i_pin_spec} for {cell}') - else: - log.warn(f'Name from SDF not found in circuit: {name}') + with log.limit(50): + for name, iopaths in self.cells.items(): + name = name.replace('\\', '') + if cell := circuit.cells.get(name, None): + for i_pin_spec, o_pin_spec, *dels in iopaths: + if i_pin_spec.startswith('(posedge '): i_pol_idxs = [0] + elif i_pin_spec.startswith('(negedge '): i_pol_idxs = [1] + else: i_pol_idxs = [0, 1] + i_pin_spec = re.sub(r'\((neg|pos)edge ([^)]+)\)', r'\2', i_pin_spec) + if line := cell.ins[tlib.pin_index(cell.kind, i_pin_spec)]: + delays[line, i_pol_idxs] = [d if len(d) > 0 else [0, 0, 0] for d in dels] + else: + log.warn(f'No line to annotate in circuit: {i_pin_spec} for {cell}') + else: + log.warn(f'Name from SDF not found in circuit: {name}') return np.moveaxis(delays, -1, 0) @@ -102,11 +103,12 @@ class DelayFile: delays = np.zeros((len(circuit.lines), 2, 2, 3)) # dataset last during construction. + nonfork_annotations = 0 for n1, n2, *delvals in self._interconnects: delvals = [d if len(d) > 0 else [0, 0, 0] for d in delvals] if max(max(delvals)) == 0: continue - cn1, pn1 = n1.split('/') if '/' in n1 else (n1, None) - cn2, pn2 = n2.split('/') if '/' in n2 else (n2, None) + cn1, pn1 = (n1, None) if (slash := n1.rfind('/')) < 0 else (n1[:slash], n1[slash+1:]) + cn2, pn2 = (n2, None) if (slash := n2.rfind('/')) < 0 else (n2[:slash], n2[slash+1:]) cn1 = cn1.replace('\\','') cn2 = cn2.replace('\\','') c1, c2 = circuit.cells[cn1], circuit.cells[cn2] @@ -119,19 +121,27 @@ class DelayFile: log.warn(f'No line to annotate pin {pn2} of {c2}') continue f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver # find the forks between cells. - assert f1.kind == '__fork__' - assert f2.kind == '__fork__' - if f1 != f2: # at least two forks, make sure f2 is a branchfork connected to f1 - assert len(f2.outs) == 1 - assert f1.outs[f2.ins[0].driver_pin] == f2.ins[0] - line = f2.ins[0] - elif len(f2.outs) == 1: # f1==f2, only OK when there is no fanout. - line = f2.ins[0] + if f1 == c2 and f2 == c1: + nonfork_annotations += 1 + if nonfork_annotations < 10: + log.warn(f'No fork between {c1.name}/{p1} and {c2.name}/{p2}, using {c2.name}/{p2}') + line = c2.ins[p2] else: - log.warn(f'No branchfork to annotate interconnect delay {c1.name}/{p1}->{c2.name}/{p2}') - continue + assert f1.kind == '__fork__' + assert f2.kind == '__fork__' + if len(f2.outs) == 1: + assert f1 == f2 or f1.outs[f2.ins[0].driver_pin] == f2.ins[0] + line = f2.ins[0] + else: + nonfork_annotations += 1 + if nonfork_annotations < 10: + log.warn(f'No branchfork between {c1.name}/{p1} and {c2.name}/{p2}, using {c2.name}/{p2}') + line = c2.ins[p2] delays[line, :] = delvals + if nonfork_annotations > 0: + log.warn(f'{nonfork_annotations} interconnect annotations were moved to gate inputs due to missing forks.') + return np.moveaxis(delays, -1, 0) @@ -156,6 +166,10 @@ class SdfTransformer(Transformer): entries = [e for a in args if hasattr(a, 'children') for e in a.children] return name, entries + @staticmethod + def cond(args): # ignore conditions + return args[1] + @staticmethod def start(args): name = next((a for a in args if isinstance(a, str)), None) @@ -180,9 +194,12 @@ GRAMMAR = r""" | "(INSTANCE" ID? ")" | "(TIMINGCHECK" _ignore* ")" | delay )* ")" - delay: "(DELAY" "(ABSOLUTE" (interconnect | iopath)* ")" ")" + delay: "(DELAY" "(ABSOLUTE" (interconnect | iopath | cond)* ")" ")" interconnect: "(INTERCONNECT" ID ID triple* ")" iopath: "(IOPATH" ID_OR_EDGE ID_OR_EDGE triple* ")" + cond: "(" "COND" cond_port_expr iopath ")" + ?cond_port_expr: ID | "(" cond_port_expr ")" | cond_port_expr BINARY_OP cond_port_expr + BINARY_OP: /&&/ | /==/ NAME: /[^"]+/ ID_OR_EDGE: ( /[^() ]+/ | "(" /[^)]+/ ")" ) ID: ( /[^"() ]+/ | "\"" /[^"]+/ "\"" ) diff --git a/src/kyupy/sim.py b/src/kyupy/sim.py index de21b27..32b7459 100644 --- a/src/kyupy/sim.py +++ b/src/kyupy/sim.py @@ -4,9 +4,14 @@ from bisect import bisect, insort_left import numpy as np +from .circuit import Circuit + BUF1 = np.uint16(0b1010_1010_1010_1010) INV1 = ~BUF1 +__const0__ = BUF1 +__const1__ = INV1 + AND2 = np.uint16(0b1000_1000_1000_1000) AND3 = np.uint16(0b1000_0000_1000_0000) AND4 = np.uint16(0b1000_0000_0000_0000) @@ -39,7 +44,10 @@ AOI211, OAI211 = ~AO211, ~OA211 MUX21 = np.uint16(0b1100_1010_1100_1010) # z = i1 if i2 else i0 (i2 is select) -names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16)]) +names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k]) + +prim2name = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k]) +name2prim = dict([(k, v) for k, v in globals().items() if isinstance(v, np.uint16)]) kind_prefixes = { 'nand': (NAND4, NAND3, NAND2), @@ -156,7 +164,7 @@ class SimOps: :param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces memory footprint, but intermediate signal waveforms become unaccessible after a propagation. """ - def __init__(self, circuit, c_caps=1, c_caps_min=1, a_ctrl=None, c_reuse=False, strip_forks=False): + def __init__(self, circuit: Circuit, c_caps=1, c_caps_min=1, a_ctrl=None, c_reuse=False, strip_forks=False): self.circuit = circuit self.s_len = len(circuit.s_nodes) @@ -175,84 +183,74 @@ class SimOps: self.ppo_offset = self.ppi_offset + self.s_len self.c_locs_len = self.ppo_offset + self.s_len - # translate circuit structure into self.ops - ops = [] - interface_dict = dict((n, i) for i, n in enumerate(circuit.s_nodes)) - for n in circuit.topological_order(): - if n in interface_dict: - inp_idx = self.ppi_offset + interface_dict[n] - if len(n.outs) > 0 and n.outs[0] is not None: # first output of a PI/PPI - ops.append((BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[0]])) - if 'dff' in n.kind.lower(): # second output of DFF is inverted - if len(n.outs) > 1 and n.outs[1] is not None: - ops.append((INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[1]])) - else: # if not DFF, no output is inverted. - for o_line in n.outs[1:]: - if o_line is not None: - ops.append((BUF1, o_line.index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[o_line])) - continue - # regular node, not PI/PPI or PO/PPO - o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx - i0_idx = n.ins[0].index if len(n.ins) > 0 and n.ins[0] is not None else self.zero_idx - i1_idx = n.ins[1].index if len(n.ins) > 1 and n.ins[1] is not None else self.zero_idx - i2_idx = n.ins[2].index if len(n.ins) > 2 and n.ins[2] is not None else self.zero_idx - i3_idx = n.ins[3].index if len(n.ins) > 3 and n.ins[3] is not None else self.zero_idx - kind = n.kind.lower() - if kind == '__fork__': - if not strip_forks: - for o_line in n.outs: - if o_line is not None: - ops.append((BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o_line])) - continue - sp = None - for prefix, prims in kind_prefixes.items(): - if kind.startswith(prefix): - sp = prims[0] - if i3_idx == self.zero_idx: - sp = prims[1] - if i2_idx == self.zero_idx: - sp = prims[2] - break - if sp is None: - print('unknown cell type', kind) - else: - ops.append((sp, o0_idx, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o0_idx])) - - self.ops = np.asarray(ops, dtype='int32') + # ALAP-toposort the circuit into self.ops + levels = [] + + ppio2idx = dict((n, i) for i, n in enumerate(circuit.s_nodes)) + ppos = set([n for n in circuit.s_nodes if len(n.ins) > 0]) + readers = np.array([1 if l.reader in ppos else len(l.reader.outs) for l in circuit.lines], dtype=np.int32) # for ref-counting forks + + level_lines = [n.ins[0] for n in ppos] # start from PPOs + # FIXME: Should probably instanciate buffers for PPOs and attach DFF clocks + + while len(level_lines) > 0: # traverse the circuit level-wise back towards (P)PIs + level_ops = [] + prev_level_lines = [] + + for l in level_lines: + n = l.driver + in_idxs = [n.ins[x].index if len(n.ins) > x and n.ins[x] is not None else self.zero_idx for x in [0,1,2,3]] + if n in ppio2idx: + in_idxs[0] = self.ppi_offset + ppio2idx[n] + if l.driver_pin == 1 and 'dff' in n.kind.lower(): # second output of DFF is inverted + level_ops.append((INV1, l.index, *in_idxs, *a_ctrl[l])) + else: + level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l])) + elif n.kind == '__fork__': + readers[n.ins[0]] -= 1 + if readers[n.ins[0]] == 0: prev_level_lines.append(n.ins[0]) + if not strip_forks: level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l])) + else: + prev_level_lines += n.ins + sp = None + kind = n.kind.lower() + for prefix, prims in kind_prefixes.items(): + if kind.startswith(prefix): + sp = prims[0] + if in_idxs[3] == self.zero_idx: + sp = prims[1] + if in_idxs[2] == self.zero_idx: + sp = prims[2] + break + if sp is None: + print('unknown cell type', kind) + else: + level_ops.append((sp, l.index, *in_idxs, *a_ctrl[l])) + + if len(level_ops) > 0: levels.append(level_ops) + level_lines = prev_level_lines + + self.levels = [np.asarray(lv, dtype=np.int32) for lv in levels[::-1]] + level_sums = np.cumsum([0]+[len(lv) for lv in self.levels], dtype=np.int32) + self.level_starts, self.level_stops = level_sums[:-1], level_sums[1:] + self.ops = np.vstack(self.levels) # create a map from fanout lines to stem lines for fork stripping - stems = np.zeros(self.c_locs_len, dtype='int32') - 1 # default to -1: 'no fanout line' + stems = np.full(self.c_locs_len, -1, dtype=np.int32) # default to -1: 'no fanout line' if strip_forks: for f in circuit.forks.values(): prev_line = f.ins[0] while prev_line.driver.kind == '__fork__': prev_line = prev_line.driver.ins[0] - stem_idx = prev_line.index for ol in f.outs: if ol is not None: - stems[ol] = stem_idx - - # calculate level (distance from PI/PPI) and reference count for each line - levels = np.zeros(self.c_locs_len, dtype='int32') - ref_count = np.zeros(self.c_locs_len, dtype='int32') - level_starts = [0] - current_level = 1 - for i, op in enumerate(self.ops): - # if we fork-strip, always take the stems for determining fan-in level - i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2] - i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3] - i2_idx = stems[op[4]] if stems[op[4]] >= 0 else op[4] - i3_idx = stems[op[5]] if stems[op[5]] >= 0 else op[5] - if levels[i0_idx] >= current_level or levels[i1_idx] >= current_level or levels[i2_idx] >= current_level or levels[i3_idx] >= current_level: - current_level += 1 - level_starts.append(i) - levels[op[1]] = current_level # set level of the output line - ref_count[i0_idx] += 1 - ref_count[i1_idx] += 1 - ref_count[i2_idx] += 1 - ref_count[i3_idx] += 1 - self.level_starts = np.asarray(level_starts, dtype='int32') - self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32') + stems[ol] = prev_line.index + + ref_count = np.zeros(self.c_locs_len, dtype=np.int32) + + for op in self.ops: + for x in [2, 3, 4, 5]: + ref_count[stems[op[x]] if stems[op[x]] >= 0 else op[x]] += 1 # combinational signal allocation table. maps line and interface indices to self.c memory locations self.c_locs = np.full((self.c_locs_len,), -1, dtype=np.int32) @@ -278,9 +276,9 @@ class SimOps: ref_count[i0_idx] += 1 # allocate memory for the rest of the circuit - for op_start, op_stop in zip(self.level_starts, self.level_stops): + for ops in self.levels: free_set = set() - for op in self.ops[op_start:op_stop]: + for op in ops: # if we fork-strip, always take the stems i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2] i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3] @@ -299,7 +297,8 @@ class SimOps: self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap if c_reuse: for loc in free_set: - h.free(loc) + if loc >= 0: # DFF clocks are not allocated. Ignore for now. + h.free(loc) # copy memory location and capacity from stems to fanout lines for lidx, stem in enumerate(stems): @@ -311,6 +310,15 @@ class SimOps: if len(n.ins) > 0: self.c_locs[self.ppo_offset + i], self.c_caps[self.ppo_offset + i] = self.c_locs[n.ins[0]], self.c_caps[n.ins[0]] + # line use information + self.line_use_start = np.full(self.c_locs_len, -1, dtype=np.int32) + self.line_use_stop = np.full(self.c_locs_len, len(self.levels), dtype=np.int32) + for i, lv in enumerate(self.levels): + for op in lv: + self.line_use_start[op[1]] = i + for x in [2, 3, 4, 5]: + self.line_use_stop[op[x]] = i + self.c_len = h.max_size d = defaultdict(int) diff --git a/src/kyupy/stil.py b/src/kyupy/stil.py index 98cc2df..0b217ee 100644 --- a/src/kyupy/stil.py +++ b/src/kyupy/stil.py @@ -41,7 +41,7 @@ class StilFile: unload = {} for so_port in self.so_ports: if so_port in call.parameters: - unload[so_port] = call.parameters[so_port].replace('\n', '').replace('N', '-') + unload[so_port] = call.parameters[so_port] if len(capture) > 0: self.patterns.append(ScanPattern(sload, launch, capture, unload)) capture = {} @@ -49,11 +49,9 @@ class StilFile: sload = {} for si_port in self.si_ports: if si_port in call.parameters: - sload[si_port] = call.parameters[si_port].replace('\n', '').replace('N', '-') - if call.name.endswith('_launch'): - launch = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items()) - if call.name.endswith('_capture'): - capture = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items()) + sload[si_port] = call.parameters[si_port] + if call.name.endswith('_launch'): launch = call.parameters + if call.name.endswith('_capture'): capture = call.parameters def _maps(self, c): interface = list(c.io_nodes) + [n for n in c.nodes if 'DFF' in n.kind] @@ -100,12 +98,12 @@ class StilFile: tests = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED) for i, p in enumerate(self.patterns): for si_port in self.si_ports.keys(): - pattern = logic.mvarray(p.load[si_port]) + pattern = logic.mvarray(p.load[si_port][0]) inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN), [scan_inversions[si_port], logic.ZERO]).astype(np.uint8) np.bitwise_xor(pattern, inversions, out=pattern) tests[scan_maps[si_port], i] = pattern - tests[pi_map, i] = logic.mvarray(p.capture['_pi']) + tests[pi_map, i] = logic.mvarray(p.capture['_pi'][0]) return tests def tests_loc(self, circuit, init_filter=None, launch_filter=None): @@ -134,12 +132,12 @@ class StilFile: for i, p in enumerate(self.patterns): # init.set_values(i, '0' * len(interface)) for si_port in self.si_ports.keys(): - pattern = logic.mvarray(p.load[si_port]) + pattern = logic.mvarray(p.load[si_port][0]) inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN), [scan_inversions[si_port], logic.ZERO]).astype(np.uint8) np.bitwise_xor(pattern, inversions, out=pattern) init[scan_maps[si_port], i] = pattern - init[pi_map, i] = logic.mvarray(p.launch['_pi'] if '_pi' in p.launch else p.capture['_pi']) + init[pi_map, i] = logic.mvarray(p.launch['_pi'][0] if '_pi' in p.launch else p.capture['_pi'][0]) if init_filter: init = init_filter(init) sim8v = LogicSim(circuit, init.shape[-1], m=8) sim8v.s[0] = logic.mv_to_bp(init) @@ -149,12 +147,12 @@ class StilFile: launch = logic.bp_to_mv(sim8v.s[1])[..., :init.shape[-1]] for i, p in enumerate(self.patterns): # if there was no launch cycle or launch clock, then init = launch - if '_pi' not in p.launch or 'P' not in p.launch['_pi'] or 'P' not in p.capture['_pi']: + if '_pi' not in p.launch or 'P' not in p.launch['_pi'][0] or 'P' not in p.capture['_pi'][0]: for si_port in self.si_ports.keys(): - pattern = logic.mv_xor(logic.mvarray(p.load[si_port]), scan_inversions[si_port]) + pattern = logic.mv_xor(logic.mvarray(p.load[si_port][0]), scan_inversions[si_port]) launch[scan_maps[si_port], i] = pattern - if '_pi' in p.capture and 'P' in p.capture['_pi']: - launch[pi_map, i] = logic.mvarray(p.capture['_pi']) + if '_pi' in p.capture and 'P' in p.capture['_pi'][0]: + launch[pi_map, i] = logic.mvarray(p.capture['_pi'][0]) launch[po_map, i] = logic.UNASSIGNED if launch_filter: launch = launch_filter(launch) @@ -171,9 +169,9 @@ class StilFile: interface, _, po_map, scan_maps, scan_inversions = self._maps(circuit) resp = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED) for i, p in enumerate(self.patterns): - resp[po_map, i] = logic.mvarray(p.capture['_po'] if len(p.capture) > 0 else p.launch['_po']) + resp[po_map, i] = logic.mvarray(p.capture['_po'][0] if len(p.capture) > 0 else p.launch['_po'][0]) for so_port in self.so_ports.keys(): - pattern = logic.mv_xor(logic.mvarray(p.unload[so_port]), scan_inversions[so_port]) + pattern = logic.mv_xor(logic.mvarray(p.unload[so_port][0]), scan_inversions[so_port]) resp[scan_maps[so_port], i] = pattern return resp @@ -192,7 +190,7 @@ class StilTransformer(Transformer): def call(args): return Call(args[0], dict(args[1:])) @staticmethod - def call_parameter(args): return args[0], args[1].value + def call_parameter(args): return args[0], (args[1].value.replace('\n', '').replace('N', '-'), args[1].start_pos) @staticmethod def signal_group(args): return args[0], args[1:] diff --git a/src/kyupy/techlib.py b/src/kyupy/techlib.py index ce15ed1..6304a56 100644 --- a/src/kyupy/techlib.py +++ b/src/kyupy/techlib.py @@ -11,50 +11,6 @@ from itertools import product from . import bench -class TechLibOld: - @staticmethod - def pin_index(kind, pin): - if isinstance(pin, int): - return max(0, pin-1) - if kind[:3] in ('OAI', 'AOI'): - if pin[0] == 'A': return int(pin[1]) - 1 - if pin == 'B': return int(kind[3]) - if pin[0] == 'B': return int(pin[1]) - 1 + int(kind[3]) - for prefix, pins, index in [('HADD', ('B0', 'SO'), 1), - ('HADD', ('A0', 'C1'), 0), - ('MUX21', ('S', 'S0'), 2), - ('MX2', ('S0',), 2), - ('TBUF', ('OE',), 1), - ('TINV', ('OE',), 1), - ('LATCH', ('D',), 0), - ('LATCH', ('QN',), 1), - ('DFF', ('D',), 0), - ('DFF', ('QN',), 1), - ('SDFF', ('D',), 0), - ('SDFF', ('QN',), 1), - ('SDFF', ('CLK',), 3), - ('SDFF', ('RSTB', 'RN'), 4), - ('SDFF', ('SETB',), 5), - ('ISOL', ('ISO',), 0), - ('ISOL', ('D',), 1)]: - if kind.startswith(prefix) and pin in pins: return index - for index, pins in enumerate([('A1', 'IN1', 'A', 'S', 'INP', 'I', 'Q', 'QN', 'Y', 'Z', 'ZN'), - ('A2', 'IN2', 'B', 'CK', 'CLK', 'CO', 'SE'), - ('A3', 'IN3', 'C', 'RN', 'RSTB', 'CI', 'SI'), - ('A4', 'IN4', 'D', 'SN', 'SETB'), - ('A5', 'IN5', 'E'), - ('A6', 'IN6', 'F')]): - if pin in pins: return index - raise ValueError(f'Unknown pin index for {kind}.{pin}') - - @staticmethod - def pin_is_output(kind, pin): - if isinstance(pin, int): - return pin == 0 - if 'MUX' in kind and pin == 'S': return False - return pin in ('Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1') - - class TechLib: """Class for standard cell library definitions. @@ -93,6 +49,14 @@ class TechLib: assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}' return self.cells[kind][1][pin][0] + def pin_name(self, kind, pos, output=False): + """Returns the pin name for a given node kind, list position, and direction.""" + assert kind in self.cells, f'Unknown cell: {kind}' + for name, (ppos, isout) in self.cells[kind][1].items(): + if isout == output and ppos == pos: + return name + return None + def pin_is_output(self, kind, pin): """Returns True, if given pin name of a node kind is an output.""" assert kind in self.cells, f'Unknown cell: {kind}' @@ -138,21 +102,92 @@ TLATX1 input(C,D) output(Q,QN) Q=LATCH(D,C) QN=INV1(Q) ; """ -_nangate_common = r""" +NANGATE = TechLib(r""" +FILLTIE ; +FILL_X{1,2,4,8,16} ; +ANTENNA input(I) ; + +TIEH output(Z) Z=__const1__() ; +TIEL output(ZN) ZN=__const0__() ; + +BUF_X{1,2,4,8,12,16} input(I) output(Z) Z=BUF1(I) ; +INV_X{1,2,4,8,12,16} input(I) output(ZN) ZN=INV1(I) ; + +CLKBUF_X{1,2,4,8,12,16} input(I) output(Z) Z=BUF1(I) ; +CLKGATETST_X1 input(CLK,E,TE) output(Q) Q=OA21(CLK,E,TE) ; + +AND2_X{1,2} input(A1,A2) output(Z) Z=AND2(A1,A2) ; +AND3_X{1,2} input(A1,A2,A3) output(Z) Z=AND3(A1,A2,A3) ; +AND4_X{1,2} input(A1,A2,A3,A4) output(Z) Z=AND4(A1,A2,A3,A4) ; +NAND2_X{1,2} input(A1,A2) output(ZN) ZN=NAND2(A1,A2) ; +NAND3_X{1,2} input(A1,A2,A3) output(ZN) ZN=NAND3(A1,A2,A3) ; +NAND4_X{1,2} input(A1,A2,A3,A4) output(ZN) ZN=NAND4(A1,A2,A3,A4) ; +OR2_X{1,2} input(A1,A2) output(Z) Z=OR2(A1,A2) ; +OR3_X{1,2} input(A1,A2,A3) output(Z) Z=OR3(A1,A2,A3) ; +OR4_X{1,2} input(A1,A2,A3,A4) output(Z) Z=OR4(A1,A2,A3,A4) ; +NOR2_X{1,2} input(A1,A2) output(ZN) ZN=NOR2(A1,A2) ; +NOR3_X{1,2} input(A1,A2,A3) output(ZN) ZN=NOR3(A1,A2,A3) ; +NOR4_X{1,2} input(A1,A2,A3,A4) output(ZN) ZN=NOR4(A1,A2,A3,A4) ; +XOR2_X1 input(A1,A2) output(Z) Z=XOR2(A1,A2) ; +XNOR2_X1 input(A1,A2) output(ZN) ZN=XNOR2(A1,A2) ; + +MUX2_X1 input(I0,I1,S) output(Z) Z=MUX21(I0,I1,S) ; + +HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ; +FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ; + +AOI21_X{1,2} input(A1,A2,B) output(ZN) ZN=AOI21(A1,A2,B) ; +OAI21_X{1,2} input(A1,A2,B) output(ZN) ZN=OAI21(A1,A2,B) ; +AOI22_X{1,2} input(A1,A2,B1,B2) output(ZN) ZN=AOI22(A1,A2,B1,B2) ; +OAI22_X{1,2} input(A1,A2,B1,B2) output(ZN) ZN=OAI22(A1,A2,B1,B2) ; + +DFFRNQ_X1 input(D,RN,CLK) output(Q) DR=AND2(D,RN) Q=DFF(DR,CLK) ; +DFFSNQ_X1 input(D,SN,CLK) output(Q) S=INV1(SN) DS=OR2(D,S) Q=DFF(DS,CLK) ; + +SDFFRNQ_X1 input(D,RN,SE,SI,CLK) output(Q) DR=AND2(D,RN) DI=MUX21(DR,SI,SE) Q=DFF(DI,CLK) ; +SDFFSNQ_X1 input(D,SE,SI,SN,CLK) output(Q) S=INV1(SN) DS=OR2(D,S) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) ; + +TBUF_X{1,2,4,8,12,16} input(EN,I) output(Z) Z=BUF1(I) ; + +LHQ_X1 input(D,E) output(Q) Q=LATCH(D,E) ; +""") +"""Nangate 15nm Open Cell Library (NanGate_15nm_OCL_v0.1_2014_06.A). +""" + + +NANGATE45 = TechLib(r""" FILLCELL_X{1,2,4,8,16,32} ; +ANTENNA_X1 input(A) ; LOGIC0_X1 output(Z) Z=__const0__() ; LOGIC1_X1 output(Z) Z=__const1__() ; BUF_X{1,2,4,8,16,32} input(A) output(Z) Z=BUF1(A) ; -CLKBUF_X{1,2,3} input(A) output(Z) Z=BUF1(A) ; +INV_X{1,2,4,8,16,32} input(A) output(ZN) ZN=INV1(A) ; + +CLKBUF_X{1,2,3} input(A) output(Z) Z=BUF1(A) ; +CLKGATETST_X{1,2,4,8} input(CK,E,SE) output(GCK) GCK=OA21(CK,E,SE) ; +CLKGATE_X{1,2,4,8} input(CK,E) output(GCK) GCK=AND2(CK,E) ; +AND2_X{1,2,4} input(A1,A2) output(ZN) ZN=AND2(A1,A2) ; +AND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=AND3(A1,A2,A3) ; +AND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4) ; NAND2_X{1,2,4} input(A1,A2) output(ZN) ZN=NAND2(A1,A2) ; NAND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=NAND3(A1,A2,A3) ; NAND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=NAND4(A1,A2,A3,A4) ; +OR2_X{1,2,4} input(A1,A2) output(ZN) ZN=OR2(A1,A2) ; +OR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=OR3(A1,A2,A3) ; +OR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4) ; NOR2_X{1,2,4} input(A1,A2) output(ZN) ZN=NOR2(A1,A2) ; NOR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=NOR3(A1,A2,A3) ; NOR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=NOR4(A1,A2,A3,A4) ; +XOR2_X{1,2} input(A,B) output(Z) Z=XOR2(A,B) ; +XNOR2_X{1,2} input(A,B) output(ZN) ZN=XNOR2(A,B) ; + +MUX2_X{1,2} input(A,B,S) output(Z) Z=MUX21(A,B,S) ; + +HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ; +FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ; AOI21_X{1,2,4} input(A,B1,B2) output(ZN) ZN=AOI21(B1,B2,A) ; OAI21_X{1,2,4} input(A,B1,B2) output(ZN) ZN=OAI21(B1,B2,A) ; @@ -162,8 +197,6 @@ OAI22_X{1,2,4} input(A1,A2,B1,B2) output(ZN) ZN=OAI22(A1,A2,B1,B2) ; OAI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=OAI211(C1,C2,A,B) ; AOI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=AOI211(C1,C2,A,B) ; -MUX2_X{1,2} input(A,B,S) output(Z) Z=MUX21(A,B,S) ; - AOI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=AO22(B1,B2,C1,C2) ZN=NOR2(BC,A) ; OAI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=NAND2(BC,A) ; @@ -172,14 +205,6 @@ OAI222_X{1,2,4} input(A1,A2,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=OAI2 OAI33_X1 input(A1,A2,A3,B1,B2,B3) output(ZN) AA=OR2(A1,A2) BB=OR2(B1,B2) ZN=OAI22(AA,A3,BB,B3) ; -HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ; - -FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ; - -CLKGATE_X{1,2,4,8} input(CK,E) output(GCK) GCK=AND2(CK,E) ; - -CLKGATETST_X{1,2,4,8} input(CK,E,SE) output(GCK) GCK=OA21(CK,E,SE) ; - DFF_X{1,2} input(D,CK) output(Q,QN) Q=DFF(D,CK) QN=INV1(Q) ; DFFR_X{1,2} input(D,RN,CK) output(Q,QN) DR=AND2(D,RN) Q=DFF(DR,CK) QN=INV1(Q) ; DFFS_X{1,2} input(D,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) Q=DFF(DS,CK) QN=INV1(Q) ; @@ -191,43 +216,16 @@ SDFFS_X{1,2} input(D,SE,SI,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) DI=MU SDFFRS_X{1,2} input(D,RN,SE,SI,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) DRS=AND2(DS,RN) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ; TBUF_X{1,2,4,8,16} input(A,EN) output(Z) Z=BUF1(A) ; + TINV_X1 input(I,EN) output(ZN) ZN=INV1(I) ; TLAT_X1 input(D,G,OE) output(Q) Q=LATCH(D,G) ; DLH_X{1,2} input(D,G) output(Q) Q=LATCH(D,G) ; DLL_X{1,2} input(D,GN) output(Q) G=INV1(GN) Q=LATCH(D,G) ; -""" - -NANGATE = TechLib(_nangate_common + r""" -INV_X{1,2,4,8,16,32} input(I) output(ZN) ZN=INV1(I) ; - -AND2_X{1,2,4} input(A1,A2) output(Z) Z=AND2(A1,A2) ; -AND3_X{1,2,4} input(A1,A2,A3) output(Z) Z=AND3(A1,A2,A3) ; -AND4_X{1,2,4} input(A1,A2,A3,A4) output(Z) Z=AND4(A1,A2,A3,A4) ; -OR2_X{1,2,4} input(A1,A2) output(Z) Z=OR2(A1,A2) ; -OR3_X{1,2,4} input(A1,A2,A3) output(Z) Z=OR3(A1,A2,A3) ; -OR4_X{1,2,4} input(A1,A2,A3,A4) output(Z) Z=OR4(A1,A2,A3,A4) ; -XOR2_X{1,2} input(A1,A2) output(Z) Z=XOR2(A1,A2) ; -XNOR2_X{1,2} input(A1,A2) output(ZN) ZN=XNOR2(A1,A2) ; -""") -"""An newer NANGATE-variant that uses 'Z' as output pin names for AND and OR gates. -""" - - -NANGATE_ZN = TechLib(_nangate_common + r""" -INV_X{1,2,4,8,16,32} input(A) output(ZN) ZN=INV1(A) ; - -AND2_X{1,2,4} input(A1,A2) output(ZN) ZN=AND2(A1,A2) ; -AND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=AND3(A1,A2,A3) ; -AND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4) ; -OR2_X{1,2,4} input(A1,A2) output(ZN) ZN=OR2(A1,A2) ; -OR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=OR3(A1,A2,A3) ; -OR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4) ; -XOR2_X{1,2} input(A,B) output(Z) Z=XOR2(A,B) ; -XNOR2_X{1,2} input(A,B) output(ZN) ZN=XNOR2(A,B) ; """) -"""An older NANGATE-variant that uses 'ZN' as output pin names for AND and OR gates. +"""Nangate 45nm Open Cell Library (NangateOpenCellLibrary_PDKv1_3_v2010_12). +This NANGATE-variant that uses 'ZN' as output pin names for AND and OR gates. """ diff --git a/src/kyupy/verilog.py b/src/kyupy/verilog.py index 1c6a0d1..bcb982f 100644 --- a/src/kyupy/verilog.py +++ b/src/kyupy/verilog.py @@ -73,7 +73,7 @@ class VerilogTransformer(Transformer): elif "'" in args[0]: width, rest = args[0].split("'") width = int(width) - base, const = rest[0], rest[1:] + base, const = rest[0], rest[1:].replace('x','0') const = int(const, {'b': 2, 'd':10, 'h':16}[base.lower()]) l = [] for _ in range(width): @@ -92,6 +92,14 @@ class VerilogTransformer(Transformer): sigs.append(a) return sigs + def ternaryif(self, args): + sel = args[0] + ctrue = args[1] + cfalse = args[2] + print(f"got ternary if {args[0]} {args[1]}") + + return args[1] + def declaration(self, kind, args): rnge = None if isinstance(args[0], range): @@ -123,6 +131,9 @@ class VerilogTransformer(Transformer): assignments = [] for stmt in args[2:]: # pass 1: instantiate cells and driven signals if isinstance(stmt, Instantiation): + if stmt.type not in self.tlib.cells: + log.warn(f'Ignoring cell of unknown kind "{stmt.type}"') + continue n = Node(c, stmt.name, kind=stmt.type) for p, s in stmt.pins.items(): if self.tlib.pin_is_output(n.kind, p): @@ -141,35 +152,50 @@ class VerilogTransformer(Transformer): c.io_nodes[positions[name]] = n if sd.kind == 'input': Line(c, n, Node(c, name)) - for target, source in assignments: # pass 1.5: process signal assignments - target_sigs = [] - if not isinstance(target, list): target = [target] - for s in target: - if s in sig_decls: - target_sigs += sig_decls[s].names - else: - target_sigs.append(s) - source_sigs = [] - if not isinstance(source, list): source = [source] - for s in source: - if s in sig_decls: - source_sigs += sig_decls[s].names - else: - source_sigs.append(s) - for t, s in zip(target_sigs, source_sigs): - if t in c.forks: - assert s not in c.forks, 'assignment between two driven signals' - Line(c, c.forks[t], Node(c, s)) - elif s in c.forks: - assert t not in c.forks, 'assignment between two driven signals' - Line(c, c.forks[s], Node(c, t)) - elif s.startswith("1'b"): - cnode = Node(c, f'__const{s[3]}_{const_count}__', f'__const{s[3]}__') - const_count += 1 - Line(c, cnode, Node(c, t)) + deferred_assignments = set() + ignored = 0 + while len(assignments) > 0: + more_assignments = [] + for target, source in assignments: # pass 1.5: process signal assignments + target_sigs = [] + if not isinstance(target, list): target = [target] + for s in target: + if s in sig_decls: + target_sigs += sig_decls[s].names + else: + target_sigs.append(s) + source_sigs = [] + if not isinstance(source, list): source = [source] + for s in source: + if s in sig_decls: + source_sigs += sig_decls[s].names + else: + source_sigs.append(s) + for t, s in zip(target_sigs, source_sigs): + if t in c.forks: + assert s not in c.forks, f'assignment between two driven signals: source={s} target={t}' + Line(c, c.forks[t], Node(c, s)) + elif s in c.forks: + assert t not in c.forks, 'assignment between two driven signals' + Line(c, c.forks[s], Node(c, t)) + elif s.startswith("1'b"): + cnode = Node(c, f'__const{s[3]}_{const_count}__', f'__const{s[3]}__') + const_count += 1 + Line(c, cnode, Node(c, t)) + else: + if (t, s) in deferred_assignments: + #log.info(f'ignoring: assign {t} = {s}') + ignored += 1 + else: + more_assignments.append((t, s)) + deferred_assignments.add((t, s)) + assignments = more_assignments + if ignored > 0: + log.warn(f'ignored {ignored} assignments') for stmt in args[2:]: # pass 2: connect signals to readers if isinstance(stmt, Instantiation): for p, s in stmt.pins.items(): + if stmt.name not in c.cells: continue n = c.cells[stmt.name] if self.tlib.pin_is_output(n.kind, p): continue if s.startswith("1'b"): @@ -221,10 +247,11 @@ GRAMMAR = r""" pin: namedpin | sigsel namedpin: "." name "(" sigsel? ")" range: "[" /[0-9]+/ (":" /[0-9]+/)? "]" - sigsel: name range? | concat + sigsel: name range? | concat | ternaryif concat: "{" sigsel ( "," sigsel )* "}" + ternaryif: sigsel "?" sigsel ":" sigsel _namelist: name ( "," name )* - name: ( /[a-z_][a-z0-9_]*/i | /\\[^\t \r\n]+[\t \r\n]/i | /[0-9]+'[bdh][0-9a-f]+/i ) + name: ( /[a-z_][a-z0-9_]*/i | /\\[^\t \r\n]+[\t \r\n]/i | /[0-9]+'[bdh][x0-9a-f]+/i ) %import common.NEWLINE COMMENT: /\/\*(\*(?!\/)|[^*])*\*\// | /\(\*(\*(?!\))|[^*])*\*\)/ | "//" /(.)*/ NEWLINE %ignore ( /\r?\n/ | COMMENT )+ diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py index 93a107f..170445b 100644 --- a/src/kyupy/wave_sim.py +++ b/src/kyupy/wave_sim.py @@ -13,10 +13,11 @@ Two simulators are available: :py:class:`WaveSim` runs on the CPU, and the deriv """ import math +from collections import defaultdict import numpy as np -from . import numba, cuda, sim, cdiv +from . import log, numba, cuda, sim, cdiv, eng TMAX = np.float32(2 ** 127) @@ -59,8 +60,8 @@ class WaveSim(sim.SimOps): self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype) self.delays[:, :delays.shape[1]] = delays - self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX - self.s = np.zeros((11, self.s_len, sims), dtype=np.float32) + self.c = np.full((self.c_len, self.sims), TMAX, dtype=np.float32) + self.s = np.zeros((11, self.s_len, self.sims), dtype=np.float32) """Information about the logic values and transitions around the sequential elements (flip-flops) and ports. The first 3 values are read by :py:func:`s_to_c`. @@ -98,12 +99,24 @@ class WaveSim(sim.SimOps): self.simctl_int[0] = range(sims) # unique seed for each sim by default, zero this to pick same delays for all sims. self.simctl_int[1] = 2 # random picking by default. - self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.simctl_int)]) + self.simctl_float = np.zeros((1, sims), dtype=np.float32) + 1 + """Float array for per-simulation delay configuration. + + * ``simctl_float[0]`` factor to be multiplied with each delay (default=1.0). + """ + + self.e = np.zeros((self.c_locs_len, sims, 2), dtype=np.uint8) # aux data for each line and sim + + self.error_counts = np.zeros(self.s_len, dtype=np.uint32) # number of capture errors by PPO + self.lsts = np.zeros(self.s_len, dtype=np.float32) # LST by PPO + self.overflows = np.zeros(self.s_len, dtype=np.uint32) # Overflows by PPO + + self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.e, self.c_locs, self.c_caps, self.ops, self.simctl_int, self.simctl_float)]) def __repr__(self): dev = 'GPU' if hasattr(self.c, 'copy_to_host') else 'CPU' return f'{{name: "{self.circuit.name}", device: "{dev}", sims: {self.sims}, ops: {len(self.ops)}, ' + \ - f'levels: {len(self.level_starts)}, nbytes: {self.nbytes}}}' + f'levels: {len(self.level_starts)}, nbytes: {eng(self.nbytes)}}}' def s_to_c(self): """Transfers values of sequential elements and primary inputs to the combinational portion. @@ -116,7 +129,7 @@ class WaveSim(sim.SimOps): self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX]) self.c[self.pippi_c_locs+2] = TMAX - def c_prop(self, sims=None, seed=1): + def c_prop(self, sims=None, seed=1, delta=0): """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs. :param sims: Number of parallel simulations to execute. If None, all available simulations are performed. @@ -124,7 +137,7 @@ class WaveSim(sim.SimOps): """ sims = min(sims or self.sims, self.sims) for op_start, op_stop in zip(self.level_starts, self.level_stops): - level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, 0, sims, self.delays, self.simctl_int, seed) + level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, 0, sims, self.delays, self.simctl_int, self.simctl_float, seed, delta) def c_to_s(self, time=TMAX, sd=0.0, seed=1): """Simulates a capture operation at all sequential elements and primary outputs. @@ -152,7 +165,7 @@ class WaveSim(sim.SimOps): self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] -def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): +def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, simctl_float, seed, delta): overflows = int(0) lut = op[0] @@ -162,6 +175,18 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): c_idx = op[4] d_idx = op[5] + input_epoch = (ebuf[a_idx, sim, 1]| + ebuf[b_idx, sim, 1]| + ebuf[c_idx, sim, 1]| + ebuf[d_idx, sim, 1]) + + output_epoch = ebuf[z_idx, sim, 1] + + if (delta): + if input_epoch == 0 and output_epoch == 0: return 0, 0 + + #out_changed = output_epoch + if len(delays) > 1: if simctl_int[1] == 0: delays = delays[seed] @@ -174,6 +199,8 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): delays = delays[_rnd % len(delays)] else: delays = delays[0] + + a_mem = c_locs[a_idx] b_mem = c_locs[b_idx] @@ -192,10 +219,10 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): z_val = z_cur - a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val] - b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val] - c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val] - d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val] + a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val] * simctl_float[0] + b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val] * simctl_float[0] + c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val] * simctl_float[0] + d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val] * simctl_float[0] previous_t = TMIN @@ -206,27 +233,27 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): if a == current_t: a_cur += 1 inputs ^= 1 - thresh = delays[a_idx, a_cur & 1, z_val] - a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val] - next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, (a_cur & 1) ^ 1, z_val ^ 1] + thresh = delays[a_idx, a_cur & 1 ^ 1, z_val] * simctl_float[0] + a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val] * simctl_float[0] + next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, (a_cur & 1) ^ 1, z_val ^ 1] * simctl_float[0] elif b == current_t: b_cur += 1 inputs ^= 2 - thresh = delays[b_idx, b_cur & 1, z_val] - b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val] - next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, (b_cur & 1) ^ 1, z_val ^ 1] + thresh = delays[b_idx, b_cur & 1 ^ 1, z_val] * simctl_float[0] + b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val] * simctl_float[0] + next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, (b_cur & 1) ^ 1, z_val ^ 1] * simctl_float[0] elif c == current_t: c_cur += 1 inputs ^= 4 - thresh = delays[c_idx, c_cur & 1, z_val] - c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val] - next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, (c_cur & 1) ^ 1, z_val ^ 1] + thresh = delays[c_idx, c_cur & 1 ^ 1, z_val] * simctl_float[0] + c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val] * simctl_float[0] + next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, (c_cur & 1) ^ 1, z_val ^ 1] * simctl_float[0] else: d_cur += 1 inputs ^= 8 - thresh = delays[d_idx, d_cur & 1, z_val] - d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val] - next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1] + thresh = delays[d_idx, d_cur & 1 ^ 1, z_val] * simctl_float[0] + d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val] * simctl_float[0] + next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1] * simctl_float[0] if (z_cur & 1) != ((lut >> inputs) & 1): # we generate an edge in z_mem, if ... @@ -235,32 +262,45 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): or (current_t - previous_t) > thresh # -OR- the generated hazard is wider than pulse threshold. ): if z_cur < (z_cap - 1): # enough space in z_mem? + #if delta and (cbuf[z_mem + z_cur, sim] != current_t): + # out_changed = 1 cbuf[z_mem + z_cur, sim] = current_t previous_t = current_t z_cur += 1 else: overflows += 1 - previous_t = cbuf[z_mem + z_cur - 1, sim] z_cur -= 1 + previous_t = cbuf[z_mem + z_cur, sim] else: z_cur -= 1 previous_t = cbuf[z_mem + z_cur - 1, sim] if z_cur > 0 else TMIN # output value of cell changed. update all delayed inputs. z_val = z_val ^ 1 - a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val] - b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val] - c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val] - d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val] + a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val] * simctl_float[0] + b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val] * simctl_float[0] + c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val] * simctl_float[0] + d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val] * simctl_float[0] current_t = min(a, b, c, d) + #if delta and (cbuf[z_mem + z_cur, sim] != TMAX): + # out_changed = 1 + # generate or propagate overflow flag cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d) nrise = max(0, (z_cur+1) // 2 - (cbuf[z_mem, sim] == TMIN)) nfall = z_cur // 2 + e = int(((cbuf[z_mem, sim] == TMIN) << 1) & 2) # initial value + e |= z_val # final value + e |= (nrise + nfall)<<2 # number of transitions + + ebuf[z_idx, sim, 0] = e + + ebuf[z_idx, sim, 1] = input_epoch #& out_changed + return nrise, nfall @@ -268,11 +308,11 @@ wave_eval_cpu = numba.njit(_wave_eval) @numba.njit -def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed): +def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, simctl_float, seed, delta): for op_idx in range(op_start, op_stop): op = ops[op_idx] for sim in range(sim_start, sim_stop): - nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed) + nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], simctl_float[:, sim], seed, delta) a_loc = op[6] a_wr = op[7] a_wf = op[8] @@ -344,32 +384,51 @@ class WaveSimCuda(WaveSim): self.c_caps = cuda.to_device(self.c_caps) self.delays = cuda.to_device(self.delays) self.simctl_int = cuda.to_device(self.simctl_int) + self.simctl_float = cuda.to_device(self.simctl_float) self.abuf = cuda.to_device(self.abuf) + self.e = cuda.to_device(self.e) + self.error_counts = cuda.to_device(self.error_counts) + self.lsts = cuda.to_device(self.lsts) + self.overflows = cuda.to_device(self.overflows) + + self.aux = cuda.to_device(np.zeros(8*1024, dtype=np.int32)) self._block_dim = (32, 16) def __getstate__(self): state = self.__dict__.copy() - state['c'] = np.array(self.c) + del state['c'] state['s'] = np.array(self.s) state['ops'] = np.array(self.ops) state['c_locs'] = np.array(self.c_locs) state['c_caps'] = np.array(self.c_caps) state['delays'] = np.array(self.delays) state['simctl_int'] = np.array(self.simctl_int) + state['simctl_float'] = np.array(self.simctl_float) state['abuf'] = np.array(self.abuf) + state['e'] = np.array(self.e) + state['error_counts'] = np.array(self.error_counts) + state['lsts'] = np.array(self.lsts) + state['overflows'] = np.array(self.overflows) + state['aux'] = np.array(self.aux) return state def __setstate__(self, state): self.__dict__.update(state) - self.c = cuda.to_device(self.c) + self.c = cuda.to_device(np.full((self.c_len, self.sims), TMAX, dtype=np.float32)) self.s = cuda.to_device(self.s) self.ops = cuda.to_device(self.ops) self.c_locs = cuda.to_device(self.c_locs) self.c_caps = cuda.to_device(self.c_caps) self.delays = cuda.to_device(self.delays) self.simctl_int = cuda.to_device(self.simctl_int) + self.simctl_float = cuda.to_device(self.simctl_float) self.abuf = cuda.to_device(self.abuf) + self.e = cuda.to_device(self.e) + self.error_counts = cuda.to_device(self.error_counts) + self.lsts = cuda.to_device(self.lsts) + self.overflows = cuda.to_device(self.overflows) + self.aux = cuda.to_device(self.aux) def s_to_c(self): grid_dim = self._grid_dim(self.sims, self.s_len) @@ -377,14 +436,24 @@ class WaveSimCuda(WaveSim): def _grid_dim(self, x, y): return cdiv(x, self._block_dim[0]), cdiv(y, self._block_dim[1]) - def c_prop(self, sims=None, seed=1): + def c_prop(self, sims=None, seed=1, op_from=0, op_to=None, delta=0): sims = min(sims or self.sims, self.sims) for op_start, op_stop in zip(self.level_starts, self.level_stops): + if op_from > op_start: continue + if op_to is not None and op_to <= op_start: break grid_dim = self._grid_dim(sims, op_stop - op_start) - wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, int(0), - sims, self.delays, self.simctl_int, seed) + wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0), + sims, self.delays, self.simctl_int, self.simctl_float, seed, delta) cuda.synchronize() + def c_prop_level(self, level, sims=None, seed=1, delta=0): + sims = min(sims or self.sims, self.sims) + op_start = self.level_starts[level] + op_stop = self.level_stops[level] + grid_dim = self._grid_dim(sims, op_stop - op_start) + wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0), + sims, self.delays, self.simctl_int, self.simctl_float, seed, delta) + def c_to_s(self, time=TMAX, sd=0.0, seed=1): grid_dim = self._grid_dim(self.sims, self.s_len) wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.c_caps, self.ppo_offset, @@ -394,6 +463,77 @@ class WaveSimCuda(WaveSim): grid_dim = self._grid_dim(self.sims, self.s_len) ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset) + def acc_error_counts(self, sims=None): + sims = min(sims or self.sims, self.sims) + grid_dim = cdiv(self.s_len, 256) + acc_error_counts_gpu[grid_dim, 256](self.s, sims, self.error_counts) + + def reset_error_counts(self): + self.error_counts[:] = 0 + + def get_error_counts(self): + return np.array(self.error_counts) + + def acc_overflows(self, sims=None): + sims = min(sims or self.sims, self.sims) + grid_dim = cdiv(self.s_len, 256) + acc_overflows_gpu[grid_dim, 256](self.s, sims, self.overflows) + + def reset_overflows(self): + self.overflows[:] = 0 + + def get_overflows(self): + return np.array(self.overflows) + + def acc_lsts(self, sims=None): + sims = min(sims or self.sims, self.sims) + grid_dim = cdiv(self.s_len, 256) + acc_lsts_gpu[grid_dim, 256](self.s, sims, self.lsts) + + def reset_lsts(self): + self.lsts[:] = 0.0 + + def get_lsts(self): + return np.array(self.lsts) + + +@cuda.jit() +def memcpy_gpu (src, dst, nitems): + tid = cuda.grid(1) + stride = cuda.gridDim.x * cuda.blockDim.x + for i in range(tid, nitems, stride): + dst.flat[i] = src.flat[i] + + +@cuda.jit() +def acc_error_counts_gpu(s, sims, error_counts): + x = cuda.grid(1) + if x >= s.shape[1]: return + cnt = 0 + for i in range(sims): + cnt += (s[6,x,i] != s[8,x,i]) + error_counts[x] += cnt + + +@cuda.jit() +def acc_overflows_gpu(s, sims, overflows): + x = cuda.grid(1) + if x >= s.shape[1]: return + cnt = 0 + for i in range(sims): + cnt += s[10,x,i] + overflows[x] += cnt + + +@cuda.jit() +def acc_lsts_gpu(s, sims, lsts): + x = cuda.grid(1) + if x >= s.shape[1]: return + lst = 0 + for i in range(sims): + lst = max(lst, s[5,x,i]) + lsts[x] = max(lsts[x], lst) + @cuda.jit() def wave_assign_gpu(c, s, c_locs, ppi_offset): @@ -423,7 +563,7 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True) @cuda.jit() -def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed): +def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, simctl_float, seed, delta): x, y = cuda.grid(2) sim = sim_start + x op_idx = op_start + y @@ -435,7 +575,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start, a_wr = op[7] a_wf = op[8] - nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed) + nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], simctl_float[:, sim], seed, delta) # accumulate WSA into abuf if a_loc >= 0: diff --git a/tests/b15_4ig.sa_rf.stil.gz b/tests/b15_4ig.sa_rf.stil.gz new file mode 100644 index 0000000..437a3e0 Binary files /dev/null and b/tests/b15_4ig.sa_rf.stil.gz differ diff --git a/tests/conftest.py b/tests/conftest.py index c73bdc5..7bf9779 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,8 +13,44 @@ def b15_2ig_circuit(mydir): from kyupy.techlib import SAED32 return verilog.load(mydir / 'b15_2ig.v.gz', branchforks=True, tlib=SAED32) +@pytest.fixture(scope='session') +def b15_2ig_circuit_resolved(b15_2ig_circuit): + from kyupy.techlib import SAED32 + cr = b15_2ig_circuit.copy() + cr.resolve_tlib_cells(SAED32) + return cr + +@pytest.fixture(scope='session') +def b15_4ig_circuit(mydir): + from kyupy import verilog + from kyupy.techlib import SAED32 + return verilog.load(mydir / 'b15_4ig.v.gz', branchforks=True, tlib=SAED32) + +@pytest.fixture(scope='session') +def b15_4ig_circuit_resolved(b15_4ig_circuit): + from kyupy.techlib import SAED32 + cr = b15_4ig_circuit.copy() + cr.resolve_tlib_cells(SAED32) + return cr + @pytest.fixture(scope='session') def b15_2ig_delays(mydir, b15_2ig_circuit): from kyupy import sdf from kyupy.techlib import SAED32 return sdf.load(mydir / 'b15_2ig.sdf.gz').iopaths(b15_2ig_circuit, tlib=SAED32)[1:2] + +@pytest.fixture(scope='session') +def b15_2ig_sa_nf_test_resp(mydir, b15_2ig_circuit_resolved): + from kyupy import stil + s = stil.load(mydir / 'b15_2ig.sa_nf.stil.gz') + tests = s.tests(b15_2ig_circuit_resolved)[:,1:] + resp = s.responses(b15_2ig_circuit_resolved)[:,1:] + return (tests, resp) + +@pytest.fixture(scope='session') +def b15_4ig_sa_rf_test_resp(mydir, b15_4ig_circuit_resolved): + from kyupy import stil + s = stil.load(mydir / 'b15_4ig.sa_rf.stil.gz') + tests = s.tests(b15_4ig_circuit_resolved)[:,1:] + resp = s.responses(b15_4ig_circuit_resolved)[:,1:] + return (tests, resp) diff --git a/tests/gates.sdf b/tests/gates.sdf index 1652ff0..529a06b 100644 --- a/tests/gates.sdf +++ b/tests/gates.sdf @@ -7,22 +7,49 @@ (TEMPERATURE 25.00:25.00:25.00) (TIMESCALE 1ns) (CELL - (CELLTYPE "NAND2X1") + (CELLTYPE "NAND2_X1") (INSTANCE nandgate) (DELAY (ABSOLUTE - (IOPATH IN1 QN (0.099:0.103:0.103) (0.122:0.127:0.127)) - (IOPATH IN2 QN (0.083:0.086:0.086) (0.100:0.104:0.104)) + (IOPATH A1 ZN (0.099:0.103:0.103) (0.122:0.127:0.127)) + (IOPATH A2 ZN (0.083:0.086:0.086) (0.100:0.104:0.104)) ) ) ) (CELL - (CELLTYPE "AND2X1") + (CELLTYPE "AND2_X1") (INSTANCE andgate) (DELAY (ABSOLUTE - (IOPATH IN1 Q (0.367:0.378:0.378) (0.351:0.377:0.377)) - (IOPATH IN2 Q (0.366:0.375:0.375) (0.359:0.370:0.370)) + (IOPATH A1 ZN (0.367:0.378:0.378) (0.351:0.377:0.377)) + (IOPATH A2 ZN (0.366:0.375:0.375) (0.359:0.370:0.370)) + ) + ) +) +(CELL + (CELLTYPE "OAI21_X1") + (INSTANCE oai21gate) + (DELAY + (ABSOLUTE + (IOPATH B1 ZN (0.000:0.025:0.025) (0.000:0.013:0.013)) + (IOPATH B2 ZN (0.000:0.030:0.030) (0.000:0.016:0.016)) + ( COND (B1 == 1'b0) && (B2 == 1'b1) (IOPATH A ZN (0.000:0.018:0.018))) + (COND (B1 == 1'b1) && (B2 == 1'b0) (IOPATH A ZN (0.000:0.018:0.018) (0.000:0.016:0.016))) + (COND (B1 == 1'b1) && (B2 == 1'b1) (IOPATH A ZN (0.000:0.019:0.019) (0.000:0.014:0.014))) + ) + ) +) +(CELL + (CELLTYPE "MUX2_X1") + (INSTANCE mux2gate) + (DELAY + (ABSOLUTE + (COND (B == 1'b0) && (S == 1'b0) (IOPATH A Z (0.000:0.037:0.037) (0.000:0.058:0.058))) + (COND (B == 1'b1) && (S == 1'b0) (IOPATH A Z (0.000:0.037:0.037) (0.000:0.058:0.058))) + (COND (A == 1'b0) && (S == 1'b1) (IOPATH B Z (0.000:0.035:0.035) (0.000:0.056:0.056))) + (COND (A == 1'b1) && (S == 1'b1) (IOPATH B Z (0.000:0.035:0.035) (0.000:0.056:0.056))) + (COND (A == 1'b0) && (B == 1'b1) (IOPATH S Z (0.000:0.047:0.047) (0.000:0.073:0.073))) + (COND (A == 1'b1) && (B == 1'b0) (IOPATH S Z (0.000:0.072:0.072) (0.000:0.064:0.064))) ) ) ) diff --git a/tests/gates.v b/tests/gates.v index 2fa07cd..0925bf6 100644 --- a/tests/gates.v +++ b/tests/gates.v @@ -1,11 +1,15 @@ -module gates (a, b, o0, o1 ); +module gates (a, b, c, o0, o1, o2, o3 ); input a; input b; +input c; output o0; output o1; +output o2; +output o3; -AND2X1 andgate (.IN1 ( a ) , .IN2 ( b ) , .Q ( o0 ) ) ; -NAND2X1 nandgate (.IN1 ( a ) , .IN2 ( b ) , .QN ( o1 ) ) ; - +AND2_X1 andgate (.A1 ( a ) , .A2 ( b ) , .ZN ( o0 ) ) ; +NAND2_X1 nandgate (.A1 ( a ) , .A2 ( b ) , .ZN ( o1 ) ) ; +OAI21_X1 oai21gate (.B1(a), .B2(b), .A(c), .ZN(o2) ) ; +MUX2_X1 mux2gate (.A(a), .B(b), .S(c), .Z(o3)) ; endmodule \ No newline at end of file diff --git a/tests/test_circuit.py b/tests/test_circuit.py index 5aa3074..d4edcd3 100644 --- a/tests/test_circuit.py +++ b/tests/test_circuit.py @@ -1,9 +1,30 @@ import pickle -from kyupy.circuit import Circuit, Node, Line +from kyupy.circuit import GrowingList, Circuit, Node, Line from kyupy import verilog, bench from kyupy.techlib import SAED32 +def test_growing_list(): + gl = GrowingList() + assert gl.free_idx == 0 + gl[0] = 1 + assert gl.free_idx == 1 + gl[2] = 1 + assert gl.free_idx == 1 + gl[0] = None + assert gl.free_idx == 0 + gl[0] = 1 + assert gl.free_idx == 1 + gl[1] = 1 + assert gl.free_idx == 3 + gl.append(1) + assert gl.free_idx == 4 + gl[2] = None + assert gl.free_idx == 2 + gl[2] = 1 + gl[1] = None + assert gl.free_idx == 1 + def test_lines(): c = Circuit() n1 = Node(c, 'n1') diff --git a/tests/test_logic_sim.py b/tests/test_logic_sim.py index 5849310..85a11ff 100644 --- a/tests/test_logic_sim.py +++ b/tests/test_logic_sim.py @@ -1,6 +1,6 @@ import numpy as np -from kyupy.logic_sim import LogicSim +from kyupy.logic_sim import LogicSim, LogicSim6V from kyupy import bench, logic, sim from kyupy.logic import mvarray, bparray, bp_to_mv, mv_to_bp @@ -94,6 +94,30 @@ def test_4v(): '--0XX', '--X1X', '--XXX', '--XXX')) +def test_6v(): + c = bench.parse('input(x, y) output(a, o, n, xo, no) a=AND2(x,y) o=OR2(x,y) n=INV1(x) xo=XOR2(x,y) no=NOR2(x,y)') + s = LogicSim6V(c, 36) + assert s.s_len == 7 + mva = mvarray( + '0000101', '0101110', '0R0R1RF', '0F0F1FR', '0P0P1PN', '0N0N1NP', + '1001010', '1111000', '1RR10F0', '1FF10R0', '1PP10N0', '1NN10P0', + 'R00RFRF', 'R1R1FF0', 'RRRRFPF', 'RFPNFNP', 'RPPRFRF', 'RNRNFFP', + 'F00FRFR', 'F1F1RR0', 'FRPNRNP', 'FFFFRPR', 'FPPFRFR', 'FNFNRRP', + 'P00PNPN', 'P1P1NN0', 'PRPRNRF', 'PFPFNFR', 'PPPPNPN', 'PNPNNNP', + 'N00NPNP', 'N1N1PP0', 'NRRNPFP', 'NFFNPRP', 'NPPNPNP', 'NNNNPPP') + tests = np.copy(mva) + tests[2:] = logic.ZERO + s.s[0] = tests + s.s_to_c() + s.c_prop() + s.c_to_s() + resp = s.s[1].copy() + + exp_resp = np.copy(mva) + exp_resp[:2] = logic.ZERO + np.testing.assert_allclose(resp, exp_resp) + + def test_8v(): c = bench.parse('input(x, y) output(a, o, n, xo) a=and(x,y) o=or(x,y) n=not(x) xo=xor(x,y)') s = LogicSim(c, 64, m=8) @@ -173,3 +197,64 @@ def test_b01(mydir): s.c_prop() s.c_to_s() bp_to_mv(s.s[1]) + + +def sim_and_compare(c, test_resp, m=8): + tests, resp = test_resp + lsim = LogicSim(c, m=m, sims=tests.shape[1]) + lsim.s[0] = logic.mv_to_bp(tests) + lsim.s_to_c() + lsim.c_prop() + lsim.c_to_s() + resp_sim = logic.bp_to_mv(lsim.s[1])[:,:tests.shape[1]] + idxs, pats = np.nonzero(((resp == logic.ONE) & (resp_sim != logic.ONE)) | ((resp == logic.ZERO) & (resp_sim != logic.ZERO))) + for i, (idx, pat) in enumerate(zip(idxs, pats)): + if i >= 10: + print(f'...') + break + print(f'mismatch pattern:{pat} ppio:{idx} exp:{logic.mv_str(resp[idx,pat])} act:{logic.mv_str(resp_sim[idx,pat])}') + assert len(idxs) == 0 + +def sim_and_compare_6v(c, test_resp): + tests, resp = test_resp + lsim = LogicSim6V(c, sims=tests.shape[1]) + lsim.s[0] = tests + lsim.s_to_c() + lsim.c_prop() + lsim.c_to_s() + resp_sim = lsim.s[1] + idxs, pats = np.nonzero(((resp == logic.ONE) & (resp_sim != logic.ONE)) | ((resp == logic.ZERO) & (resp_sim != logic.ZERO))) + for i, (idx, pat) in enumerate(zip(idxs, pats)): + if i >= 10: + print(f'...') + break + print(f'mismatch pattern:{pat} ppio:{idx} exp:{logic.mv_str(resp[idx,pat])} act:{logic.mv_str(resp_sim[idx,pat])}') + assert len(idxs) == 0 + + +def test_b15_2ig_sa_2v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp): + sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=2) + + +def test_b15_2ig_sa_4v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp): + sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=4) + + +def test_b15_2ig_sa_6v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp): + sim_and_compare_6v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp) + + +def test_b15_2ig_sa_8v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp): + sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=8) + + +def test_b15_4ig_sa_2v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp): + sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=2) + + +def test_b15_4ig_sa_4v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp): + sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=4) + + +def test_b15_4ig_sa_8v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp): + sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=8) diff --git a/tests/test_sdf.py b/tests/test_sdf.py index e94285b..9ca4627 100644 --- a/tests/test_sdf.py +++ b/tests/test_sdf.py @@ -2,7 +2,7 @@ import numpy as np from kyupy import sdf, verilog, bench from kyupy.wave_sim import WaveSim, TMAX, TMIN -from kyupy.techlib import SAED32, SAED90 +from kyupy.techlib import SAED32, NANGATE45 def test_parse(): test = ''' @@ -80,9 +80,9 @@ def test_b15(mydir): def test_gates(mydir): - c = verilog.load(mydir / 'gates.v', tlib=SAED90) + c = verilog.load(mydir / 'gates.v', tlib=NANGATE45) df = sdf.load(mydir / 'gates.sdf') - lt = df.iopaths(c, tlib=SAED90)[1] + lt = df.iopaths(c, tlib=NANGATE45)[1] nand_a = c.cells['nandgate'].ins[0] nand_b = c.cells['nandgate'].ins[1] and_a = c.cells['andgate'].ins[0] diff --git a/tests/test_verilog.py b/tests/test_verilog.py index 87bbe73..816c12b 100644 --- a/tests/test_verilog.py +++ b/tests/test_verilog.py @@ -1,5 +1,5 @@ from kyupy import verilog -from kyupy.techlib import SAED90, SAED32 +from kyupy.techlib import SAED90, SAED32, NANGATE45 def test_b01(mydir): with open(mydir / 'b01.v', 'r') as f: @@ -26,12 +26,12 @@ def test_b15(mydir): def test_gates(mydir): - c = verilog.load(mydir / 'gates.v', tlib=SAED90) - assert len(c.nodes) == 10 - assert len(c.lines) == 10 + c = verilog.load(mydir / 'gates.v', tlib=NANGATE45) + assert len(c.nodes) == 18 + assert len(c.lines) == 21 stats = c.stats - assert stats['input'] == 2 - assert stats['output'] == 2 + assert stats['input'] == 3 + assert stats['output'] == 4 assert stats['__seq__'] == 0 diff --git a/tests/test_wave_sim.py b/tests/test_wave_sim.py index 9a09b32..2510c28 100644 --- a/tests/test_wave_sim.py +++ b/tests/test_wave_sim.py @@ -5,22 +5,56 @@ from kyupy.logic_sim import LogicSim from kyupy import logic, bench, sim from kyupy.logic import mvarray +def test_xnor2_delays(): + op = (sim.XNOR2, 2, 0, 1, 3, 3, -1, 0, 0) + #op = (0b0111, 4, 0, 1) + c = np.full((4*16, 1), TMAX, dtype=np.float32) # 4 waveforms of capacity 16 + c_locs = np.zeros((4,), dtype='int') + c_caps = np.zeros((4,), dtype='int') + ebuf = np.zeros((4, 1, 2), dtype=np.int32) + + for i in range(4): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping + delays = np.zeros((1, 4, 2, 2)) + delays[0, 0, 0, 0] = 0.031 # A rise -> Z rise + delays[0, 0, 0, 1] = 0.027 # A rise -> Z fall + delays[0, 0, 1, 0] = 0.033 # A fall -> Z rise + delays[0, 0, 1, 1] = 0.037 # A fall -> Z fall + delays[0, 1, 0, 0] = 0.032 # B rise -> Z rise + delays[0, 1, 0, 1] = 0.030 # B rise -> Z fall + delays[0, 1, 1, 0] = 0.038 # B fall -> Z rise + delays[0, 1, 1, 1] = 0.036 # B fall -> Z fall + + simctl_int = np.asarray([0], dtype=np.int32) + + def wave_assert(inputs, output): + for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i + wave_eval_cpu(op, c, c_locs, c_caps, ebuf, 0, delays, simctl_int, 0, 0) + for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[2,i], v) + + wave_assert([[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # XNOR(1,1) => 1 + wave_assert([[TMAX,TMAX],[TMIN,TMAX]], [TMAX]) # XNOR(0,1) => 0 + # using Afall/Zfall for pulse length, bug: was using Arise/Zfall + #wave_assert([[0.07, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, 0.101, 0.137, TMAX]) + wave_assert([[0.07, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, TMAX]) + wave_assert([[0.06, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, 0.091, 0.137, TMAX]) + def test_nand_delays(): op = (sim.NAND4, 4, 0, 1, 2, 3, -1, 0, 0) #op = (0b0111, 4, 0, 1) - c = np.full((5*16, 1), TMAX) # 5 waveforms of capacity 16 + c = np.full((5*16, 1), TMAX, dtype=np.float32) # 5 waveforms of capacity 16 c_locs = np.zeros((5,), dtype='int') c_caps = np.zeros((5,), dtype='int') + ebuf = np.zeros((4, 1, 2), dtype=np.int32) for i in range(5): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping # SDF specifies IOPATH delays with respect to output polarity # SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition delays = np.zeros((1, 5, 2, 2)) - delays[0, 0, 0, 0] = 0.1 # A -> Z rise delay - delays[0, 0, 0, 1] = 0.2 # A -> Z fall delay - delays[0, 0, 1, 0] = 0.1 # A -> Z negative pulse limit (terminate in rising Z) - delays[0, 0, 1, 1] = 0.2 # A -> Z positive pulse limit + delays[0, 0, 0, 0] = 0.1 # A rise -> Z rise + delays[0, 0, 0, 1] = 0.2 # A rise -> Z fall + delays[0, 0, 1, 0] = 0.1 # A fall -> Z rise + delays[0, 0, 1, 1] = 0.2 # A fall -> Z fall delays[0, 1, :, 0] = 0.3 # as above for B -> Z delays[0, 1, :, 1] = 0.4 delays[0, 2, :, 0] = 0.5 # as above for C -> Z @@ -32,7 +66,7 @@ def test_nand_delays(): def wave_assert(inputs, output): for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i - wave_eval_cpu(op, c, c_locs, c_caps, 0, delays, simctl_int) + wave_eval_cpu(op, c, c_locs, c_caps, ebuf, 0, delays, simctl_int, 0, 0) for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v) wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1 @@ -145,7 +179,7 @@ def compare_to_logic_sim(wsim: WaveSim): lsim.s_to_c() lsim.c_prop() lsim.c_to_s() - exp = logic.bp_to_mv(lsim.s[1]) + exp = logic.bp_to_mv(lsim.s[1])[:,:tests.shape[-1]] resp[resp == logic.PPULSE] = logic.ZERO resp[resp == logic.NPULSE] = logic.ONE @@ -156,13 +190,13 @@ def compare_to_logic_sim(wsim: WaveSim): np.testing.assert_allclose(resp, exp) -def test_b15(b15_2ig_circuit, b15_2ig_delays): - compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8)) +def test_b15(b15_2ig_circuit_resolved, b15_2ig_delays): + compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8)) -def test_b15_strip_forks(b15_2ig_circuit, b15_2ig_delays): - compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True)) +def test_b15_strip_forks(b15_2ig_circuit_resolved, b15_2ig_delays): + compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True)) -def test_b15_cuda(b15_2ig_circuit, b15_2ig_delays): - compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True)) +def test_b15_cuda(b15_2ig_circuit_resolved, b15_2ig_delays): + compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True))