Compare commits

..

No commits in common. 'devel' and 'main' have entirely different histories.
devel ... main

  1. 2
      docs/conf.py
  2. 2
      setup.py
  3. 18
      src/kyupy/__init__.py
  4. 97
      src/kyupy/circuit.py
  5. 18
      src/kyupy/logic.py
  6. 162
      src/kyupy/logic_sim.py
  7. 37
      src/kyupy/sdf.py
  8. 132
      src/kyupy/sim.py
  9. 32
      src/kyupy/stil.py
  10. 168
      src/kyupy/techlib.py
  11. 9
      src/kyupy/verilog.py
  12. 173
      src/kyupy/wave_sim.py
  13. BIN
      tests/b15_4ig.sa_rf.stil.gz
  14. 36
      tests/conftest.py
  15. 39
      tests/gates.sdf
  16. 12
      tests/gates.v
  17. 23
      tests/test_circuit.py
  18. 87
      tests/test_logic_sim.py
  19. 6
      tests/test_sdf.py
  20. 12
      tests/test_verilog.py
  21. 60
      tests/test_wave_sim.py

2
docs/conf.py

@ -24,7 +24,7 @@ copyright = '2020-2023, Stefan Holst'
author = 'Stefan Holst' author = 'Stefan Holst'
# The full version, including alpha/beta/rc tags # The full version, including alpha/beta/rc tags
release = '0.0.5' release = '0.0.4'
# -- General configuration --------------------------------------------------- # -- General configuration ---------------------------------------------------

2
setup.py

@ -5,7 +5,7 @@ with open('README.rst', 'r') as f:
setup( setup(
name='kyupy', name='kyupy',
version='0.0.5', version='0.0.4',
description='High-performance processing and analysis of non-hierarchical VLSI designs', description='High-performance processing and analysis of non-hierarchical VLSI designs',
long_description=long_description, long_description=long_description,
long_description_content_type='text/x-rst', long_description_content_type='text/x-rst',

18
src/kyupy/__init__.py

@ -57,18 +57,6 @@ def hr_bytes(nbytes):
multiplier += 1 multiplier += 1
return f'{nbytes:.1f}{["", "ki", "Mi", "Gi", "Ti", "Pi"][multiplier]}B' return f'{nbytes:.1f}{["", "ki", "Mi", "Gi", "Ti", "Pi"][multiplier]}B'
def eng(number):
"""Formats a given number using engineering notation."""
exponent = 0
if abs(number) < 1:
while abs(number) >= 1000:
number *= 1000
exponent -= 3
else:
while abs(number) >= 1000:
number /= 1000
exponent += 3
return f'{number:.0f}' + (f'e{exponent}' if exponent != 0 else '')
def hr_time(seconds): def hr_time(seconds):
"""Formats a given time interval for human readability.""" """Formats a given time interval for human readability."""
@ -150,10 +138,10 @@ class Log:
self._limit = limit self._limit = limit
def stop_limit(self): def stop_limit(self):
self._limit = -1
if self.filtered > 0: if self.filtered > 0:
self.info(f'{self.filtered} more messages (filtered).') log.info(f'{self.filtered} more messages (filtered).')
self.filtered = 0 self.filtered = 0
self._limit = -1
def __getstate__(self): def __getstate__(self):
return {'elapsed': time.perf_counter() - self.start} return {'elapsed': time.perf_counter() - self.start}
@ -161,8 +149,6 @@ class Log:
def __setstate__(self, state): def __setstate__(self, state):
self.logfile = sys.stdout self.logfile = sys.stdout
self.indent = 0 self.indent = 0
self._limit = -1
self.filtered = 0
self.start = time.perf_counter() - state['elapsed'] self.start = time.perf_counter() - state['elapsed']
def write(self, s, indent=0): def write(self, s, indent=0):

97
src/kyupy/circuit.py

@ -10,40 +10,20 @@ Circuit graphs also define an ordering of inputs, outputs and other nodes to eas
""" """
from __future__ import annotations
from collections import deque, defaultdict from collections import deque, defaultdict
import re import re
from typing import Union
import numpy as np import numpy as np
class GrowingList(list): class GrowingList(list):
def __setitem__(self, index, value): def __setitem__(self, index, value):
if value is None: self.has_nones = True if index >= len(self):
if index == len(self): return super().append(value) self.extend([None] * (index + 1 - len(self)))
if index > len(self):
super().extend([None] * (index + 1 - len(self)))
self.has_nones = True
super().__setitem__(index, value) super().__setitem__(index, value)
def __getitem__(self, index): def free_index(self):
if isinstance(index, slice): return super().__getitem__(index) return next((i for i, x in enumerate(self) if x is None), len(self))
return super().__getitem__(index) if index < len(self) else None
@property
def free_idx(self):
fi = len(self)
if hasattr(self, 'has_nones') and self.has_nones:
fi = next((i for i, x in enumerate(self) if x is None), len(self))
self.has_nones = fi < len(self)
return fi
def without_nones(self):
for item in self:
if item is not None:
yield item
class IndexList(list): class IndexList(list):
@ -96,10 +76,10 @@ class Node:
by allocating an array or list :code:`my_data` of length :code:`len(n.circuit.nodes)` and by allocating an array or list :code:`my_data` of length :code:`len(n.circuit.nodes)` and
accessing it by :code:`my_data[n.index]` or simply by :code:`my_data[n]`. accessing it by :code:`my_data[n.index]` or simply by :code:`my_data[n]`.
""" """
self.ins: GrowingList[Line] = GrowingList() self.ins = GrowingList()
"""A list of input connections (:class:`Line` objects). """A list of input connections (:class:`Line` objects).
""" """
self.outs: GrowingList[Line] = GrowingList() self.outs = GrowingList()
"""A list of output connections (:class:`Line` objects). """A list of output connections (:class:`Line` objects).
""" """
@ -155,7 +135,7 @@ class Line:
Use the explicit case only if connections to specific pins are required. Use the explicit case only if connections to specific pins are required.
It may overwrite any previous line references in the connection list of the nodes. It may overwrite any previous line references in the connection list of the nodes.
""" """
def __init__(self, circuit: Circuit, driver: Union[Node, tuple[Node, int]], reader: Union[Node, tuple[Node, int]]): def __init__(self, circuit, driver, reader):
self.circuit = circuit self.circuit = circuit
"""The :class:`Circuit` object the line is part of. """The :class:`Circuit` object the line is part of.
""" """
@ -167,7 +147,7 @@ class Line:
by allocating an array or list :code:`my_data` of length :code:`len(l.circuit.lines)` and by allocating an array or list :code:`my_data` of length :code:`len(l.circuit.lines)` and
accessing it by :code:`my_data[l.index]` or simply by :code:`my_data[l]`. accessing it by :code:`my_data[l.index]` or simply by :code:`my_data[l]`.
""" """
if not isinstance(driver, tuple): driver = (driver, driver.outs.free_idx) if not isinstance(driver, tuple): driver = (driver, driver.outs.free_index())
self.driver = driver[0] self.driver = driver[0]
"""The :class:`Node` object that drives this line. """The :class:`Node` object that drives this line.
""" """
@ -177,7 +157,7 @@ class Line:
This is the position in the list :py:attr:`Node.outs` of the driving node this line referenced from: This is the position in the list :py:attr:`Node.outs` of the driving node this line referenced from:
:code:`self.driver.outs[self.driver_pin] == self`. :code:`self.driver.outs[self.driver_pin] == self`.
""" """
if not isinstance(reader, tuple): reader = (reader, reader.ins.free_idx) if not isinstance(reader, tuple): reader = (reader, reader.ins.free_index())
self.reader = reader[0] self.reader = reader[0]
"""The :class:`Node` object that reads this line. """The :class:`Node` object that reads this line.
""" """
@ -312,7 +292,7 @@ class Circuit:
def _locs(self, prefix, nodes): def _locs(self, prefix, nodes):
d_top = dict() d_top = dict()
for i, n in enumerate(nodes): for i, n in enumerate(nodes):
if m := re.match(fr'({re.escape(prefix)}.*?)((?:[\d_\[\]])*$)', n.name): if m := re.match(fr'({prefix}.*?)((?:[\d_\[\]])*$)', n.name):
path = [m[1]] + [int(v) for v in re.split(r'[_\[\]]+', m[2]) if len(v) > 0] path = [m[1]] + [int(v) for v in re.split(r'[_\[\]]+', m[2]) if len(v) > 0]
d = d_top d = d_top
for j in path[:-1]: for j in path[:-1]:
@ -354,16 +334,15 @@ class Circuit:
def get_or_add_fork(self, name): def get_or_add_fork(self, name):
return self.forks[name] if name in self.forks else Node(self, name) return self.forks[name] if name in self.forks else Node(self, name)
def remove_dangling_nodes(self, root_node:Node, keep=[]): def remove_dangling_nodes(self, root_node:Node):
if len([l for l in root_node.outs if l is not None]) > 0: return if len([l for l in root_node.outs if l is not None]) > 0: return
lines = [l for l in root_node.ins if l is not None] lines = [l for l in root_node.ins if l is not None]
drivers = [l.driver for l in lines] drivers = [l.driver for l in lines]
if root_node in keep: return
root_node.remove() root_node.remove()
for l in lines: for l in lines:
l.remove() l.remove()
for d in drivers: for d in drivers:
self.remove_dangling_nodes(d, keep=keep) self.remove_dangling_nodes(d)
def eliminate_1to1_forks(self): def eliminate_1to1_forks(self):
"""Removes all forks that drive only one node. """Removes all forks that drive only one node.
@ -391,21 +370,6 @@ class Circuit:
in_line.reader_pin = out_reader_pin in_line.reader_pin = out_reader_pin
in_line.reader.ins[in_line.reader_pin] = in_line in_line.reader.ins[in_line.reader_pin] = in_line
def remove_forks(self):
ios = set(self.io_nodes)
for n in list(self.forks.values()):
if n in ios: continue
d = None
if (l := n.ins[0]) is not None:
d = l.driver
l.remove()
for l in list(n.outs):
if l is None: continue
r, rp = l.reader, l.reader_pin
l.remove()
if d is not None: Line(self, d, (r, rp))
n.remove()
def substitute(self, node, impl): def substitute(self, node, impl):
"""Replaces a given node with the given implementation circuit. """Replaces a given node with the given implementation circuit.
@ -464,7 +428,7 @@ class Circuit:
for l, ll in zip(impl_out_lines, node_out_lines): # connect outputs for l, ll in zip(impl_out_lines, node_out_lines): # connect outputs
if ll is None: if ll is None:
if l.driver in node_map: if l.driver in node_map:
self.remove_dangling_nodes(node_map[l.driver], keep=ios) self.remove_dangling_nodes(node_map[l.driver])
continue continue
if len(l.reader.outs) > 0: # output is also read by impl. circuit, connect to fork. if len(l.reader.outs) > 0: # output is also read by impl. circuit, connect to fork.
ll.driver = node_map[l.reader] ll.driver = node_map[l.reader]
@ -483,21 +447,6 @@ class Circuit:
if n.kind in tlib.cells: if n.kind in tlib.cells:
self.substitute(n, tlib.cells[n.kind][0]) self.substitute(n, tlib.cells[n.kind][0])
def remove_constants(self):
c1gen = None
for n in self.nodes:
if n.kind == '__const0__': # just remove, unconnected inputs are defined 0.
for l in n.outs:
l.remove()
n.remove()
elif n.kind == '__const1__':
if c1gen is None: c1gen = Node(self, '__const1gen__', 'INV1') # one unique const 1 generator
for l in n.outs:
r, rp = l.reader, l.reader_pin
l.remove()
Line(self, c1gen, (r, rp))
n.remove()
def copy(self): def copy(self):
"""Returns a deep copy of the circuit. """Returns a deep copy of the circuit.
""" """
@ -552,15 +501,14 @@ class Circuit:
substrings 'dff' or 'latch' are yielded first. substrings 'dff' or 'latch' are yielded first.
""" """
visit_count = np.zeros(len(self.nodes), dtype=np.uint32) visit_count = np.zeros(len(self.nodes), dtype=np.uint32)
start = set(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower() or 'latch' in n.kind.lower()) queue = deque(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower() or 'latch' in n.kind.lower())
queue = deque(start)
while len(queue) > 0: while len(queue) > 0:
n = queue.popleft() n = queue.popleft()
for line in n.outs: for line in n.outs:
if line is None: continue if line is None: continue
succ = line.reader succ = line.reader
visit_count[succ] += 1 visit_count[succ] += 1
if visit_count[succ] == len(succ.ins) and succ not in start: if visit_count[succ] == len(succ.ins) and 'dff' not in succ.kind.lower() and 'latch' not in succ.kind.lower():
queue.append(succ) queue.append(succ)
yield n yield n
@ -615,21 +563,6 @@ class Circuit:
if marks[n]: if marks[n]:
yield n yield n
def fanout(self, origin_nodes):
"""Generator function to iterate over the fan-out cone of a given list of origin nodes.
Nodes are yielded in topological order.
"""
marks = [False] * len(self.nodes)
for n in origin_nodes:
marks[n] = True
for n in self.topological_order():
if not marks[n]:
for line in n.ins.without_nones():
marks[n] |= marks[line.driver]
if marks[n]:
yield n
def fanout_free_regions(self): def fanout_free_regions(self):
for stem in self.reversed_topological_order(): for stem in self.reversed_topological_order():
if len(stem.outs) == 1 and 'dff' not in stem.kind.lower(): continue if len(stem.outs) == 1 and 'dff' not in stem.kind.lower(): continue

18
src/kyupy/logic.py

@ -241,8 +241,6 @@ def mv_latch(d, t, q_prev, out=None):
def mv_transition(init, final, out=None): def mv_transition(init, final, out=None):
"""Computes the logic transitions from the initial values of ``init`` to the final values of ``final``. """Computes the logic transitions from the initial values of ``init`` to the final values of ``final``.
Pulses in the input data are ignored. If any of the inputs are ``UNKNOWN``, the result is ``UNKNOWN``. Pulses in the input data are ignored. If any of the inputs are ``UNKNOWN``, the result is ``UNKNOWN``.
If init is ``UNASSIGNED``, the result is the final value of ``final``.
If final is ``UNASSIGNED``, the result is the initial value of ``init``.
If both inputs are ``UNASSIGNED``, the result is ``UNASSIGNED``. If both inputs are ``UNASSIGNED``, the result is ``UNASSIGNED``.
:param init: A multi-valued array. :param init: A multi-valued array.
@ -253,9 +251,7 @@ def mv_transition(init, final, out=None):
out = out or np.empty(np.broadcast(init, final).shape, dtype=np.uint8) out = out or np.empty(np.broadcast(init, final).shape, dtype=np.uint8)
out[...] = (init & 0b010) | (final & 0b001) out[...] = (init & 0b010) | (final & 0b001)
out[...] |= ((out << 1) ^ (out << 2)) & 0b100 out[...] |= ((out << 1) ^ (out << 2)) & 0b100
out[...] = np.choose(init == UNASSIGNED, [out, (final & 0b001) * ONE]) unknown = (init == UNKNOWN) | (init == UNASSIGNED) | (final == UNKNOWN) | (final == UNASSIGNED)
out[...] = np.choose(final == UNASSIGNED, [out, ((init & 0b010) >> 1) * ONE])
unknown = (init == UNKNOWN) | (final == UNKNOWN)
unassigned = (init == UNASSIGNED) & (final == UNASSIGNED) unassigned = (init == UNASSIGNED) & (final == UNASSIGNED)
np.putmask(out, unknown, UNKNOWN) np.putmask(out, unknown, UNKNOWN)
np.putmask(out, unassigned, UNASSIGNED) np.putmask(out, unassigned, UNASSIGNED)
@ -269,18 +265,6 @@ def mv_to_bp(mva):
return np.packbits(unpackbits(mva)[...,:3], axis=-2, bitorder='little').swapaxes(-1,-2) return np.packbits(unpackbits(mva)[...,:3], axis=-2, bitorder='little').swapaxes(-1,-2)
def mv_init(mva):
"""Returns the initial binary values for mva.
"""
return (mva>>1) & ((mva>>2)|mva) & 1
def mv_final(mva):
"""Returns the final binary value of mva.
"""
return mva & ((mva>>2)|(mva>>1)) & 1
def bparray(*a): def bparray(*a):
"""Converts (lists of) Boolean values or strings into a bit-parallel array. """Converts (lists of) Boolean values or strings into a bit-parallel array.

162
src/kyupy/logic_sim.py

@ -10,10 +10,9 @@ import math
import numpy as np import numpy as np
from . import numba, logic, hr_bytes, sim, eng, cdiv from . import numba, logic, hr_bytes, sim
from .circuit import Circuit from .circuit import Circuit
class LogicSim(sim.SimOps): class LogicSim(sim.SimOps):
"""A bit-parallel naïve combinational simulator for 2-, 4-, or 8-valued logic. """A bit-parallel naïve combinational simulator for 2-, 4-, or 8-valued logic.
@ -29,7 +28,7 @@ class LogicSim(sim.SimOps):
self.m = m self.m = m
self.mdim = math.ceil(math.log2(m)) self.mdim = math.ceil(math.log2(m))
self.sims = sims self.sims = sims
nbytes = cdiv(sims, 8) nbytes = (sims - 1) // 8 + 1
self.c = np.zeros((self.c_len, self.mdim, nbytes), dtype=np.uint8) self.c = np.zeros((self.c_len, self.mdim, nbytes), dtype=np.uint8)
self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8) self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8)
@ -45,14 +44,14 @@ class LogicSim(sim.SimOps):
self.s[:,:,1,:] = 255 # unassigned self.s[:,:,1,:] = 255 # unassigned
def __repr__(self): def __repr__(self):
return f'{{name: "{self.circuit.name}", sims: {self.sims}, m: {self.m}, c_bytes: {eng(self.c.nbytes)}}}' return f'{{name: "{self.circuit.name}", sims: {self.sims}, m: {self.m}, c_bytes: {self.c.nbytes}}}'
def s_to_c(self): def s_to_c(self):
"""Copies the values from ``s[0]`` the inputs of the combinational portion. """Copies the values from ``s[0]`` the inputs of the combinational portion.
""" """
self.c[self.pippi_c_locs] = self.s[0, self.pippi_s_locs, :self.mdim] self.c[self.pippi_c_locs] = self.s[0, self.pippi_s_locs, :self.mdim]
def c_prop(self, sims=None, inject_cb=None, flip_line=-1, flip_mask=None): def c_prop(self, inject_cb=None):
"""Propagate the input values through the combinational circuit towards the outputs. """Propagate the input values through the combinational circuit towards the outputs.
Performs all logic operations in topological order. Performs all logic operations in topological order.
@ -68,17 +67,10 @@ class LogicSim(sim.SimOps):
t1 = self.c_locs[self.tmp2_idx] t1 = self.c_locs[self.tmp2_idx]
if self.m == 2: if self.m == 2:
if inject_cb is None: if inject_cb is None:
if flip_mask is None: _prop_cpu(self.ops, self.c_locs, self.c)
flip_mask = np.full(self.c.shape[-1], 255, dtype=np.uint8)
else:
if len(flip_mask) < self.c.shape[-1]:
flip_mask2 = np.full(self.c.shape[-1], 0, dtype=np.uint8)
flip_mask2[:len(flip_mask)] = flip_mask
flip_mask = flip_mask2
_prop_cpu(self.ops, self.c_locs, self.c, int(flip_line), flip_mask)
else: else:
for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]: for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)] o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
if op == sim.BUF1: self.c[o0]=self.c[i0] if op == sim.BUF1: self.c[o0]=self.c[i0]
elif op == sim.INV1: self.c[o0] = ~self.c[i0] elif op == sim.INV1: self.c[o0] = ~self.c[i0]
elif op == sim.AND2: self.c[o0] = self.c[i0] & self.c[i1] elif op == sim.AND2: self.c[o0] = self.c[i0] & self.c[i1]
@ -113,10 +105,10 @@ class LogicSim(sim.SimOps):
elif op == sim.OAI211:self.c[o0] = ~((self.c[i0] | self.c[i1]) & self.c[i2] & self.c[i3]) elif op == sim.OAI211:self.c[o0] = ~((self.c[i0] | self.c[i1]) & self.c[i2] & self.c[i3])
elif op == sim.MUX21: self.c[o0] = (self.c[i0] & ~self.c[i2]) | (self.c[i1] & self.c[i2]) elif op == sim.MUX21: self.c[o0] = (self.c[i0] & ~self.c[i2]) | (self.c[i1] & self.c[i2])
else: print(f'unknown op {op}') else: print(f'unknown op {op}')
inject_cb(o0l, self.c[o0]) inject_cb(o0, self.s[o0])
elif self.m == 4: elif self.m == 4:
for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]: for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)] o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
if op == sim.BUF1: self.c[o0]=self.c[i0] if op == sim.BUF1: self.c[o0]=self.c[i0]
elif op == sim.INV1: logic.bp4v_not(self.c[o0], self.c[i0]) elif op == sim.INV1: logic.bp4v_not(self.c[o0], self.c[i0])
elif op == sim.AND2: logic.bp4v_and(self.c[o0], self.c[i0], self.c[i1]) elif op == sim.AND2: logic.bp4v_and(self.c[o0], self.c[i0], self.c[i1])
@ -189,10 +181,9 @@ class LogicSim(sim.SimOps):
logic.bp4v_and(self.c[t1], self.c[i1], self.c[i2]) logic.bp4v_and(self.c[t1], self.c[i1], self.c[i2])
logic.bp4v_or(self.c[o0], self.c[t0], self.c[t1]) logic.bp4v_or(self.c[o0], self.c[t0], self.c[t1])
else: print(f'unknown op {op}') else: print(f'unknown op {op}')
if inject_cb is not None: inject_cb(o0l, self.c[o0])
else: else:
for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]: for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)] o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
if op == sim.BUF1: self.c[o0]=self.c[i0] if op == sim.BUF1: self.c[o0]=self.c[i0]
elif op == sim.INV1: logic.bp8v_not(self.c[o0], self.c[i0]) elif op == sim.INV1: logic.bp8v_not(self.c[o0], self.c[i0])
elif op == sim.AND2: logic.bp8v_and(self.c[o0], self.c[i0], self.c[i1]) elif op == sim.AND2: logic.bp8v_and(self.c[o0], self.c[i0], self.c[i1])
@ -265,7 +256,7 @@ class LogicSim(sim.SimOps):
logic.bp8v_and(self.c[t1], self.c[i1], self.c[i2]) logic.bp8v_and(self.c[t1], self.c[i1], self.c[i2])
logic.bp8v_or(self.c[o0], self.c[t0], self.c[t1]) logic.bp8v_or(self.c[o0], self.c[t0], self.c[t1])
else: print(f'unknown op {op}') else: print(f'unknown op {op}')
if inject_cb is not None: inject_cb(o0l, self.c[o0]) if inject_cb is not None: inject_cb(o0, self.s[o0])
def c_to_s(self): def c_to_s(self):
"""Copies (captures) the results of the combinational portion to ``s[1]``. """Copies (captures) the results of the combinational portion to ``s[1]``.
@ -305,9 +296,9 @@ class LogicSim(sim.SimOps):
@numba.njit @numba.njit
def _prop_cpu(ops, c_locs, c, flip_line, flip_mask): def _prop_cpu(ops, c_locs, c):
for op, o0l, i0l, i1l, i2l, i3l in ops[:,:6]: for op, o0, i0, i1, i2, i3 in ops[:,:6]:
o0, i0, i1, i2, i3 = [c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)] o0, i0, i1, i2, i3 = [c_locs[x] for x in (o0, i0, i1, i2, i3)]
if op == sim.BUF1: c[o0]=c[i0] if op == sim.BUF1: c[o0]=c[i0]
elif op == sim.INV1: c[o0] = ~c[i0] elif op == sim.INV1: c[o0] = ~c[i0]
elif op == sim.AND2: c[o0] = c[i0] & c[i1] elif op == sim.AND2: c[o0] = c[i0] & c[i1]
@ -342,124 +333,3 @@ def _prop_cpu(ops, c_locs, c, flip_line, flip_mask):
elif op == sim.OAI211: c[o0] = ~((c[i0] | c[i1]) & c[i2] & c[i3]) elif op == sim.OAI211: c[o0] = ~((c[i0] | c[i1]) & c[i2] & c[i3])
elif op == sim.MUX21: c[o0] = (c[i0] & ~c[i2]) | (c[i1] & c[i2]) elif op == sim.MUX21: c[o0] = (c[i0] & ~c[i2]) | (c[i1] & c[i2])
else: print(f'unknown op {op}') else: print(f'unknown op {op}')
if flip_line >= 0 and o0l == flip_line:
#n = len(flip_mask)
c[o0] = c[o0] ^ flip_mask
class LogicSim6V(sim.SimOps):
"""A bit-parallel naïve combinational simulator for 6-valued logic.
:param circuit: The circuit to simulate.
:param sims: The number of parallel logic simulations to perform.
:param c_reuse: If True, intermediate signal values may get overwritten when not needed anymore to save memory.
:param strip_forks: If True, forks are not included in the simulation model to save memory and simulation time.
"""
def __init__(self, circuit: Circuit, sims: int = 8, c_reuse: bool = False, strip_forks: bool = False):
super().__init__(circuit, c_reuse=c_reuse, strip_forks=strip_forks)
self.sims = sims
nbytes = cdiv(sims, 8)
self.c = np.zeros((self.c_len, 3, nbytes), dtype=np.uint8)
self.s = np.zeros((2, self.s_len, self.sims), dtype=np.uint8)
"""Logic values of the sequential elements (flip-flops) and ports.
It is a pair of arrays in mv storage format:
* ``s[0]`` Assigned values. Simulator will read (P)PI value from here.
* ``s[1]`` Result values. Simulator will write (P)PO values here.
Access this array to assign new values to the (P)PIs or read values from the (P)POs.
"""
def __repr__(self):
return f'{{name: "{self.circuit.name}", sims: {self.sims}, c_bytes: {eng(self.c.nbytes)}}}'
def s_to_c(self):
"""Assigns the values from ``s[0]`` to the inputs of the combinational portion.
"""
self.c[self.pippi_c_locs] = logic.mv_to_bp(self.s[0, self.pippi_s_locs])
def c_prop(self):
c_prop_cpu(self.ops, self.c, self.c_locs, self.tmp_idx, self.tmp2_idx)
def c_to_s(self):
"""Captures the results of the combinational portion into ``s[1]``.
"""
self.s[1, self.poppo_s_locs] = logic.bp_to_mv(self.c[self.poppo_c_locs])[:,:self.sims]
@numba.njit
def c_prop_cpu(ops, c, c_locs, tmp_idx, tmp2_idx):
t0 = c[c_locs[tmp_idx]]
t1 = c[c_locs[tmp2_idx]]
inv_op = np.array([255, 255, 0], dtype=np.uint8)[np.newaxis, :, np.newaxis]
for op, o0l, i0l, i1l, i2l, i3l in ops[:,:6]:
o0, i0, i1, i2, i3 = [c[c_locs[x]] for x in (o0l, i0l, i1l, i2l, i3l)]
if op == sim.BUF1 or op == sim.INV1:
o0[...] = i0
elif op == sim.AND2 or op == sim.NAND2:
o0[0] = i0[0] & i1[0]
o0[1] = i0[1] & i1[1]
o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])|
i1[2]&(i0[0]|i0[1]|i0[2]))
elif op == sim.AND3 or op == sim.NAND3:
o0[0] = i0[0] & i1[0] & i2[0]
o0[1] = i0[1] & i1[1] & i2[1]
o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2])|
i1[2]&(i0[0]|i0[1]|i0[2])&(i2[0]|i2[1]|i2[2])|
i2[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2]))
elif op == sim.AND4 or op == sim.NAND4:
o0[0] = i0[0] & i1[0] & i2[0] & i3[0]
o0[1] = i0[1] & i1[1] & i2[1] & i3[1]
o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2])&(i3[0]|i3[1]|i3[2])|
i1[2]&(i0[0]|i0[1]|i0[2])&(i2[0]|i2[1]|i2[2])&(i3[0]|i3[1]|i3[2])|
i2[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2])&(i3[0]|i3[1]|i3[2])|
i3[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2]))
elif op == sim.OR2 or op == sim.NOR2:
o0[0] = i0[0] | i1[0]
o0[1] = i0[1] | i1[1]
o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])|
i1[2]&(~i0[0]|~i0[1]|i0[2]))
elif op == sim.OR3 or op == sim.NOR3:
o0[0] = i0[0] | i1[0] | i2[0]
o0[1] = i0[1] | i1[1] | i2[1]
o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2])|
i1[2]&(~i0[0]|~i0[1]|i0[2])&(~i2[0]|~i2[1]|i2[2])|
i2[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2]))
elif op == sim.OR4 or op == sim.NOR4:
o0[0] = i0[0] | i1[0] | i2[0] | i3[0]
o0[1] = i0[1] | i1[1] | i2[1] | i3[1]
o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2])&(~i3[0]|~i3[1]|i3[2])|
i1[2]&(~i0[0]|~i0[1]|i0[2])&(~i2[0]|~i2[1]|i2[2])&(~i3[0]|~i3[1]|i3[2])|
i2[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2])&(~i3[0]|~i3[1]|i3[2])|
i3[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2]))
elif op == sim.XOR2 or op == sim.XNOR2:
o0[0] = i0[0] ^ i1[0]
o0[1] = i0[1] ^ i1[1]
o0[2] = i0[2] | i1[2]
elif op == sim.MUX21:
# t1 = ~i2
t1[...] = i2 ^ inv_op
# t0 = i0 & t1
t0[0] = i0[0] & t1[0]
t0[1] = i0[1] & t1[1]
t0[2] = (i0[2]&(t1[0]|t1[1]|t1[2])|
t1[2]&(i0[0]|i0[1]|i0[2]))
# t1 = i1 & i2
t1[0] = i1[0] & i2[0]
t1[1] = i1[1] & i2[1]
t1[2] = (i1[2]&(i2[0]|i2[1]|i2[2])|
i2[2]&(i1[0]|i1[1]|i1[2]))
# o0 = t0 | t1
o0[0] = t0[0] | t1[0]
o0[1] = t0[1] | t1[1]
o0[2] = (t0[2]&(~t1[0]|~t1[1]|t1[2])|
t1[2]&(~t0[0]|~t0[1]|t0[2]))
else: print(f'unknown op {op}')
if (op == sim.INV1 or
op == sim.NAND2 or op == sim.NAND3 or op == sim.NAND4 or
op == sim.NOR2 or op == sim.NOR3 or op == sim.NOR4 or
op == sim.XNOR2):
o0[...] = o0 ^ inv_op

37
src/kyupy/sdf.py

@ -61,7 +61,6 @@ class DelayFile:
delays = np.zeros((len(circuit.lines), 2, 2, 3)) # dataset last during construction. delays = np.zeros((len(circuit.lines), 2, 2, 3)) # dataset last during construction.
with log.limit(50):
for name, iopaths in self.cells.items(): for name, iopaths in self.cells.items():
name = name.replace('\\', '') name = name.replace('\\', '')
if cell := circuit.cells.get(name, None): if cell := circuit.cells.get(name, None):
@ -103,12 +102,11 @@ class DelayFile:
delays = np.zeros((len(circuit.lines), 2, 2, 3)) # dataset last during construction. delays = np.zeros((len(circuit.lines), 2, 2, 3)) # dataset last during construction.
nonfork_annotations = 0
for n1, n2, *delvals in self._interconnects: for n1, n2, *delvals in self._interconnects:
delvals = [d if len(d) > 0 else [0, 0, 0] for d in delvals] delvals = [d if len(d) > 0 else [0, 0, 0] for d in delvals]
if max(max(delvals)) == 0: continue if max(max(delvals)) == 0: continue
cn1, pn1 = (n1, None) if (slash := n1.rfind('/')) < 0 else (n1[:slash], n1[slash+1:]) cn1, pn1 = n1.split('/') if '/' in n1 else (n1, None)
cn2, pn2 = (n2, None) if (slash := n2.rfind('/')) < 0 else (n2[:slash], n2[slash+1:]) cn2, pn2 = n2.split('/') if '/' in n2 else (n2, None)
cn1 = cn1.replace('\\','') cn1 = cn1.replace('\\','')
cn2 = cn2.replace('\\','') cn2 = cn2.replace('\\','')
c1, c2 = circuit.cells[cn1], circuit.cells[cn2] c1, c2 = circuit.cells[cn1], circuit.cells[cn2]
@ -121,27 +119,19 @@ class DelayFile:
log.warn(f'No line to annotate pin {pn2} of {c2}') log.warn(f'No line to annotate pin {pn2} of {c2}')
continue continue
f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver # find the forks between cells. f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver # find the forks between cells.
if f1 == c2 and f2 == c1:
nonfork_annotations += 1
if nonfork_annotations < 10:
log.warn(f'No fork between {c1.name}/{p1} and {c2.name}/{p2}, using {c2.name}/{p2}')
line = c2.ins[p2]
else:
assert f1.kind == '__fork__' assert f1.kind == '__fork__'
assert f2.kind == '__fork__' assert f2.kind == '__fork__'
if len(f2.outs) == 1: if f1 != f2: # at least two forks, make sure f2 is a branchfork connected to f1
assert f1 == f2 or f1.outs[f2.ins[0].driver_pin] == f2.ins[0] assert len(f2.outs) == 1
assert f1.outs[f2.ins[0].driver_pin] == f2.ins[0]
line = f2.ins[0]
elif len(f2.outs) == 1: # f1==f2, only OK when there is no fanout.
line = f2.ins[0] line = f2.ins[0]
else: else:
nonfork_annotations += 1 log.warn(f'No branchfork to annotate interconnect delay {c1.name}/{p1}->{c2.name}/{p2}')
if nonfork_annotations < 10: continue
log.warn(f'No branchfork between {c1.name}/{p1} and {c2.name}/{p2}, using {c2.name}/{p2}')
line = c2.ins[p2]
delays[line, :] = delvals delays[line, :] = delvals
if nonfork_annotations > 0:
log.warn(f'{nonfork_annotations} interconnect annotations were moved to gate inputs due to missing forks.')
return np.moveaxis(delays, -1, 0) return np.moveaxis(delays, -1, 0)
@ -166,10 +156,6 @@ class SdfTransformer(Transformer):
entries = [e for a in args if hasattr(a, 'children') for e in a.children] entries = [e for a in args if hasattr(a, 'children') for e in a.children]
return name, entries return name, entries
@staticmethod
def cond(args): # ignore conditions
return args[1]
@staticmethod @staticmethod
def start(args): def start(args):
name = next((a for a in args if isinstance(a, str)), None) name = next((a for a in args if isinstance(a, str)), None)
@ -194,12 +180,9 @@ GRAMMAR = r"""
| "(INSTANCE" ID? ")" | "(INSTANCE" ID? ")"
| "(TIMINGCHECK" _ignore* ")" | "(TIMINGCHECK" _ignore* ")"
| delay )* ")" | delay )* ")"
delay: "(DELAY" "(ABSOLUTE" (interconnect | iopath | cond)* ")" ")" delay: "(DELAY" "(ABSOLUTE" (interconnect | iopath)* ")" ")"
interconnect: "(INTERCONNECT" ID ID triple* ")" interconnect: "(INTERCONNECT" ID ID triple* ")"
iopath: "(IOPATH" ID_OR_EDGE ID_OR_EDGE triple* ")" iopath: "(IOPATH" ID_OR_EDGE ID_OR_EDGE triple* ")"
cond: "(" "COND" cond_port_expr iopath ")"
?cond_port_expr: ID | "(" cond_port_expr ")" | cond_port_expr BINARY_OP cond_port_expr
BINARY_OP: /&&/ | /==/
NAME: /[^"]+/ NAME: /[^"]+/
ID_OR_EDGE: ( /[^() ]+/ | "(" /[^)]+/ ")" ) ID_OR_EDGE: ( /[^() ]+/ | "(" /[^)]+/ ")" )
ID: ( /[^"() ]+/ | "\"" /[^"]+/ "\"" ) ID: ( /[^"() ]+/ | "\"" /[^"]+/ "\"" )

132
src/kyupy/sim.py

@ -4,14 +4,9 @@ from bisect import bisect, insort_left
import numpy as np import numpy as np
from .circuit import Circuit
BUF1 = np.uint16(0b1010_1010_1010_1010) BUF1 = np.uint16(0b1010_1010_1010_1010)
INV1 = ~BUF1 INV1 = ~BUF1
__const0__ = BUF1
__const1__ = INV1
AND2 = np.uint16(0b1000_1000_1000_1000) AND2 = np.uint16(0b1000_1000_1000_1000)
AND3 = np.uint16(0b1000_0000_1000_0000) AND3 = np.uint16(0b1000_0000_1000_0000)
AND4 = np.uint16(0b1000_0000_0000_0000) AND4 = np.uint16(0b1000_0000_0000_0000)
@ -44,10 +39,7 @@ AOI211, OAI211 = ~AO211, ~OA211
MUX21 = np.uint16(0b1100_1010_1100_1010) # z = i1 if i2 else i0 (i2 is select) MUX21 = np.uint16(0b1100_1010_1100_1010) # z = i1 if i2 else i0 (i2 is select)
names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k]) names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16)])
prim2name = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k])
name2prim = dict([(k, v) for k, v in globals().items() if isinstance(v, np.uint16)])
kind_prefixes = { kind_prefixes = {
'nand': (NAND4, NAND3, NAND2), 'nand': (NAND4, NAND3, NAND2),
@ -164,7 +156,7 @@ class SimOps:
:param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces :param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
memory footprint, but intermediate signal waveforms become unaccessible after a propagation. memory footprint, but intermediate signal waveforms become unaccessible after a propagation.
""" """
def __init__(self, circuit: Circuit, c_caps=1, c_caps_min=1, a_ctrl=None, c_reuse=False, strip_forks=False): def __init__(self, circuit, c_caps=1, c_caps_min=1, a_ctrl=None, c_reuse=False, strip_forks=False):
self.circuit = circuit self.circuit = circuit
self.s_len = len(circuit.s_nodes) self.s_len = len(circuit.s_nodes)
@ -183,74 +175,84 @@ class SimOps:
self.ppo_offset = self.ppi_offset + self.s_len self.ppo_offset = self.ppi_offset + self.s_len
self.c_locs_len = self.ppo_offset + self.s_len self.c_locs_len = self.ppo_offset + self.s_len
# ALAP-toposort the circuit into self.ops # translate circuit structure into self.ops
levels = [] ops = []
interface_dict = dict((n, i) for i, n in enumerate(circuit.s_nodes))
ppio2idx = dict((n, i) for i, n in enumerate(circuit.s_nodes)) for n in circuit.topological_order():
ppos = set([n for n in circuit.s_nodes if len(n.ins) > 0]) if n in interface_dict:
readers = np.array([1 if l.reader in ppos else len(l.reader.outs) for l in circuit.lines], dtype=np.int32) # for ref-counting forks inp_idx = self.ppi_offset + interface_dict[n]
if len(n.outs) > 0 and n.outs[0] is not None: # first output of a PI/PPI
level_lines = [n.ins[0] for n in ppos] # start from PPOs ops.append((BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[0]]))
# FIXME: Should probably instanciate buffers for PPOs and attach DFF clocks if 'dff' in n.kind.lower(): # second output of DFF is inverted
if len(n.outs) > 1 and n.outs[1] is not None:
while len(level_lines) > 0: # traverse the circuit level-wise back towards (P)PIs ops.append((INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[1]]))
level_ops = [] else: # if not DFF, no output is inverted.
prev_level_lines = [] for o_line in n.outs[1:]:
if o_line is not None:
for l in level_lines: ops.append((BUF1, o_line.index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[o_line]))
n = l.driver continue
in_idxs = [n.ins[x].index if len(n.ins) > x and n.ins[x] is not None else self.zero_idx for x in [0,1,2,3]] # regular node, not PI/PPI or PO/PPO
if n in ppio2idx: o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx
in_idxs[0] = self.ppi_offset + ppio2idx[n] i0_idx = n.ins[0].index if len(n.ins) > 0 and n.ins[0] is not None else self.zero_idx
if l.driver_pin == 1 and 'dff' in n.kind.lower(): # second output of DFF is inverted i1_idx = n.ins[1].index if len(n.ins) > 1 and n.ins[1] is not None else self.zero_idx
level_ops.append((INV1, l.index, *in_idxs, *a_ctrl[l])) i2_idx = n.ins[2].index if len(n.ins) > 2 and n.ins[2] is not None else self.zero_idx
else: i3_idx = n.ins[3].index if len(n.ins) > 3 and n.ins[3] is not None else self.zero_idx
level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l]))
elif n.kind == '__fork__':
readers[n.ins[0]] -= 1
if readers[n.ins[0]] == 0: prev_level_lines.append(n.ins[0])
if not strip_forks: level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l]))
else:
prev_level_lines += n.ins
sp = None
kind = n.kind.lower() kind = n.kind.lower()
if kind == '__fork__':
if not strip_forks:
for o_line in n.outs:
if o_line is not None:
ops.append((BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o_line]))
continue
sp = None
for prefix, prims in kind_prefixes.items(): for prefix, prims in kind_prefixes.items():
if kind.startswith(prefix): if kind.startswith(prefix):
sp = prims[0] sp = prims[0]
if in_idxs[3] == self.zero_idx: if i3_idx == self.zero_idx:
sp = prims[1] sp = prims[1]
if in_idxs[2] == self.zero_idx: if i2_idx == self.zero_idx:
sp = prims[2] sp = prims[2]
break break
if sp is None: if sp is None:
print('unknown cell type', kind) print('unknown cell type', kind)
else: else:
level_ops.append((sp, l.index, *in_idxs, *a_ctrl[l])) ops.append((sp, o0_idx, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o0_idx]))
if len(level_ops) > 0: levels.append(level_ops)
level_lines = prev_level_lines
self.levels = [np.asarray(lv, dtype=np.int32) for lv in levels[::-1]] self.ops = np.asarray(ops, dtype='int32')
level_sums = np.cumsum([0]+[len(lv) for lv in self.levels], dtype=np.int32)
self.level_starts, self.level_stops = level_sums[:-1], level_sums[1:]
self.ops = np.vstack(self.levels)
# create a map from fanout lines to stem lines for fork stripping # create a map from fanout lines to stem lines for fork stripping
stems = np.full(self.c_locs_len, -1, dtype=np.int32) # default to -1: 'no fanout line' stems = np.zeros(self.c_locs_len, dtype='int32') - 1 # default to -1: 'no fanout line'
if strip_forks: if strip_forks:
for f in circuit.forks.values(): for f in circuit.forks.values():
prev_line = f.ins[0] prev_line = f.ins[0]
while prev_line.driver.kind == '__fork__': while prev_line.driver.kind == '__fork__':
prev_line = prev_line.driver.ins[0] prev_line = prev_line.driver.ins[0]
stem_idx = prev_line.index
for ol in f.outs: for ol in f.outs:
if ol is not None: if ol is not None:
stems[ol] = prev_line.index stems[ol] = stem_idx
ref_count = np.zeros(self.c_locs_len, dtype=np.int32) # calculate level (distance from PI/PPI) and reference count for each line
levels = np.zeros(self.c_locs_len, dtype='int32')
for op in self.ops: ref_count = np.zeros(self.c_locs_len, dtype='int32')
for x in [2, 3, 4, 5]: level_starts = [0]
ref_count[stems[op[x]] if stems[op[x]] >= 0 else op[x]] += 1 current_level = 1
for i, op in enumerate(self.ops):
# if we fork-strip, always take the stems for determining fan-in level
i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
i2_idx = stems[op[4]] if stems[op[4]] >= 0 else op[4]
i3_idx = stems[op[5]] if stems[op[5]] >= 0 else op[5]
if levels[i0_idx] >= current_level or levels[i1_idx] >= current_level or levels[i2_idx] >= current_level or levels[i3_idx] >= current_level:
current_level += 1
level_starts.append(i)
levels[op[1]] = current_level # set level of the output line
ref_count[i0_idx] += 1
ref_count[i1_idx] += 1
ref_count[i2_idx] += 1
ref_count[i3_idx] += 1
self.level_starts = np.asarray(level_starts, dtype='int32')
self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32')
# combinational signal allocation table. maps line and interface indices to self.c memory locations # combinational signal allocation table. maps line and interface indices to self.c memory locations
self.c_locs = np.full((self.c_locs_len,), -1, dtype=np.int32) self.c_locs = np.full((self.c_locs_len,), -1, dtype=np.int32)
@ -276,9 +278,9 @@ class SimOps:
ref_count[i0_idx] += 1 ref_count[i0_idx] += 1
# allocate memory for the rest of the circuit # allocate memory for the rest of the circuit
for ops in self.levels: for op_start, op_stop in zip(self.level_starts, self.level_stops):
free_set = set() free_set = set()
for op in ops: for op in self.ops[op_start:op_stop]:
# if we fork-strip, always take the stems # if we fork-strip, always take the stems
i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2] i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3] i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
@ -297,7 +299,6 @@ class SimOps:
self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap
if c_reuse: if c_reuse:
for loc in free_set: for loc in free_set:
if loc >= 0: # DFF clocks are not allocated. Ignore for now.
h.free(loc) h.free(loc)
# copy memory location and capacity from stems to fanout lines # copy memory location and capacity from stems to fanout lines
@ -310,15 +311,6 @@ class SimOps:
if len(n.ins) > 0: if len(n.ins) > 0:
self.c_locs[self.ppo_offset + i], self.c_caps[self.ppo_offset + i] = self.c_locs[n.ins[0]], self.c_caps[n.ins[0]] self.c_locs[self.ppo_offset + i], self.c_caps[self.ppo_offset + i] = self.c_locs[n.ins[0]], self.c_caps[n.ins[0]]
# line use information
self.line_use_start = np.full(self.c_locs_len, -1, dtype=np.int32)
self.line_use_stop = np.full(self.c_locs_len, len(self.levels), dtype=np.int32)
for i, lv in enumerate(self.levels):
for op in lv:
self.line_use_start[op[1]] = i
for x in [2, 3, 4, 5]:
self.line_use_stop[op[x]] = i
self.c_len = h.max_size self.c_len = h.max_size
d = defaultdict(int) d = defaultdict(int)

32
src/kyupy/stil.py

@ -41,7 +41,7 @@ class StilFile:
unload = {} unload = {}
for so_port in self.so_ports: for so_port in self.so_ports:
if so_port in call.parameters: if so_port in call.parameters:
unload[so_port] = call.parameters[so_port] unload[so_port] = call.parameters[so_port].replace('\n', '').replace('N', '-')
if len(capture) > 0: if len(capture) > 0:
self.patterns.append(ScanPattern(sload, launch, capture, unload)) self.patterns.append(ScanPattern(sload, launch, capture, unload))
capture = {} capture = {}
@ -49,9 +49,11 @@ class StilFile:
sload = {} sload = {}
for si_port in self.si_ports: for si_port in self.si_ports:
if si_port in call.parameters: if si_port in call.parameters:
sload[si_port] = call.parameters[si_port] sload[si_port] = call.parameters[si_port].replace('\n', '').replace('N', '-')
if call.name.endswith('_launch'): launch = call.parameters if call.name.endswith('_launch'):
if call.name.endswith('_capture'): capture = call.parameters launch = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items())
if call.name.endswith('_capture'):
capture = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items())
def _maps(self, c): def _maps(self, c):
interface = list(c.io_nodes) + [n for n in c.nodes if 'DFF' in n.kind] interface = list(c.io_nodes) + [n for n in c.nodes if 'DFF' in n.kind]
@ -98,12 +100,12 @@ class StilFile:
tests = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED) tests = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
for i, p in enumerate(self.patterns): for i, p in enumerate(self.patterns):
for si_port in self.si_ports.keys(): for si_port in self.si_ports.keys():
pattern = logic.mvarray(p.load[si_port][0]) pattern = logic.mvarray(p.load[si_port])
inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN), inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN),
[scan_inversions[si_port], logic.ZERO]).astype(np.uint8) [scan_inversions[si_port], logic.ZERO]).astype(np.uint8)
np.bitwise_xor(pattern, inversions, out=pattern) np.bitwise_xor(pattern, inversions, out=pattern)
tests[scan_maps[si_port], i] = pattern tests[scan_maps[si_port], i] = pattern
tests[pi_map, i] = logic.mvarray(p.capture['_pi'][0]) tests[pi_map, i] = logic.mvarray(p.capture['_pi'])
return tests return tests
def tests_loc(self, circuit, init_filter=None, launch_filter=None): def tests_loc(self, circuit, init_filter=None, launch_filter=None):
@ -132,12 +134,12 @@ class StilFile:
for i, p in enumerate(self.patterns): for i, p in enumerate(self.patterns):
# init.set_values(i, '0' * len(interface)) # init.set_values(i, '0' * len(interface))
for si_port in self.si_ports.keys(): for si_port in self.si_ports.keys():
pattern = logic.mvarray(p.load[si_port][0]) pattern = logic.mvarray(p.load[si_port])
inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN), inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN),
[scan_inversions[si_port], logic.ZERO]).astype(np.uint8) [scan_inversions[si_port], logic.ZERO]).astype(np.uint8)
np.bitwise_xor(pattern, inversions, out=pattern) np.bitwise_xor(pattern, inversions, out=pattern)
init[scan_maps[si_port], i] = pattern init[scan_maps[si_port], i] = pattern
init[pi_map, i] = logic.mvarray(p.launch['_pi'][0] if '_pi' in p.launch else p.capture['_pi'][0]) init[pi_map, i] = logic.mvarray(p.launch['_pi'] if '_pi' in p.launch else p.capture['_pi'])
if init_filter: init = init_filter(init) if init_filter: init = init_filter(init)
sim8v = LogicSim(circuit, init.shape[-1], m=8) sim8v = LogicSim(circuit, init.shape[-1], m=8)
sim8v.s[0] = logic.mv_to_bp(init) sim8v.s[0] = logic.mv_to_bp(init)
@ -147,12 +149,12 @@ class StilFile:
launch = logic.bp_to_mv(sim8v.s[1])[..., :init.shape[-1]] launch = logic.bp_to_mv(sim8v.s[1])[..., :init.shape[-1]]
for i, p in enumerate(self.patterns): for i, p in enumerate(self.patterns):
# if there was no launch cycle or launch clock, then init = launch # if there was no launch cycle or launch clock, then init = launch
if '_pi' not in p.launch or 'P' not in p.launch['_pi'][0] or 'P' not in p.capture['_pi'][0]: if '_pi' not in p.launch or 'P' not in p.launch['_pi'] or 'P' not in p.capture['_pi']:
for si_port in self.si_ports.keys(): for si_port in self.si_ports.keys():
pattern = logic.mv_xor(logic.mvarray(p.load[si_port][0]), scan_inversions[si_port]) pattern = logic.mv_xor(logic.mvarray(p.load[si_port]), scan_inversions[si_port])
launch[scan_maps[si_port], i] = pattern launch[scan_maps[si_port], i] = pattern
if '_pi' in p.capture and 'P' in p.capture['_pi'][0]: if '_pi' in p.capture and 'P' in p.capture['_pi']:
launch[pi_map, i] = logic.mvarray(p.capture['_pi'][0]) launch[pi_map, i] = logic.mvarray(p.capture['_pi'])
launch[po_map, i] = logic.UNASSIGNED launch[po_map, i] = logic.UNASSIGNED
if launch_filter: launch = launch_filter(launch) if launch_filter: launch = launch_filter(launch)
@ -169,9 +171,9 @@ class StilFile:
interface, _, po_map, scan_maps, scan_inversions = self._maps(circuit) interface, _, po_map, scan_maps, scan_inversions = self._maps(circuit)
resp = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED) resp = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
for i, p in enumerate(self.patterns): for i, p in enumerate(self.patterns):
resp[po_map, i] = logic.mvarray(p.capture['_po'][0] if len(p.capture) > 0 else p.launch['_po'][0]) resp[po_map, i] = logic.mvarray(p.capture['_po'] if len(p.capture) > 0 else p.launch['_po'])
for so_port in self.so_ports.keys(): for so_port in self.so_ports.keys():
pattern = logic.mv_xor(logic.mvarray(p.unload[so_port][0]), scan_inversions[so_port]) pattern = logic.mv_xor(logic.mvarray(p.unload[so_port]), scan_inversions[so_port])
resp[scan_maps[so_port], i] = pattern resp[scan_maps[so_port], i] = pattern
return resp return resp
@ -190,7 +192,7 @@ class StilTransformer(Transformer):
def call(args): return Call(args[0], dict(args[1:])) def call(args): return Call(args[0], dict(args[1:]))
@staticmethod @staticmethod
def call_parameter(args): return args[0], (args[1].value.replace('\n', '').replace('N', '-'), args[1].start_pos) def call_parameter(args): return args[0], args[1].value
@staticmethod @staticmethod
def signal_group(args): return args[0], args[1:] def signal_group(args): return args[0], args[1:]

168
src/kyupy/techlib.py

@ -11,6 +11,50 @@ from itertools import product
from . import bench from . import bench
class TechLibOld:
@staticmethod
def pin_index(kind, pin):
if isinstance(pin, int):
return max(0, pin-1)
if kind[:3] in ('OAI', 'AOI'):
if pin[0] == 'A': return int(pin[1]) - 1
if pin == 'B': return int(kind[3])
if pin[0] == 'B': return int(pin[1]) - 1 + int(kind[3])
for prefix, pins, index in [('HADD', ('B0', 'SO'), 1),
('HADD', ('A0', 'C1'), 0),
('MUX21', ('S', 'S0'), 2),
('MX2', ('S0',), 2),
('TBUF', ('OE',), 1),
('TINV', ('OE',), 1),
('LATCH', ('D',), 0),
('LATCH', ('QN',), 1),
('DFF', ('D',), 0),
('DFF', ('QN',), 1),
('SDFF', ('D',), 0),
('SDFF', ('QN',), 1),
('SDFF', ('CLK',), 3),
('SDFF', ('RSTB', 'RN'), 4),
('SDFF', ('SETB',), 5),
('ISOL', ('ISO',), 0),
('ISOL', ('D',), 1)]:
if kind.startswith(prefix) and pin in pins: return index
for index, pins in enumerate([('A1', 'IN1', 'A', 'S', 'INP', 'I', 'Q', 'QN', 'Y', 'Z', 'ZN'),
('A2', 'IN2', 'B', 'CK', 'CLK', 'CO', 'SE'),
('A3', 'IN3', 'C', 'RN', 'RSTB', 'CI', 'SI'),
('A4', 'IN4', 'D', 'SN', 'SETB'),
('A5', 'IN5', 'E'),
('A6', 'IN6', 'F')]):
if pin in pins: return index
raise ValueError(f'Unknown pin index for {kind}.{pin}')
@staticmethod
def pin_is_output(kind, pin):
if isinstance(pin, int):
return pin == 0
if 'MUX' in kind and pin == 'S': return False
return pin in ('Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1')
class TechLib: class TechLib:
"""Class for standard cell library definitions. """Class for standard cell library definitions.
@ -49,14 +93,6 @@ class TechLib:
assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}' assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}'
return self.cells[kind][1][pin][0] return self.cells[kind][1][pin][0]
def pin_name(self, kind, pos, output=False):
"""Returns the pin name for a given node kind, list position, and direction."""
assert kind in self.cells, f'Unknown cell: {kind}'
for name, (ppos, isout) in self.cells[kind][1].items():
if isout == output and ppos == pos:
return name
return None
def pin_is_output(self, kind, pin): def pin_is_output(self, kind, pin):
"""Returns True, if given pin name of a node kind is an output.""" """Returns True, if given pin name of a node kind is an output."""
assert kind in self.cells, f'Unknown cell: {kind}' assert kind in self.cells, f'Unknown cell: {kind}'
@ -102,92 +138,21 @@ TLATX1 input(C,D) output(Q,QN) Q=LATCH(D,C) QN=INV1(Q) ;
""" """
NANGATE = TechLib(r""" _nangate_common = r"""
FILLTIE ;
FILL_X{1,2,4,8,16} ;
ANTENNA input(I) ;
TIEH output(Z) Z=__const1__() ;
TIEL output(ZN) ZN=__const0__() ;
BUF_X{1,2,4,8,12,16} input(I) output(Z) Z=BUF1(I) ;
INV_X{1,2,4,8,12,16} input(I) output(ZN) ZN=INV1(I) ;
CLKBUF_X{1,2,4,8,12,16} input(I) output(Z) Z=BUF1(I) ;
CLKGATETST_X1 input(CLK,E,TE) output(Q) Q=OA21(CLK,E,TE) ;
AND2_X{1,2} input(A1,A2) output(Z) Z=AND2(A1,A2) ;
AND3_X{1,2} input(A1,A2,A3) output(Z) Z=AND3(A1,A2,A3) ;
AND4_X{1,2} input(A1,A2,A3,A4) output(Z) Z=AND4(A1,A2,A3,A4) ;
NAND2_X{1,2} input(A1,A2) output(ZN) ZN=NAND2(A1,A2) ;
NAND3_X{1,2} input(A1,A2,A3) output(ZN) ZN=NAND3(A1,A2,A3) ;
NAND4_X{1,2} input(A1,A2,A3,A4) output(ZN) ZN=NAND4(A1,A2,A3,A4) ;
OR2_X{1,2} input(A1,A2) output(Z) Z=OR2(A1,A2) ;
OR3_X{1,2} input(A1,A2,A3) output(Z) Z=OR3(A1,A2,A3) ;
OR4_X{1,2} input(A1,A2,A3,A4) output(Z) Z=OR4(A1,A2,A3,A4) ;
NOR2_X{1,2} input(A1,A2) output(ZN) ZN=NOR2(A1,A2) ;
NOR3_X{1,2} input(A1,A2,A3) output(ZN) ZN=NOR3(A1,A2,A3) ;
NOR4_X{1,2} input(A1,A2,A3,A4) output(ZN) ZN=NOR4(A1,A2,A3,A4) ;
XOR2_X1 input(A1,A2) output(Z) Z=XOR2(A1,A2) ;
XNOR2_X1 input(A1,A2) output(ZN) ZN=XNOR2(A1,A2) ;
MUX2_X1 input(I0,I1,S) output(Z) Z=MUX21(I0,I1,S) ;
HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ;
AOI21_X{1,2} input(A1,A2,B) output(ZN) ZN=AOI21(A1,A2,B) ;
OAI21_X{1,2} input(A1,A2,B) output(ZN) ZN=OAI21(A1,A2,B) ;
AOI22_X{1,2} input(A1,A2,B1,B2) output(ZN) ZN=AOI22(A1,A2,B1,B2) ;
OAI22_X{1,2} input(A1,A2,B1,B2) output(ZN) ZN=OAI22(A1,A2,B1,B2) ;
DFFRNQ_X1 input(D,RN,CLK) output(Q) DR=AND2(D,RN) Q=DFF(DR,CLK) ;
DFFSNQ_X1 input(D,SN,CLK) output(Q) S=INV1(SN) DS=OR2(D,S) Q=DFF(DS,CLK) ;
SDFFRNQ_X1 input(D,RN,SE,SI,CLK) output(Q) DR=AND2(D,RN) DI=MUX21(DR,SI,SE) Q=DFF(DI,CLK) ;
SDFFSNQ_X1 input(D,SE,SI,SN,CLK) output(Q) S=INV1(SN) DS=OR2(D,S) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) ;
TBUF_X{1,2,4,8,12,16} input(EN,I) output(Z) Z=BUF1(I) ;
LHQ_X1 input(D,E) output(Q) Q=LATCH(D,E) ;
""")
"""Nangate 15nm Open Cell Library (NanGate_15nm_OCL_v0.1_2014_06.A).
"""
NANGATE45 = TechLib(r"""
FILLCELL_X{1,2,4,8,16,32} ; FILLCELL_X{1,2,4,8,16,32} ;
ANTENNA_X1 input(A) ;
LOGIC0_X1 output(Z) Z=__const0__() ; LOGIC0_X1 output(Z) Z=__const0__() ;
LOGIC1_X1 output(Z) Z=__const1__() ; LOGIC1_X1 output(Z) Z=__const1__() ;
BUF_X{1,2,4,8,16,32} input(A) output(Z) Z=BUF1(A) ; BUF_X{1,2,4,8,16,32} input(A) output(Z) Z=BUF1(A) ;
INV_X{1,2,4,8,16,32} input(A) output(ZN) ZN=INV1(A) ;
CLKBUF_X{1,2,3} input(A) output(Z) Z=BUF1(A) ; CLKBUF_X{1,2,3} input(A) output(Z) Z=BUF1(A) ;
CLKGATETST_X{1,2,4,8} input(CK,E,SE) output(GCK) GCK=OA21(CK,E,SE) ;
CLKGATE_X{1,2,4,8} input(CK,E) output(GCK) GCK=AND2(CK,E) ;
AND2_X{1,2,4} input(A1,A2) output(ZN) ZN=AND2(A1,A2) ;
AND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=AND3(A1,A2,A3) ;
AND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4) ;
NAND2_X{1,2,4} input(A1,A2) output(ZN) ZN=NAND2(A1,A2) ; NAND2_X{1,2,4} input(A1,A2) output(ZN) ZN=NAND2(A1,A2) ;
NAND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=NAND3(A1,A2,A3) ; NAND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=NAND3(A1,A2,A3) ;
NAND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=NAND4(A1,A2,A3,A4) ; NAND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=NAND4(A1,A2,A3,A4) ;
OR2_X{1,2,4} input(A1,A2) output(ZN) ZN=OR2(A1,A2) ;
OR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=OR3(A1,A2,A3) ;
OR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4) ;
NOR2_X{1,2,4} input(A1,A2) output(ZN) ZN=NOR2(A1,A2) ; NOR2_X{1,2,4} input(A1,A2) output(ZN) ZN=NOR2(A1,A2) ;
NOR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=NOR3(A1,A2,A3) ; NOR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=NOR3(A1,A2,A3) ;
NOR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=NOR4(A1,A2,A3,A4) ; NOR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=NOR4(A1,A2,A3,A4) ;
XOR2_X{1,2} input(A,B) output(Z) Z=XOR2(A,B) ;
XNOR2_X{1,2} input(A,B) output(ZN) ZN=XNOR2(A,B) ;
MUX2_X{1,2} input(A,B,S) output(Z) Z=MUX21(A,B,S) ;
HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ;
AOI21_X{1,2,4} input(A,B1,B2) output(ZN) ZN=AOI21(B1,B2,A) ; AOI21_X{1,2,4} input(A,B1,B2) output(ZN) ZN=AOI21(B1,B2,A) ;
OAI21_X{1,2,4} input(A,B1,B2) output(ZN) ZN=OAI21(B1,B2,A) ; OAI21_X{1,2,4} input(A,B1,B2) output(ZN) ZN=OAI21(B1,B2,A) ;
@ -197,6 +162,8 @@ OAI22_X{1,2,4} input(A1,A2,B1,B2) output(ZN) ZN=OAI22(A1,A2,B1,B2) ;
OAI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=OAI211(C1,C2,A,B) ; OAI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=OAI211(C1,C2,A,B) ;
AOI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=AOI211(C1,C2,A,B) ; AOI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=AOI211(C1,C2,A,B) ;
MUX2_X{1,2} input(A,B,S) output(Z) Z=MUX21(A,B,S) ;
AOI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=AO22(B1,B2,C1,C2) ZN=NOR2(BC,A) ; AOI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=AO22(B1,B2,C1,C2) ZN=NOR2(BC,A) ;
OAI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=NAND2(BC,A) ; OAI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=NAND2(BC,A) ;
@ -205,6 +172,14 @@ OAI222_X{1,2,4} input(A1,A2,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=OAI2
OAI33_X1 input(A1,A2,A3,B1,B2,B3) output(ZN) AA=OR2(A1,A2) BB=OR2(B1,B2) ZN=OAI22(AA,A3,BB,B3) ; OAI33_X1 input(A1,A2,A3,B1,B2,B3) output(ZN) AA=OR2(A1,A2) BB=OR2(B1,B2) ZN=OAI22(AA,A3,BB,B3) ;
HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ;
CLKGATE_X{1,2,4,8} input(CK,E) output(GCK) GCK=AND2(CK,E) ;
CLKGATETST_X{1,2,4,8} input(CK,E,SE) output(GCK) GCK=OA21(CK,E,SE) ;
DFF_X{1,2} input(D,CK) output(Q,QN) Q=DFF(D,CK) QN=INV1(Q) ; DFF_X{1,2} input(D,CK) output(Q,QN) Q=DFF(D,CK) QN=INV1(Q) ;
DFFR_X{1,2} input(D,RN,CK) output(Q,QN) DR=AND2(D,RN) Q=DFF(DR,CK) QN=INV1(Q) ; DFFR_X{1,2} input(D,RN,CK) output(Q,QN) DR=AND2(D,RN) Q=DFF(DR,CK) QN=INV1(Q) ;
DFFS_X{1,2} input(D,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) Q=DFF(DS,CK) QN=INV1(Q) ; DFFS_X{1,2} input(D,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) Q=DFF(DS,CK) QN=INV1(Q) ;
@ -216,16 +191,43 @@ SDFFS_X{1,2} input(D,SE,SI,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) DI=MU
SDFFRS_X{1,2} input(D,RN,SE,SI,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) DRS=AND2(DS,RN) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ; SDFFRS_X{1,2} input(D,RN,SE,SI,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) DRS=AND2(DS,RN) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;
TBUF_X{1,2,4,8,16} input(A,EN) output(Z) Z=BUF1(A) ; TBUF_X{1,2,4,8,16} input(A,EN) output(Z) Z=BUF1(A) ;
TINV_X1 input(I,EN) output(ZN) ZN=INV1(I) ; TINV_X1 input(I,EN) output(ZN) ZN=INV1(I) ;
TLAT_X1 input(D,G,OE) output(Q) Q=LATCH(D,G) ; TLAT_X1 input(D,G,OE) output(Q) Q=LATCH(D,G) ;
DLH_X{1,2} input(D,G) output(Q) Q=LATCH(D,G) ; DLH_X{1,2} input(D,G) output(Q) Q=LATCH(D,G) ;
DLL_X{1,2} input(D,GN) output(Q) G=INV1(GN) Q=LATCH(D,G) ; DLL_X{1,2} input(D,GN) output(Q) G=INV1(GN) Q=LATCH(D,G) ;
"""
NANGATE = TechLib(_nangate_common + r"""
INV_X{1,2,4,8,16,32} input(I) output(ZN) ZN=INV1(I) ;
AND2_X{1,2,4} input(A1,A2) output(Z) Z=AND2(A1,A2) ;
AND3_X{1,2,4} input(A1,A2,A3) output(Z) Z=AND3(A1,A2,A3) ;
AND4_X{1,2,4} input(A1,A2,A3,A4) output(Z) Z=AND4(A1,A2,A3,A4) ;
OR2_X{1,2,4} input(A1,A2) output(Z) Z=OR2(A1,A2) ;
OR3_X{1,2,4} input(A1,A2,A3) output(Z) Z=OR3(A1,A2,A3) ;
OR4_X{1,2,4} input(A1,A2,A3,A4) output(Z) Z=OR4(A1,A2,A3,A4) ;
XOR2_X{1,2} input(A1,A2) output(Z) Z=XOR2(A1,A2) ;
XNOR2_X{1,2} input(A1,A2) output(ZN) ZN=XNOR2(A1,A2) ;
""")
"""An newer NANGATE-variant that uses 'Z' as output pin names for AND and OR gates.
"""
NANGATE_ZN = TechLib(_nangate_common + r"""
INV_X{1,2,4,8,16,32} input(A) output(ZN) ZN=INV1(A) ;
AND2_X{1,2,4} input(A1,A2) output(ZN) ZN=AND2(A1,A2) ;
AND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=AND3(A1,A2,A3) ;
AND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4) ;
OR2_X{1,2,4} input(A1,A2) output(ZN) ZN=OR2(A1,A2) ;
OR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=OR3(A1,A2,A3) ;
OR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4) ;
XOR2_X{1,2} input(A,B) output(Z) Z=XOR2(A,B) ;
XNOR2_X{1,2} input(A,B) output(ZN) ZN=XNOR2(A,B) ;
""") """)
"""Nangate 45nm Open Cell Library (NangateOpenCellLibrary_PDKv1_3_v2010_12). """An older NANGATE-variant that uses 'ZN' as output pin names for AND and OR gates.
This NANGATE-variant that uses 'ZN' as output pin names for AND and OR gates.
""" """

9
src/kyupy/verilog.py

@ -123,9 +123,6 @@ class VerilogTransformer(Transformer):
assignments = [] assignments = []
for stmt in args[2:]: # pass 1: instantiate cells and driven signals for stmt in args[2:]: # pass 1: instantiate cells and driven signals
if isinstance(stmt, Instantiation): if isinstance(stmt, Instantiation):
if stmt.type not in self.tlib.cells:
log.warn(f'Ignoring cell of unknown kind "{stmt.type}"')
continue
n = Node(c, stmt.name, kind=stmt.type) n = Node(c, stmt.name, kind=stmt.type)
for p, s in stmt.pins.items(): for p, s in stmt.pins.items():
if self.tlib.pin_is_output(n.kind, p): if self.tlib.pin_is_output(n.kind, p):
@ -144,8 +141,6 @@ class VerilogTransformer(Transformer):
c.io_nodes[positions[name]] = n c.io_nodes[positions[name]] = n
if sd.kind == 'input': if sd.kind == 'input':
Line(c, n, Node(c, name)) Line(c, n, Node(c, name))
while len(assignments) > 0:
more_assignments = []
for target, source in assignments: # pass 1.5: process signal assignments for target, source in assignments: # pass 1.5: process signal assignments
target_sigs = [] target_sigs = []
if not isinstance(target, list): target = [target] if not isinstance(target, list): target = [target]
@ -172,13 +167,9 @@ class VerilogTransformer(Transformer):
cnode = Node(c, f'__const{s[3]}_{const_count}__', f'__const{s[3]}__') cnode = Node(c, f'__const{s[3]}_{const_count}__', f'__const{s[3]}__')
const_count += 1 const_count += 1
Line(c, cnode, Node(c, t)) Line(c, cnode, Node(c, t))
else:
more_assignments.append((target, source))
assignments = more_assignments
for stmt in args[2:]: # pass 2: connect signals to readers for stmt in args[2:]: # pass 2: connect signals to readers
if isinstance(stmt, Instantiation): if isinstance(stmt, Instantiation):
for p, s in stmt.pins.items(): for p, s in stmt.pins.items():
if stmt.name not in c.cells: continue
n = c.cells[stmt.name] n = c.cells[stmt.name]
if self.tlib.pin_is_output(n.kind, p): continue if self.tlib.pin_is_output(n.kind, p): continue
if s.startswith("1'b"): if s.startswith("1'b"):

173
src/kyupy/wave_sim.py

@ -13,11 +13,10 @@ Two simulators are available: :py:class:`WaveSim` runs on the CPU, and the deriv
""" """
import math import math
from collections import defaultdict
import numpy as np import numpy as np
from . import log, numba, cuda, sim, cdiv, eng from . import numba, cuda, sim, cdiv
TMAX = np.float32(2 ** 127) TMAX = np.float32(2 ** 127)
@ -60,8 +59,8 @@ class WaveSim(sim.SimOps):
self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype) self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype)
self.delays[:, :delays.shape[1]] = delays self.delays[:, :delays.shape[1]] = delays
self.c = np.full((self.c_len, self.sims), TMAX, dtype=np.float32) self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX
self.s = np.zeros((11, self.s_len, self.sims), dtype=np.float32) self.s = np.zeros((11, self.s_len, sims), dtype=np.float32)
"""Information about the logic values and transitions around the sequential elements (flip-flops) and ports. """Information about the logic values and transitions around the sequential elements (flip-flops) and ports.
The first 3 values are read by :py:func:`s_to_c`. The first 3 values are read by :py:func:`s_to_c`.
@ -99,18 +98,12 @@ class WaveSim(sim.SimOps):
self.simctl_int[0] = range(sims) # unique seed for each sim by default, zero this to pick same delays for all sims. self.simctl_int[0] = range(sims) # unique seed for each sim by default, zero this to pick same delays for all sims.
self.simctl_int[1] = 2 # random picking by default. self.simctl_int[1] = 2 # random picking by default.
self.e = np.zeros((self.c_locs_len, sims, 2), dtype=np.uint8) # aux data for each line and sim self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.simctl_int)])
self.error_counts = np.zeros(self.s_len, dtype=np.uint32) # number of capture errors by PPO
self.lsts = np.zeros(self.s_len, dtype=np.float32) # LST by PPO
self.overflows = np.zeros(self.s_len, dtype=np.uint32) # Overflows by PPO
self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.e, self.c_locs, self.c_caps, self.ops, self.simctl_int)])
def __repr__(self): def __repr__(self):
dev = 'GPU' if hasattr(self.c, 'copy_to_host') else 'CPU' dev = 'GPU' if hasattr(self.c, 'copy_to_host') else 'CPU'
return f'{{name: "{self.circuit.name}", device: "{dev}", sims: {self.sims}, ops: {len(self.ops)}, ' + \ return f'{{name: "{self.circuit.name}", device: "{dev}", sims: {self.sims}, ops: {len(self.ops)}, ' + \
f'levels: {len(self.level_starts)}, nbytes: {eng(self.nbytes)}}}' f'levels: {len(self.level_starts)}, nbytes: {self.nbytes}}}'
def s_to_c(self): def s_to_c(self):
"""Transfers values of sequential elements and primary inputs to the combinational portion. """Transfers values of sequential elements and primary inputs to the combinational portion.
@ -123,7 +116,7 @@ class WaveSim(sim.SimOps):
self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX]) self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX])
self.c[self.pippi_c_locs+2] = TMAX self.c[self.pippi_c_locs+2] = TMAX
def c_prop(self, sims=None, seed=1, delta=0): def c_prop(self, sims=None, seed=1):
"""Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs. """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs.
:param sims: Number of parallel simulations to execute. If None, all available simulations are performed. :param sims: Number of parallel simulations to execute. If None, all available simulations are performed.
@ -131,7 +124,7 @@ class WaveSim(sim.SimOps):
""" """
sims = min(sims or self.sims, self.sims) sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops): for op_start, op_stop in zip(self.level_starts, self.level_stops):
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, 0, sims, self.delays, self.simctl_int, seed, delta) level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, 0, sims, self.delays, self.simctl_int, seed)
def c_to_s(self, time=TMAX, sd=0.0, seed=1): def c_to_s(self, time=TMAX, sd=0.0, seed=1):
"""Simulates a capture operation at all sequential elements and primary outputs. """Simulates a capture operation at all sequential elements and primary outputs.
@ -159,7 +152,7 @@ class WaveSim(sim.SimOps):
self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs]
def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, delta): def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
overflows = int(0) overflows = int(0)
lut = op[0] lut = op[0]
@ -169,18 +162,6 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
c_idx = op[4] c_idx = op[4]
d_idx = op[5] d_idx = op[5]
input_epoch = (ebuf[a_idx, sim, 1]|
ebuf[b_idx, sim, 1]|
ebuf[c_idx, sim, 1]|
ebuf[d_idx, sim, 1])
output_epoch = ebuf[z_idx, sim, 1]
if (delta):
if input_epoch == 0 and output_epoch == 0: return 0, 0
out_changed = output_epoch
if len(delays) > 1: if len(delays) > 1:
if simctl_int[1] == 0: if simctl_int[1] == 0:
delays = delays[seed] delays = delays[seed]
@ -225,25 +206,25 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
if a == current_t: if a == current_t:
a_cur += 1 a_cur += 1
inputs ^= 1 inputs ^= 1
thresh = delays[a_idx, a_cur & 1 ^ 1, z_val] thresh = delays[a_idx, a_cur & 1, z_val]
a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val] a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val]
next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, (a_cur & 1) ^ 1, z_val ^ 1] next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, (a_cur & 1) ^ 1, z_val ^ 1]
elif b == current_t: elif b == current_t:
b_cur += 1 b_cur += 1
inputs ^= 2 inputs ^= 2
thresh = delays[b_idx, b_cur & 1 ^ 1, z_val] thresh = delays[b_idx, b_cur & 1, z_val]
b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val] b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val]
next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, (b_cur & 1) ^ 1, z_val ^ 1] next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, (b_cur & 1) ^ 1, z_val ^ 1]
elif c == current_t: elif c == current_t:
c_cur += 1 c_cur += 1
inputs ^= 4 inputs ^= 4
thresh = delays[c_idx, c_cur & 1 ^ 1, z_val] thresh = delays[c_idx, c_cur & 1, z_val]
c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val] c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val]
next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, (c_cur & 1) ^ 1, z_val ^ 1] next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, (c_cur & 1) ^ 1, z_val ^ 1]
else: else:
d_cur += 1 d_cur += 1
inputs ^= 8 inputs ^= 8
thresh = delays[d_idx, d_cur & 1 ^ 1, z_val] thresh = delays[d_idx, d_cur & 1, z_val]
d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val] d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val]
next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1] next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1]
@ -254,15 +235,13 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
or (current_t - previous_t) > thresh # -OR- the generated hazard is wider than pulse threshold. or (current_t - previous_t) > thresh # -OR- the generated hazard is wider than pulse threshold.
): ):
if z_cur < (z_cap - 1): # enough space in z_mem? if z_cur < (z_cap - 1): # enough space in z_mem?
if delta and (cbuf[z_mem + z_cur, sim] != current_t):
out_changed = 1
cbuf[z_mem + z_cur, sim] = current_t cbuf[z_mem + z_cur, sim] = current_t
previous_t = current_t previous_t = current_t
z_cur += 1 z_cur += 1
else: else:
overflows += 1 overflows += 1
previous_t = cbuf[z_mem + z_cur - 1, sim]
z_cur -= 1 z_cur -= 1
previous_t = cbuf[z_mem + z_cur, sim]
else: else:
z_cur -= 1 z_cur -= 1
previous_t = cbuf[z_mem + z_cur - 1, sim] if z_cur > 0 else TMIN previous_t = cbuf[z_mem + z_cur - 1, sim] if z_cur > 0 else TMIN
@ -276,23 +255,12 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
current_t = min(a, b, c, d) current_t = min(a, b, c, d)
if delta and (cbuf[z_mem + z_cur, sim] != TMAX):
out_changed = 1
# generate or propagate overflow flag # generate or propagate overflow flag
cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d) cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d)
nrise = max(0, (z_cur+1) // 2 - (cbuf[z_mem, sim] == TMIN)) nrise = max(0, (z_cur+1) // 2 - (cbuf[z_mem, sim] == TMIN))
nfall = z_cur // 2 nfall = z_cur // 2
e = int(((cbuf[z_mem, sim] == TMIN) << 1) & 2) # initial value
e |= z_val # final value
e |= (nrise + nfall)<<2 # number of transitions
ebuf[z_idx, sim, 0] = e
ebuf[z_idx, sim, 1] = input_epoch & out_changed
return nrise, nfall return nrise, nfall
@ -300,11 +268,11 @@ wave_eval_cpu = numba.njit(_wave_eval)
@numba.njit @numba.njit
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, seed, delta): def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed):
for op_idx in range(op_start, op_stop): for op_idx in range(op_start, op_stop):
op = ops[op_idx] op = ops[op_idx]
for sim in range(sim_start, sim_stop): for sim in range(sim_start, sim_stop):
nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], seed, delta) nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed)
a_loc = op[6] a_loc = op[6]
a_wr = op[7] a_wr = op[7]
a_wf = op[8] a_wf = op[8]
@ -377,18 +345,12 @@ class WaveSimCuda(WaveSim):
self.delays = cuda.to_device(self.delays) self.delays = cuda.to_device(self.delays)
self.simctl_int = cuda.to_device(self.simctl_int) self.simctl_int = cuda.to_device(self.simctl_int)
self.abuf = cuda.to_device(self.abuf) self.abuf = cuda.to_device(self.abuf)
self.e = cuda.to_device(self.e)
self.error_counts = cuda.to_device(self.error_counts)
self.lsts = cuda.to_device(self.lsts)
self.overflows = cuda.to_device(self.overflows)
self.aux = cuda.to_device(np.zeros(8*1024, dtype=np.int32))
self._block_dim = (32, 16) self._block_dim = (32, 16)
def __getstate__(self): def __getstate__(self):
state = self.__dict__.copy() state = self.__dict__.copy()
del state['c'] state['c'] = np.array(self.c)
state['s'] = np.array(self.s) state['s'] = np.array(self.s)
state['ops'] = np.array(self.ops) state['ops'] = np.array(self.ops)
state['c_locs'] = np.array(self.c_locs) state['c_locs'] = np.array(self.c_locs)
@ -396,16 +358,11 @@ class WaveSimCuda(WaveSim):
state['delays'] = np.array(self.delays) state['delays'] = np.array(self.delays)
state['simctl_int'] = np.array(self.simctl_int) state['simctl_int'] = np.array(self.simctl_int)
state['abuf'] = np.array(self.abuf) state['abuf'] = np.array(self.abuf)
state['e'] = np.array(self.e)
state['error_counts'] = np.array(self.error_counts)
state['lsts'] = np.array(self.lsts)
state['overflows'] = np.array(self.overflows)
state['aux'] = np.array(self.aux)
return state return state
def __setstate__(self, state): def __setstate__(self, state):
self.__dict__.update(state) self.__dict__.update(state)
self.c = cuda.to_device(np.full((self.c_len, self.sims), TMAX, dtype=np.float32)) self.c = cuda.to_device(self.c)
self.s = cuda.to_device(self.s) self.s = cuda.to_device(self.s)
self.ops = cuda.to_device(self.ops) self.ops = cuda.to_device(self.ops)
self.c_locs = cuda.to_device(self.c_locs) self.c_locs = cuda.to_device(self.c_locs)
@ -413,11 +370,6 @@ class WaveSimCuda(WaveSim):
self.delays = cuda.to_device(self.delays) self.delays = cuda.to_device(self.delays)
self.simctl_int = cuda.to_device(self.simctl_int) self.simctl_int = cuda.to_device(self.simctl_int)
self.abuf = cuda.to_device(self.abuf) self.abuf = cuda.to_device(self.abuf)
self.e = cuda.to_device(self.e)
self.error_counts = cuda.to_device(self.error_counts)
self.lsts = cuda.to_device(self.lsts)
self.overflows = cuda.to_device(self.overflows)
self.aux = cuda.to_device(self.aux)
def s_to_c(self): def s_to_c(self):
grid_dim = self._grid_dim(self.sims, self.s_len) grid_dim = self._grid_dim(self.sims, self.s_len)
@ -425,24 +377,14 @@ class WaveSimCuda(WaveSim):
def _grid_dim(self, x, y): return cdiv(x, self._block_dim[0]), cdiv(y, self._block_dim[1]) def _grid_dim(self, x, y): return cdiv(x, self._block_dim[0]), cdiv(y, self._block_dim[1])
def c_prop(self, sims=None, seed=1, op_from=0, op_to=None, delta=0): def c_prop(self, sims=None, seed=1):
sims = min(sims or self.sims, self.sims) sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops): for op_start, op_stop in zip(self.level_starts, self.level_stops):
if op_from > op_start: continue
if op_to is not None and op_to <= op_start: break
grid_dim = self._grid_dim(sims, op_stop - op_start) grid_dim = self._grid_dim(sims, op_stop - op_start)
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0), wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, int(0),
sims, self.delays, self.simctl_int, seed, delta) sims, self.delays, self.simctl_int, seed)
cuda.synchronize() cuda.synchronize()
def c_prop_level(self, level, sims=None, seed=1, delta=0):
sims = min(sims or self.sims, self.sims)
op_start = self.level_starts[level]
op_stop = self.level_stops[level]
grid_dim = self._grid_dim(sims, op_stop - op_start)
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0),
sims, self.delays, self.simctl_int, seed, delta)
def c_to_s(self, time=TMAX, sd=0.0, seed=1): def c_to_s(self, time=TMAX, sd=0.0, seed=1):
grid_dim = self._grid_dim(self.sims, self.s_len) grid_dim = self._grid_dim(self.sims, self.s_len)
wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.c_caps, self.ppo_offset, wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.c_caps, self.ppo_offset,
@ -452,77 +394,6 @@ class WaveSimCuda(WaveSim):
grid_dim = self._grid_dim(self.sims, self.s_len) grid_dim = self._grid_dim(self.sims, self.s_len)
ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset) ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset)
def acc_error_counts(self, sims=None):
sims = min(sims or self.sims, self.sims)
grid_dim = cdiv(self.s_len, 256)
acc_error_counts_gpu[grid_dim, 256](self.s, sims, self.error_counts)
def reset_error_counts(self):
self.error_counts[:] = 0
def get_error_counts(self):
return np.array(self.error_counts)
def acc_overflows(self, sims=None):
sims = min(sims or self.sims, self.sims)
grid_dim = cdiv(self.s_len, 256)
acc_overflows_gpu[grid_dim, 256](self.s, sims, self.overflows)
def reset_overflows(self):
self.overflows[:] = 0
def get_overflows(self):
return np.array(self.overflows)
def acc_lsts(self, sims=None):
sims = min(sims or self.sims, self.sims)
grid_dim = cdiv(self.s_len, 256)
acc_lsts_gpu[grid_dim, 256](self.s, sims, self.lsts)
def reset_lsts(self):
self.lsts[:] = 0.0
def get_lsts(self):
return np.array(self.lsts)
@cuda.jit()
def memcpy_gpu (src, dst, nitems):
tid = cuda.grid(1)
stride = cuda.gridDim.x * cuda.blockDim.x
for i in range(tid, nitems, stride):
dst.flat[i] = src.flat[i]
@cuda.jit()
def acc_error_counts_gpu(s, sims, error_counts):
x = cuda.grid(1)
if x >= s.shape[1]: return
cnt = 0
for i in range(sims):
cnt += (s[6,x,i] != s[8,x,i])
error_counts[x] += cnt
@cuda.jit()
def acc_overflows_gpu(s, sims, overflows):
x = cuda.grid(1)
if x >= s.shape[1]: return
cnt = 0
for i in range(sims):
cnt += s[10,x,i]
overflows[x] += cnt
@cuda.jit()
def acc_lsts_gpu(s, sims, lsts):
x = cuda.grid(1)
if x >= s.shape[1]: return
lst = 0
for i in range(sims):
lst = max(lst, s[5,x,i])
lsts[x] = max(lsts[x], lst)
@cuda.jit() @cuda.jit()
def wave_assign_gpu(c, s, c_locs, ppi_offset): def wave_assign_gpu(c, s, c_locs, ppi_offset):
@ -552,7 +423,7 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True)
@cuda.jit() @cuda.jit()
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, seed, delta): def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed):
x, y = cuda.grid(2) x, y = cuda.grid(2)
sim = sim_start + x sim = sim_start + x
op_idx = op_start + y op_idx = op_start + y
@ -564,7 +435,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_
a_wr = op[7] a_wr = op[7]
a_wf = op[8] a_wf = op[8]
nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], seed, delta) nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed)
# accumulate WSA into abuf # accumulate WSA into abuf
if a_loc >= 0: if a_loc >= 0:

BIN
tests/b15_4ig.sa_rf.stil.gz

Binary file not shown.

36
tests/conftest.py

@ -13,44 +13,8 @@ def b15_2ig_circuit(mydir):
from kyupy.techlib import SAED32 from kyupy.techlib import SAED32
return verilog.load(mydir / 'b15_2ig.v.gz', branchforks=True, tlib=SAED32) return verilog.load(mydir / 'b15_2ig.v.gz', branchforks=True, tlib=SAED32)
@pytest.fixture(scope='session')
def b15_2ig_circuit_resolved(b15_2ig_circuit):
from kyupy.techlib import SAED32
cr = b15_2ig_circuit.copy()
cr.resolve_tlib_cells(SAED32)
return cr
@pytest.fixture(scope='session')
def b15_4ig_circuit(mydir):
from kyupy import verilog
from kyupy.techlib import SAED32
return verilog.load(mydir / 'b15_4ig.v.gz', branchforks=True, tlib=SAED32)
@pytest.fixture(scope='session')
def b15_4ig_circuit_resolved(b15_4ig_circuit):
from kyupy.techlib import SAED32
cr = b15_4ig_circuit.copy()
cr.resolve_tlib_cells(SAED32)
return cr
@pytest.fixture(scope='session') @pytest.fixture(scope='session')
def b15_2ig_delays(mydir, b15_2ig_circuit): def b15_2ig_delays(mydir, b15_2ig_circuit):
from kyupy import sdf from kyupy import sdf
from kyupy.techlib import SAED32 from kyupy.techlib import SAED32
return sdf.load(mydir / 'b15_2ig.sdf.gz').iopaths(b15_2ig_circuit, tlib=SAED32)[1:2] return sdf.load(mydir / 'b15_2ig.sdf.gz').iopaths(b15_2ig_circuit, tlib=SAED32)[1:2]
@pytest.fixture(scope='session')
def b15_2ig_sa_nf_test_resp(mydir, b15_2ig_circuit_resolved):
from kyupy import stil
s = stil.load(mydir / 'b15_2ig.sa_nf.stil.gz')
tests = s.tests(b15_2ig_circuit_resolved)[:,1:]
resp = s.responses(b15_2ig_circuit_resolved)[:,1:]
return (tests, resp)
@pytest.fixture(scope='session')
def b15_4ig_sa_rf_test_resp(mydir, b15_4ig_circuit_resolved):
from kyupy import stil
s = stil.load(mydir / 'b15_4ig.sa_rf.stil.gz')
tests = s.tests(b15_4ig_circuit_resolved)[:,1:]
resp = s.responses(b15_4ig_circuit_resolved)[:,1:]
return (tests, resp)

39
tests/gates.sdf

@ -7,49 +7,22 @@
(TEMPERATURE 25.00:25.00:25.00) (TEMPERATURE 25.00:25.00:25.00)
(TIMESCALE 1ns) (TIMESCALE 1ns)
(CELL (CELL
(CELLTYPE "NAND2_X1") (CELLTYPE "NAND2X1")
(INSTANCE nandgate) (INSTANCE nandgate)
(DELAY (DELAY
(ABSOLUTE (ABSOLUTE
(IOPATH A1 ZN (0.099:0.103:0.103) (0.122:0.127:0.127)) (IOPATH IN1 QN (0.099:0.103:0.103) (0.122:0.127:0.127))
(IOPATH A2 ZN (0.083:0.086:0.086) (0.100:0.104:0.104)) (IOPATH IN2 QN (0.083:0.086:0.086) (0.100:0.104:0.104))
) )
) )
) )
(CELL (CELL
(CELLTYPE "AND2_X1") (CELLTYPE "AND2X1")
(INSTANCE andgate) (INSTANCE andgate)
(DELAY (DELAY
(ABSOLUTE (ABSOLUTE
(IOPATH A1 ZN (0.367:0.378:0.378) (0.351:0.377:0.377)) (IOPATH IN1 Q (0.367:0.378:0.378) (0.351:0.377:0.377))
(IOPATH A2 ZN (0.366:0.375:0.375) (0.359:0.370:0.370)) (IOPATH IN2 Q (0.366:0.375:0.375) (0.359:0.370:0.370))
)
)
)
(CELL
(CELLTYPE "OAI21_X1")
(INSTANCE oai21gate)
(DELAY
(ABSOLUTE
(IOPATH B1 ZN (0.000:0.025:0.025) (0.000:0.013:0.013))
(IOPATH B2 ZN (0.000:0.030:0.030) (0.000:0.016:0.016))
( COND (B1 == 1'b0) && (B2 == 1'b1) (IOPATH A ZN (0.000:0.018:0.018)))
(COND (B1 == 1'b1) && (B2 == 1'b0) (IOPATH A ZN (0.000:0.018:0.018) (0.000:0.016:0.016)))
(COND (B1 == 1'b1) && (B2 == 1'b1) (IOPATH A ZN (0.000:0.019:0.019) (0.000:0.014:0.014)))
)
)
)
(CELL
(CELLTYPE "MUX2_X1")
(INSTANCE mux2gate)
(DELAY
(ABSOLUTE
(COND (B == 1'b0) && (S == 1'b0) (IOPATH A Z (0.000:0.037:0.037) (0.000:0.058:0.058)))
(COND (B == 1'b1) && (S == 1'b0) (IOPATH A Z (0.000:0.037:0.037) (0.000:0.058:0.058)))
(COND (A == 1'b0) && (S == 1'b1) (IOPATH B Z (0.000:0.035:0.035) (0.000:0.056:0.056)))
(COND (A == 1'b1) && (S == 1'b1) (IOPATH B Z (0.000:0.035:0.035) (0.000:0.056:0.056)))
(COND (A == 1'b0) && (B == 1'b1) (IOPATH S Z (0.000:0.047:0.047) (0.000:0.073:0.073)))
(COND (A == 1'b1) && (B == 1'b0) (IOPATH S Z (0.000:0.072:0.072) (0.000:0.064:0.064)))
) )
) )
) )

12
tests/gates.v

@ -1,15 +1,11 @@
module gates (a, b, c, o0, o1, o2, o3 ); module gates (a, b, o0, o1 );
input a; input a;
input b; input b;
input c;
output o0; output o0;
output o1; output o1;
output o2;
output o3;
AND2_X1 andgate (.A1 ( a ) , .A2 ( b ) , .ZN ( o0 ) ) ; AND2X1 andgate (.IN1 ( a ) , .IN2 ( b ) , .Q ( o0 ) ) ;
NAND2_X1 nandgate (.A1 ( a ) , .A2 ( b ) , .ZN ( o1 ) ) ; NAND2X1 nandgate (.IN1 ( a ) , .IN2 ( b ) , .QN ( o1 ) ) ;
OAI21_X1 oai21gate (.B1(a), .B2(b), .A(c), .ZN(o2) ) ;
MUX2_X1 mux2gate (.A(a), .B(b), .S(c), .Z(o3)) ;
endmodule endmodule

23
tests/test_circuit.py

@ -1,30 +1,9 @@
import pickle import pickle
from kyupy.circuit import GrowingList, Circuit, Node, Line from kyupy.circuit import Circuit, Node, Line
from kyupy import verilog, bench from kyupy import verilog, bench
from kyupy.techlib import SAED32 from kyupy.techlib import SAED32
def test_growing_list():
gl = GrowingList()
assert gl.free_idx == 0
gl[0] = 1
assert gl.free_idx == 1
gl[2] = 1
assert gl.free_idx == 1
gl[0] = None
assert gl.free_idx == 0
gl[0] = 1
assert gl.free_idx == 1
gl[1] = 1
assert gl.free_idx == 3
gl.append(1)
assert gl.free_idx == 4
gl[2] = None
assert gl.free_idx == 2
gl[2] = 1
gl[1] = None
assert gl.free_idx == 1
def test_lines(): def test_lines():
c = Circuit() c = Circuit()
n1 = Node(c, 'n1') n1 = Node(c, 'n1')

87
tests/test_logic_sim.py

@ -1,6 +1,6 @@
import numpy as np import numpy as np
from kyupy.logic_sim import LogicSim, LogicSim6V from kyupy.logic_sim import LogicSim
from kyupy import bench, logic, sim from kyupy import bench, logic, sim
from kyupy.logic import mvarray, bparray, bp_to_mv, mv_to_bp from kyupy.logic import mvarray, bparray, bp_to_mv, mv_to_bp
@ -94,30 +94,6 @@ def test_4v():
'--0XX', '--X1X', '--XXX', '--XXX')) '--0XX', '--X1X', '--XXX', '--XXX'))
def test_6v():
c = bench.parse('input(x, y) output(a, o, n, xo, no) a=AND2(x,y) o=OR2(x,y) n=INV1(x) xo=XOR2(x,y) no=NOR2(x,y)')
s = LogicSim6V(c, 36)
assert s.s_len == 7
mva = mvarray(
'0000101', '0101110', '0R0R1RF', '0F0F1FR', '0P0P1PN', '0N0N1NP',
'1001010', '1111000', '1RR10F0', '1FF10R0', '1PP10N0', '1NN10P0',
'R00RFRF', 'R1R1FF0', 'RRRRFPF', 'RFPNFNP', 'RPPRFRF', 'RNRNFFP',
'F00FRFR', 'F1F1RR0', 'FRPNRNP', 'FFFFRPR', 'FPPFRFR', 'FNFNRRP',
'P00PNPN', 'P1P1NN0', 'PRPRNRF', 'PFPFNFR', 'PPPPNPN', 'PNPNNNP',
'N00NPNP', 'N1N1PP0', 'NRRNPFP', 'NFFNPRP', 'NPPNPNP', 'NNNNPPP')
tests = np.copy(mva)
tests[2:] = logic.ZERO
s.s[0] = tests
s.s_to_c()
s.c_prop()
s.c_to_s()
resp = s.s[1].copy()
exp_resp = np.copy(mva)
exp_resp[:2] = logic.ZERO
np.testing.assert_allclose(resp, exp_resp)
def test_8v(): def test_8v():
c = bench.parse('input(x, y) output(a, o, n, xo) a=and(x,y) o=or(x,y) n=not(x) xo=xor(x,y)') c = bench.parse('input(x, y) output(a, o, n, xo) a=and(x,y) o=or(x,y) n=not(x) xo=xor(x,y)')
s = LogicSim(c, 64, m=8) s = LogicSim(c, 64, m=8)
@ -197,64 +173,3 @@ def test_b01(mydir):
s.c_prop() s.c_prop()
s.c_to_s() s.c_to_s()
bp_to_mv(s.s[1]) bp_to_mv(s.s[1])
def sim_and_compare(c, test_resp, m=8):
tests, resp = test_resp
lsim = LogicSim(c, m=m, sims=tests.shape[1])
lsim.s[0] = logic.mv_to_bp(tests)
lsim.s_to_c()
lsim.c_prop()
lsim.c_to_s()
resp_sim = logic.bp_to_mv(lsim.s[1])[:,:tests.shape[1]]
idxs, pats = np.nonzero(((resp == logic.ONE) & (resp_sim != logic.ONE)) | ((resp == logic.ZERO) & (resp_sim != logic.ZERO)))
for i, (idx, pat) in enumerate(zip(idxs, pats)):
if i >= 10:
print(f'...')
break
print(f'mismatch pattern:{pat} ppio:{idx} exp:{logic.mv_str(resp[idx,pat])} act:{logic.mv_str(resp_sim[idx,pat])}')
assert len(idxs) == 0
def sim_and_compare_6v(c, test_resp):
tests, resp = test_resp
lsim = LogicSim6V(c, sims=tests.shape[1])
lsim.s[0] = tests
lsim.s_to_c()
lsim.c_prop()
lsim.c_to_s()
resp_sim = lsim.s[1]
idxs, pats = np.nonzero(((resp == logic.ONE) & (resp_sim != logic.ONE)) | ((resp == logic.ZERO) & (resp_sim != logic.ZERO)))
for i, (idx, pat) in enumerate(zip(idxs, pats)):
if i >= 10:
print(f'...')
break
print(f'mismatch pattern:{pat} ppio:{idx} exp:{logic.mv_str(resp[idx,pat])} act:{logic.mv_str(resp_sim[idx,pat])}')
assert len(idxs) == 0
def test_b15_2ig_sa_2v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=2)
def test_b15_2ig_sa_4v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=4)
def test_b15_2ig_sa_6v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
sim_and_compare_6v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp)
def test_b15_2ig_sa_8v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=8)
def test_b15_4ig_sa_2v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp):
sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=2)
def test_b15_4ig_sa_4v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp):
sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=4)
def test_b15_4ig_sa_8v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp):
sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=8)

6
tests/test_sdf.py

@ -2,7 +2,7 @@ import numpy as np
from kyupy import sdf, verilog, bench from kyupy import sdf, verilog, bench
from kyupy.wave_sim import WaveSim, TMAX, TMIN from kyupy.wave_sim import WaveSim, TMAX, TMIN
from kyupy.techlib import SAED32, NANGATE45 from kyupy.techlib import SAED32, SAED90
def test_parse(): def test_parse():
test = ''' test = '''
@ -80,9 +80,9 @@ def test_b15(mydir):
def test_gates(mydir): def test_gates(mydir):
c = verilog.load(mydir / 'gates.v', tlib=NANGATE45) c = verilog.load(mydir / 'gates.v', tlib=SAED90)
df = sdf.load(mydir / 'gates.sdf') df = sdf.load(mydir / 'gates.sdf')
lt = df.iopaths(c, tlib=NANGATE45)[1] lt = df.iopaths(c, tlib=SAED90)[1]
nand_a = c.cells['nandgate'].ins[0] nand_a = c.cells['nandgate'].ins[0]
nand_b = c.cells['nandgate'].ins[1] nand_b = c.cells['nandgate'].ins[1]
and_a = c.cells['andgate'].ins[0] and_a = c.cells['andgate'].ins[0]

12
tests/test_verilog.py

@ -1,5 +1,5 @@
from kyupy import verilog from kyupy import verilog
from kyupy.techlib import SAED90, SAED32, NANGATE45 from kyupy.techlib import SAED90, SAED32
def test_b01(mydir): def test_b01(mydir):
with open(mydir / 'b01.v', 'r') as f: with open(mydir / 'b01.v', 'r') as f:
@ -26,12 +26,12 @@ def test_b15(mydir):
def test_gates(mydir): def test_gates(mydir):
c = verilog.load(mydir / 'gates.v', tlib=NANGATE45) c = verilog.load(mydir / 'gates.v', tlib=SAED90)
assert len(c.nodes) == 18 assert len(c.nodes) == 10
assert len(c.lines) == 21 assert len(c.lines) == 10
stats = c.stats stats = c.stats
assert stats['input'] == 3 assert stats['input'] == 2
assert stats['output'] == 4 assert stats['output'] == 2
assert stats['__seq__'] == 0 assert stats['__seq__'] == 0

60
tests/test_wave_sim.py

@ -5,56 +5,22 @@ from kyupy.logic_sim import LogicSim
from kyupy import logic, bench, sim from kyupy import logic, bench, sim
from kyupy.logic import mvarray from kyupy.logic import mvarray
def test_xnor2_delays():
op = (sim.XNOR2, 2, 0, 1, 3, 3, -1, 0, 0)
#op = (0b0111, 4, 0, 1)
c = np.full((4*16, 1), TMAX, dtype=np.float32) # 4 waveforms of capacity 16
c_locs = np.zeros((4,), dtype='int')
c_caps = np.zeros((4,), dtype='int')
ebuf = np.zeros((4, 1, 2), dtype=np.int32)
for i in range(4): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping
delays = np.zeros((1, 4, 2, 2))
delays[0, 0, 0, 0] = 0.031 # A rise -> Z rise
delays[0, 0, 0, 1] = 0.027 # A rise -> Z fall
delays[0, 0, 1, 0] = 0.033 # A fall -> Z rise
delays[0, 0, 1, 1] = 0.037 # A fall -> Z fall
delays[0, 1, 0, 0] = 0.032 # B rise -> Z rise
delays[0, 1, 0, 1] = 0.030 # B rise -> Z fall
delays[0, 1, 1, 0] = 0.038 # B fall -> Z rise
delays[0, 1, 1, 1] = 0.036 # B fall -> Z fall
simctl_int = np.asarray([0], dtype=np.int32)
def wave_assert(inputs, output):
for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i
wave_eval_cpu(op, c, c_locs, c_caps, ebuf, 0, delays, simctl_int, 0, 0)
for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[2,i], v)
wave_assert([[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # XNOR(1,1) => 1
wave_assert([[TMAX,TMAX],[TMIN,TMAX]], [TMAX]) # XNOR(0,1) => 0
# using Afall/Zfall for pulse length, bug: was using Arise/Zfall
#wave_assert([[0.07, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, 0.101, 0.137, TMAX])
wave_assert([[0.07, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, TMAX])
wave_assert([[0.06, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, 0.091, 0.137, TMAX])
def test_nand_delays(): def test_nand_delays():
op = (sim.NAND4, 4, 0, 1, 2, 3, -1, 0, 0) op = (sim.NAND4, 4, 0, 1, 2, 3, -1, 0, 0)
#op = (0b0111, 4, 0, 1) #op = (0b0111, 4, 0, 1)
c = np.full((5*16, 1), TMAX, dtype=np.float32) # 5 waveforms of capacity 16 c = np.full((5*16, 1), TMAX) # 5 waveforms of capacity 16
c_locs = np.zeros((5,), dtype='int') c_locs = np.zeros((5,), dtype='int')
c_caps = np.zeros((5,), dtype='int') c_caps = np.zeros((5,), dtype='int')
ebuf = np.zeros((4, 1, 2), dtype=np.int32)
for i in range(5): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping for i in range(5): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping
# SDF specifies IOPATH delays with respect to output polarity # SDF specifies IOPATH delays with respect to output polarity
# SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition # SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition
delays = np.zeros((1, 5, 2, 2)) delays = np.zeros((1, 5, 2, 2))
delays[0, 0, 0, 0] = 0.1 # A rise -> Z rise delays[0, 0, 0, 0] = 0.1 # A -> Z rise delay
delays[0, 0, 0, 1] = 0.2 # A rise -> Z fall delays[0, 0, 0, 1] = 0.2 # A -> Z fall delay
delays[0, 0, 1, 0] = 0.1 # A fall -> Z rise delays[0, 0, 1, 0] = 0.1 # A -> Z negative pulse limit (terminate in rising Z)
delays[0, 0, 1, 1] = 0.2 # A fall -> Z fall delays[0, 0, 1, 1] = 0.2 # A -> Z positive pulse limit
delays[0, 1, :, 0] = 0.3 # as above for B -> Z delays[0, 1, :, 0] = 0.3 # as above for B -> Z
delays[0, 1, :, 1] = 0.4 delays[0, 1, :, 1] = 0.4
delays[0, 2, :, 0] = 0.5 # as above for C -> Z delays[0, 2, :, 0] = 0.5 # as above for C -> Z
@ -66,7 +32,7 @@ def test_nand_delays():
def wave_assert(inputs, output): def wave_assert(inputs, output):
for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i
wave_eval_cpu(op, c, c_locs, c_caps, ebuf, 0, delays, simctl_int, 0, 0) wave_eval_cpu(op, c, c_locs, c_caps, 0, delays, simctl_int)
for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v) for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v)
wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1 wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1
@ -179,7 +145,7 @@ def compare_to_logic_sim(wsim: WaveSim):
lsim.s_to_c() lsim.s_to_c()
lsim.c_prop() lsim.c_prop()
lsim.c_to_s() lsim.c_to_s()
exp = logic.bp_to_mv(lsim.s[1])[:,:tests.shape[-1]] exp = logic.bp_to_mv(lsim.s[1])
resp[resp == logic.PPULSE] = logic.ZERO resp[resp == logic.PPULSE] = logic.ZERO
resp[resp == logic.NPULSE] = logic.ONE resp[resp == logic.NPULSE] = logic.ONE
@ -190,13 +156,13 @@ def compare_to_logic_sim(wsim: WaveSim):
np.testing.assert_allclose(resp, exp) np.testing.assert_allclose(resp, exp)
def test_b15(b15_2ig_circuit_resolved, b15_2ig_delays): def test_b15(b15_2ig_circuit, b15_2ig_delays):
compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8)) compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8))
def test_b15_strip_forks(b15_2ig_circuit_resolved, b15_2ig_delays): def test_b15_strip_forks(b15_2ig_circuit, b15_2ig_delays):
compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True)) compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))
def test_b15_cuda(b15_2ig_circuit_resolved, b15_2ig_delays): def test_b15_cuda(b15_2ig_circuit, b15_2ig_delays):
compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True)) compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))

Loading…
Cancel
Save