Compare commits

..

No commits in common. 'devel' and 'main' have entirely different histories.
devel ... main

  1. 2
      docs/conf.py
  2. 2
      setup.py
  3. 18
      src/kyupy/__init__.py
  4. 97
      src/kyupy/circuit.py
  5. 18
      src/kyupy/logic.py
  6. 162
      src/kyupy/logic_sim.py
  7. 37
      src/kyupy/sdf.py
  8. 132
      src/kyupy/sim.py
  9. 32
      src/kyupy/stil.py
  10. 168
      src/kyupy/techlib.py
  11. 9
      src/kyupy/verilog.py
  12. 173
      src/kyupy/wave_sim.py
  13. BIN
      tests/b15_4ig.sa_rf.stil.gz
  14. 36
      tests/conftest.py
  15. 39
      tests/gates.sdf
  16. 12
      tests/gates.v
  17. 23
      tests/test_circuit.py
  18. 87
      tests/test_logic_sim.py
  19. 6
      tests/test_sdf.py
  20. 12
      tests/test_verilog.py
  21. 60
      tests/test_wave_sim.py

2
docs/conf.py

@ -24,7 +24,7 @@ copyright = '2020-2023, Stefan Holst' @@ -24,7 +24,7 @@ copyright = '2020-2023, Stefan Holst'
author = 'Stefan Holst'
# The full version, including alpha/beta/rc tags
release = '0.0.5'
release = '0.0.4'
# -- General configuration ---------------------------------------------------

2
setup.py

@ -5,7 +5,7 @@ with open('README.rst', 'r') as f: @@ -5,7 +5,7 @@ with open('README.rst', 'r') as f:
setup(
name='kyupy',
version='0.0.5',
version='0.0.4',
description='High-performance processing and analysis of non-hierarchical VLSI designs',
long_description=long_description,
long_description_content_type='text/x-rst',

18
src/kyupy/__init__.py

@ -57,18 +57,6 @@ def hr_bytes(nbytes): @@ -57,18 +57,6 @@ def hr_bytes(nbytes):
multiplier += 1
return f'{nbytes:.1f}{["", "ki", "Mi", "Gi", "Ti", "Pi"][multiplier]}B'
def eng(number):
    """Formats a given number using engineering notation.

    The number is scaled by powers of 1000 until its magnitude lies in
    ``[1, 1000)`` and is then printed without fractional digits, followed by
    ``e<exponent>`` when the exponent is non-zero (e.g. ``2000 -> '2e3'``,
    ``0.5 -> '500e-3'``).

    :param number: The value to format (int or float).
    :return: The formatted string. Zero and non-finite values (``inf``, ``nan``)
        are returned without an exponent.
    """
    import math  # local import keeps this block self-contained

    # Zero can never be scaled into [1, 1000) and inf/nan never change when
    # scaled; bail out early instead of looping forever.
    if number == 0 or not math.isfinite(number):
        return f'{number:.0f}'
    exponent = 0
    if abs(number) < 1:
        # Scale small magnitudes up until at least one integer digit exists.
        while abs(number) < 1:
            number *= 1000
            exponent -= 3
    else:
        # Scale large magnitudes down below 1000.
        while abs(number) >= 1000:
            number /= 1000
            exponent += 3
    return f'{number:.0f}' + (f'e{exponent}' if exponent != 0 else '')
def hr_time(seconds):
"""Formats a given time interval for human readability."""
@ -150,10 +138,10 @@ class Log: @@ -150,10 +138,10 @@ class Log:
self._limit = limit
def stop_limit(self):
self._limit = -1
if self.filtered > 0:
self.info(f'{self.filtered} more messages (filtered).')
log.info(f'{self.filtered} more messages (filtered).')
self.filtered = 0
self._limit = -1
def __getstate__(self):
return {'elapsed': time.perf_counter() - self.start}
@ -161,8 +149,6 @@ class Log: @@ -161,8 +149,6 @@ class Log:
def __setstate__(self, state):
self.logfile = sys.stdout
self.indent = 0
self._limit = -1
self.filtered = 0
self.start = time.perf_counter() - state['elapsed']
def write(self, s, indent=0):

97
src/kyupy/circuit.py

@ -10,40 +10,20 @@ Circuit graphs also define an ordering of inputs, outputs and other nodes to eas @@ -10,40 +10,20 @@ Circuit graphs also define an ordering of inputs, outputs and other nodes to eas
"""
from __future__ import annotations
from collections import deque, defaultdict
import re
from typing import Union
import numpy as np
class GrowingList(list):
def __setitem__(self, index, value):
if value is None: self.has_nones = True
if index == len(self): return super().append(value)
if index > len(self):
super().extend([None] * (index + 1 - len(self)))
self.has_nones = True
if index >= len(self):
self.extend([None] * (index + 1 - len(self)))
super().__setitem__(index, value)
def __getitem__(self, index):
if isinstance(index, slice): return super().__getitem__(index)
return super().__getitem__(index) if index < len(self) else None
@property
def free_idx(self):
fi = len(self)
if hasattr(self, 'has_nones') and self.has_nones:
fi = next((i for i, x in enumerate(self) if x is None), len(self))
self.has_nones = fi < len(self)
return fi
def without_nones(self):
for item in self:
if item is not None:
yield item
def free_index(self):
return next((i for i, x in enumerate(self) if x is None), len(self))
class IndexList(list):
@ -96,10 +76,10 @@ class Node: @@ -96,10 +76,10 @@ class Node:
by allocating an array or list :code:`my_data` of length :code:`len(n.circuit.nodes)` and
accessing it by :code:`my_data[n.index]` or simply by :code:`my_data[n]`.
"""
self.ins: GrowingList[Line] = GrowingList()
self.ins = GrowingList()
"""A list of input connections (:class:`Line` objects).
"""
self.outs: GrowingList[Line] = GrowingList()
self.outs = GrowingList()
"""A list of output connections (:class:`Line` objects).
"""
@ -155,7 +135,7 @@ class Line: @@ -155,7 +135,7 @@ class Line:
Use the explicit case only if connections to specific pins are required.
It may overwrite any previous line references in the connection list of the nodes.
"""
def __init__(self, circuit: Circuit, driver: Union[Node, tuple[Node, int]], reader: Union[Node, tuple[Node, int]]):
def __init__(self, circuit, driver, reader):
self.circuit = circuit
"""The :class:`Circuit` object the line is part of.
"""
@ -167,7 +147,7 @@ class Line: @@ -167,7 +147,7 @@ class Line:
by allocating an array or list :code:`my_data` of length :code:`len(l.circuit.lines)` and
accessing it by :code:`my_data[l.index]` or simply by :code:`my_data[l]`.
"""
if not isinstance(driver, tuple): driver = (driver, driver.outs.free_idx)
if not isinstance(driver, tuple): driver = (driver, driver.outs.free_index())
self.driver = driver[0]
"""The :class:`Node` object that drives this line.
"""
@ -177,7 +157,7 @@ class Line: @@ -177,7 +157,7 @@ class Line:
This is the position in the list :py:attr:`Node.outs` of the driving node this line referenced from:
:code:`self.driver.outs[self.driver_pin] == self`.
"""
if not isinstance(reader, tuple): reader = (reader, reader.ins.free_idx)
if not isinstance(reader, tuple): reader = (reader, reader.ins.free_index())
self.reader = reader[0]
"""The :class:`Node` object that reads this line.
"""
@ -312,7 +292,7 @@ class Circuit: @@ -312,7 +292,7 @@ class Circuit:
def _locs(self, prefix, nodes):
d_top = dict()
for i, n in enumerate(nodes):
if m := re.match(fr'({re.escape(prefix)}.*?)((?:[\d_\[\]])*$)', n.name):
if m := re.match(fr'({prefix}.*?)((?:[\d_\[\]])*$)', n.name):
path = [m[1]] + [int(v) for v in re.split(r'[_\[\]]+', m[2]) if len(v) > 0]
d = d_top
for j in path[:-1]:
@ -354,16 +334,15 @@ class Circuit: @@ -354,16 +334,15 @@ class Circuit:
def get_or_add_fork(self, name):
return self.forks[name] if name in self.forks else Node(self, name)
def remove_dangling_nodes(self, root_node:Node, keep=[]):
def remove_dangling_nodes(self, root_node:Node):
if len([l for l in root_node.outs if l is not None]) > 0: return
lines = [l for l in root_node.ins if l is not None]
drivers = [l.driver for l in lines]
if root_node in keep: return
root_node.remove()
for l in lines:
l.remove()
for d in drivers:
self.remove_dangling_nodes(d, keep=keep)
self.remove_dangling_nodes(d)
def eliminate_1to1_forks(self):
"""Removes all forks that drive only one node.
@ -391,21 +370,6 @@ class Circuit: @@ -391,21 +370,6 @@ class Circuit:
in_line.reader_pin = out_reader_pin
in_line.reader.ins[in_line.reader_pin] = in_line
def remove_forks(self):
    """Removes all fork nodes that are not I/O nodes of the circuit.

    For every removed fork, its input line and all of its output lines are
    removed. If the fork had a driver, each former reader of the fork is
    re-connected directly to that driver at its original reader pin.
    """
    protected = set(self.io_nodes)
    # Iterate over a snapshot, because forks are removed along the way.
    for fork in list(self.forks.values()):
        if fork in protected:
            continue
        stem = fork.ins[0]
        driver = None
        if stem is not None:
            driver = stem.driver
            stem.remove()
        for branch in list(fork.outs):
            if branch is None:
                continue
            reader, reader_pin = branch.reader, branch.reader_pin
            branch.remove()
            if driver is not None:
                Line(self, driver, (reader, reader_pin))
        fork.remove()
def substitute(self, node, impl):
"""Replaces a given node with the given implementation circuit.
@ -464,7 +428,7 @@ class Circuit: @@ -464,7 +428,7 @@ class Circuit:
for l, ll in zip(impl_out_lines, node_out_lines): # connect outputs
if ll is None:
if l.driver in node_map:
self.remove_dangling_nodes(node_map[l.driver], keep=ios)
self.remove_dangling_nodes(node_map[l.driver])
continue
if len(l.reader.outs) > 0: # output is also read by impl. circuit, connect to fork.
ll.driver = node_map[l.reader]
@ -483,21 +447,6 @@ class Circuit: @@ -483,21 +447,6 @@ class Circuit:
if n.kind in tlib.cells:
self.substitute(n, tlib.cells[n.kind][0])
def remove_constants(self):
    """Removes all ``__const0__`` and ``__const1__`` nodes from the circuit.

    Constant-0 nodes are dropped together with their output lines, since
    unconnected inputs are defined as 0. All readers of constant-1 nodes are
    re-connected to one shared ``__const1gen__`` node of kind ``INV1`` that is
    created on first use.
    """
    c1gen = None
    # Work on snapshots: nodes are removed (and possibly one is added) while
    # we iterate, and removing a line is expected to edit the node's
    # connection list. This mirrors how remove_forks() iterates.
    for n in list(self.nodes):
        if n.kind == '__const0__':  # just remove, unconnected inputs are defined 0.
            for l in list(n.outs):
                if l is None: continue  # unused output pin
                l.remove()
            n.remove()
        elif n.kind == '__const1__':
            if c1gen is None: c1gen = Node(self, '__const1gen__', 'INV1')  # one unique const 1 generator
            for l in list(n.outs):
                if l is None: continue  # unused output pin
                r, rp = l.reader, l.reader_pin
                l.remove()
                Line(self, c1gen, (r, rp))
            n.remove()
def copy(self):
"""Returns a deep copy of the circuit.
"""
@ -552,15 +501,14 @@ class Circuit: @@ -552,15 +501,14 @@ class Circuit:
substrings 'dff' or 'latch' are yielded first.
"""
visit_count = np.zeros(len(self.nodes), dtype=np.uint32)
start = set(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower() or 'latch' in n.kind.lower())
queue = deque(start)
queue = deque(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower() or 'latch' in n.kind.lower())
while len(queue) > 0:
n = queue.popleft()
for line in n.outs:
if line is None: continue
succ = line.reader
visit_count[succ] += 1
if visit_count[succ] == len(succ.ins) and succ not in start:
if visit_count[succ] == len(succ.ins) and 'dff' not in succ.kind.lower() and 'latch' not in succ.kind.lower():
queue.append(succ)
yield n
@ -615,21 +563,6 @@ class Circuit: @@ -615,21 +563,6 @@ class Circuit:
if marks[n]:
yield n
def fanout(self, origin_nodes):
    """Generator function to iterate over the fan-out cone of a given list of origin nodes.

    A node belongs to the cone if it is an origin node or if at least one of
    its connected inputs is driven by a node of the cone.
    Nodes are yielded in topological order.
    """
    in_cone = [False] * len(self.nodes)
    for origin in origin_nodes:
        in_cone[origin] = True
    for node in self.topological_order():
        if not in_cone[node]:
            # Drivers precede their readers in topological order, so their
            # membership is already final here.
            in_cone[node] = any(in_cone[line.driver] for line in node.ins.without_nones())
        if in_cone[node]:
            yield node
def fanout_free_regions(self):
for stem in self.reversed_topological_order():
if len(stem.outs) == 1 and 'dff' not in stem.kind.lower(): continue

18
src/kyupy/logic.py

@ -241,8 +241,6 @@ def mv_latch(d, t, q_prev, out=None): @@ -241,8 +241,6 @@ def mv_latch(d, t, q_prev, out=None):
def mv_transition(init, final, out=None):
"""Computes the logic transitions from the initial values of ``init`` to the final values of ``final``.
Pulses in the input data are ignored. If any of the inputs are ``UNKNOWN``, the result is ``UNKNOWN``.
If init is ``UNASSIGNED``, the result is the final value of ``final``.
If final is ``UNASSIGNED``, the result is the initial value of ``init``.
If both inputs are ``UNASSIGNED``, the result is ``UNASSIGNED``.
:param init: A multi-valued array.
@ -253,9 +251,7 @@ def mv_transition(init, final, out=None): @@ -253,9 +251,7 @@ def mv_transition(init, final, out=None):
out = out or np.empty(np.broadcast(init, final).shape, dtype=np.uint8)
out[...] = (init & 0b010) | (final & 0b001)
out[...] |= ((out << 1) ^ (out << 2)) & 0b100
out[...] = np.choose(init == UNASSIGNED, [out, (final & 0b001) * ONE])
out[...] = np.choose(final == UNASSIGNED, [out, ((init & 0b010) >> 1) * ONE])
unknown = (init == UNKNOWN) | (final == UNKNOWN)
unknown = (init == UNKNOWN) | (init == UNASSIGNED) | (final == UNKNOWN) | (final == UNASSIGNED)
unassigned = (init == UNASSIGNED) & (final == UNASSIGNED)
np.putmask(out, unknown, UNKNOWN)
np.putmask(out, unassigned, UNASSIGNED)
@ -269,18 +265,6 @@ def mv_to_bp(mva): @@ -269,18 +265,6 @@ def mv_to_bp(mva):
return np.packbits(unpackbits(mva)[...,:3], axis=-2, bitorder='little').swapaxes(-1,-2)
def mv_init(mva):
    """Returns the initial binary values for mva.

    The result is 1 where bit 1 of the value is set together with bit 0 or
    bit 2, and 0 otherwise. Works element-wise on anything supporting ``>>``,
    ``|`` and ``&`` (plain ints as well as arrays).
    """
    bit1 = mva >> 1
    qualifier = (mva >> 2) | mva
    return bit1 & qualifier & 1
def mv_final(mva):
    """Returns the final binary value of mva.

    The result is 1 where bit 0 of the value is set together with bit 1 or
    bit 2, and 0 otherwise. Works element-wise on anything supporting ``>>``,
    ``|`` and ``&`` (plain ints as well as arrays).
    """
    qualifier = (mva >> 2) | (mva >> 1)
    return mva & qualifier & 1
def bparray(*a):
"""Converts (lists of) Boolean values or strings into a bit-parallel array.

162
src/kyupy/logic_sim.py

@ -10,10 +10,9 @@ import math @@ -10,10 +10,9 @@ import math
import numpy as np
from . import numba, logic, hr_bytes, sim, eng, cdiv
from . import numba, logic, hr_bytes, sim
from .circuit import Circuit
class LogicSim(sim.SimOps):
"""A bit-parallel naïve combinational simulator for 2-, 4-, or 8-valued logic.
@ -29,7 +28,7 @@ class LogicSim(sim.SimOps): @@ -29,7 +28,7 @@ class LogicSim(sim.SimOps):
self.m = m
self.mdim = math.ceil(math.log2(m))
self.sims = sims
nbytes = cdiv(sims, 8)
nbytes = (sims - 1) // 8 + 1
self.c = np.zeros((self.c_len, self.mdim, nbytes), dtype=np.uint8)
self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8)
@ -45,14 +44,14 @@ class LogicSim(sim.SimOps): @@ -45,14 +44,14 @@ class LogicSim(sim.SimOps):
self.s[:,:,1,:] = 255 # unassigned
def __repr__(self):
return f'{{name: "{self.circuit.name}", sims: {self.sims}, m: {self.m}, c_bytes: {eng(self.c.nbytes)}}}'
return f'{{name: "{self.circuit.name}", sims: {self.sims}, m: {self.m}, c_bytes: {self.c.nbytes}}}'
def s_to_c(self):
"""Copies the values from ``s[0]`` the inputs of the combinational portion.
"""
self.c[self.pippi_c_locs] = self.s[0, self.pippi_s_locs, :self.mdim]
def c_prop(self, sims=None, inject_cb=None, flip_line=-1, flip_mask=None):
def c_prop(self, inject_cb=None):
"""Propagate the input values through the combinational circuit towards the outputs.
Performs all logic operations in topological order.
@ -68,17 +67,10 @@ class LogicSim(sim.SimOps): @@ -68,17 +67,10 @@ class LogicSim(sim.SimOps):
t1 = self.c_locs[self.tmp2_idx]
if self.m == 2:
if inject_cb is None:
if flip_mask is None:
flip_mask = np.full(self.c.shape[-1], 255, dtype=np.uint8)
else:
if len(flip_mask) < self.c.shape[-1]:
flip_mask2 = np.full(self.c.shape[-1], 0, dtype=np.uint8)
flip_mask2[:len(flip_mask)] = flip_mask
flip_mask = flip_mask2
_prop_cpu(self.ops, self.c_locs, self.c, int(flip_line), flip_mask)
_prop_cpu(self.ops, self.c_locs, self.c)
else:
for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)]
for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
if op == sim.BUF1: self.c[o0]=self.c[i0]
elif op == sim.INV1: self.c[o0] = ~self.c[i0]
elif op == sim.AND2: self.c[o0] = self.c[i0] & self.c[i1]
@ -113,10 +105,10 @@ class LogicSim(sim.SimOps): @@ -113,10 +105,10 @@ class LogicSim(sim.SimOps):
elif op == sim.OAI211:self.c[o0] = ~((self.c[i0] | self.c[i1]) & self.c[i2] & self.c[i3])
elif op == sim.MUX21: self.c[o0] = (self.c[i0] & ~self.c[i2]) | (self.c[i1] & self.c[i2])
else: print(f'unknown op {op}')
inject_cb(o0l, self.c[o0])
inject_cb(o0, self.s[o0])
elif self.m == 4:
for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)]
for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
if op == sim.BUF1: self.c[o0]=self.c[i0]
elif op == sim.INV1: logic.bp4v_not(self.c[o0], self.c[i0])
elif op == sim.AND2: logic.bp4v_and(self.c[o0], self.c[i0], self.c[i1])
@ -189,10 +181,9 @@ class LogicSim(sim.SimOps): @@ -189,10 +181,9 @@ class LogicSim(sim.SimOps):
logic.bp4v_and(self.c[t1], self.c[i1], self.c[i2])
logic.bp4v_or(self.c[o0], self.c[t0], self.c[t1])
else: print(f'unknown op {op}')
if inject_cb is not None: inject_cb(o0l, self.c[o0])
else:
for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)]
for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
if op == sim.BUF1: self.c[o0]=self.c[i0]
elif op == sim.INV1: logic.bp8v_not(self.c[o0], self.c[i0])
elif op == sim.AND2: logic.bp8v_and(self.c[o0], self.c[i0], self.c[i1])
@ -265,7 +256,7 @@ class LogicSim(sim.SimOps): @@ -265,7 +256,7 @@ class LogicSim(sim.SimOps):
logic.bp8v_and(self.c[t1], self.c[i1], self.c[i2])
logic.bp8v_or(self.c[o0], self.c[t0], self.c[t1])
else: print(f'unknown op {op}')
if inject_cb is not None: inject_cb(o0l, self.c[o0])
if inject_cb is not None: inject_cb(o0, self.s[o0])
def c_to_s(self):
"""Copies (captures) the results of the combinational portion to ``s[1]``.
@ -305,9 +296,9 @@ class LogicSim(sim.SimOps): @@ -305,9 +296,9 @@ class LogicSim(sim.SimOps):
@numba.njit
def _prop_cpu(ops, c_locs, c, flip_line, flip_mask):
for op, o0l, i0l, i1l, i2l, i3l in ops[:,:6]:
o0, i0, i1, i2, i3 = [c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)]
def _prop_cpu(ops, c_locs, c):
for op, o0, i0, i1, i2, i3 in ops[:,:6]:
o0, i0, i1, i2, i3 = [c_locs[x] for x in (o0, i0, i1, i2, i3)]
if op == sim.BUF1: c[o0]=c[i0]
elif op == sim.INV1: c[o0] = ~c[i0]
elif op == sim.AND2: c[o0] = c[i0] & c[i1]
@ -342,124 +333,3 @@ def _prop_cpu(ops, c_locs, c, flip_line, flip_mask): @@ -342,124 +333,3 @@ def _prop_cpu(ops, c_locs, c, flip_line, flip_mask):
elif op == sim.OAI211: c[o0] = ~((c[i0] | c[i1]) & c[i2] & c[i3])
elif op == sim.MUX21: c[o0] = (c[i0] & ~c[i2]) | (c[i1] & c[i2])
else: print(f'unknown op {op}')
if flip_line >= 0 and o0l == flip_line:
#n = len(flip_mask)
c[o0] = c[o0] ^ flip_mask
class LogicSim6V(sim.SimOps):
    """A bit-parallel naïve combinational simulator for 6-valued logic.

    :param circuit: The circuit to simulate.
    :param sims: The number of parallel logic simulations to perform.
    :param c_reuse: If True, intermediate signal values may get overwritten when not needed anymore to save memory.
    :param strip_forks: If True, forks are not included in the simulation model to save memory and simulation time.
    """
    def __init__(self, circuit: Circuit, sims: int = 8, c_reuse: bool = False, strip_forks: bool = False):
        super().__init__(circuit, c_reuse=c_reuse, strip_forks=strip_forks)
        self.sims = sims
        # One bit per simulation; three bit planes per combinational signal.
        bytes_per_plane = cdiv(sims, 8)
        c_shape = (self.c_len, 3, bytes_per_plane)
        s_shape = (2, self.s_len, self.sims)
        self.c = np.zeros(c_shape, dtype=np.uint8)
        self.s = np.zeros(s_shape, dtype=np.uint8)
        """Logic values of the sequential elements (flip-flops) and ports.

        It is a pair of arrays in mv storage format:

        * ``s[0]`` Assigned values. Simulator will read (P)PI value from here.
        * ``s[1]`` Result values. Simulator will write (P)PO values here.

        Access this array to assign new values to the (P)PIs or read values from the (P)POs.
        """

    def __repr__(self):
        return f'{{name: "{self.circuit.name}", sims: {self.sims}, c_bytes: {eng(self.c.nbytes)}}}'

    def s_to_c(self):
        """Assigns the values from ``s[0]`` to the inputs of the combinational portion.
        """
        assigned = self.s[0, self.pippi_s_locs]
        self.c[self.pippi_c_locs] = logic.mv_to_bp(assigned)

    def c_prop(self):
        """Propagates the input values through the combinational portion using :func:`c_prop_cpu`.
        """
        c_prop_cpu(self.ops, self.c, self.c_locs, self.tmp_idx, self.tmp2_idx)

    def c_to_s(self):
        """Captures the results of the combinational portion into ``s[1]``.
        """
        captured = logic.bp_to_mv(self.c[self.poppo_c_locs])
        # The unpacked data may be padded to a full byte; keep only the real simulations.
        self.s[1, self.poppo_s_locs] = captured[:, :self.sims]
@numba.njit
def c_prop_cpu(ops, c, c_locs, tmp_idx, tmp2_idx):
    """Evaluates all operations in ``ops`` in order, updating ``c`` in place.

    :param ops: 2-D array of operations. Only the first six columns are used:
        opcode, output line, and four input lines.
    :param c: Signal storage. Every entry selected via ``c_locs`` is indexed with
        planes 0, 1 and 2 (three bit planes per signal).
    :param c_locs: Maps line indices to locations in ``c``.
    :param tmp_idx: Line index of the first scratch entry (used by MUX21).
    :param tmp2_idx: Line index of the second scratch entry (used by MUX21).

    Inverting gates are computed like their non-inverting counterpart and are
    inverted in a final step by flipping planes 0 and 1.
    NOTE(review): plane 2 presumably marks values that are not plain binary
    (e.g. unknowns/transitions) -- confirm against the bit-parallel encoding in ``logic``.
    """
    # Scratch rows for intermediate results of composite operations.
    t0 = c[c_locs[tmp_idx]]
    t1 = c[c_locs[tmp2_idx]]
    # XOR mask for inversion: flips planes 0 and 1, leaves plane 2 untouched.
    inv_op = np.array([255, 255, 0], dtype=np.uint8)[np.newaxis, :, np.newaxis]
    for op, o0l, i0l, i1l, i2l, i3l in ops[:,:6]:
        # Translate line indices into the corresponding rows of c.
        o0, i0, i1, i2, i3 = [c[c_locs[x]] for x in (o0l, i0l, i1l, i2l, i3l)]
        if op == sim.BUF1 or op == sim.INV1:
            o0[...] = i0
        elif op == sim.AND2 or op == sim.NAND2:
            o0[0] = i0[0] & i1[0]
            o0[1] = i0[1] & i1[1]
            # Plane 2 is set if one input has plane 2 set and the other input has any plane set.
            o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])|
                     i1[2]&(i0[0]|i0[1]|i0[2]))
        elif op == sim.AND3 or op == sim.NAND3:
            o0[0] = i0[0] & i1[0] & i2[0]
            o0[1] = i0[1] & i1[1] & i2[1]
            o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2])|
                     i1[2]&(i0[0]|i0[1]|i0[2])&(i2[0]|i2[1]|i2[2])|
                     i2[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2]))
        elif op == sim.AND4 or op == sim.NAND4:
            o0[0] = i0[0] & i1[0] & i2[0] & i3[0]
            o0[1] = i0[1] & i1[1] & i2[1] & i3[1]
            o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2])&(i3[0]|i3[1]|i3[2])|
                     i1[2]&(i0[0]|i0[1]|i0[2])&(i2[0]|i2[1]|i2[2])&(i3[0]|i3[1]|i3[2])|
                     i2[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2])&(i3[0]|i3[1]|i3[2])|
                     i3[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2]))
        elif op == sim.OR2 or op == sim.NOR2:
            o0[0] = i0[0] | i1[0]
            o0[1] = i0[1] | i1[1]
            # Plane 2 is set if one input has plane 2 set and the other input
            # has plane 0 clear, plane 1 clear, or plane 2 set.
            o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])|
                     i1[2]&(~i0[0]|~i0[1]|i0[2]))
        elif op == sim.OR3 or op == sim.NOR3:
            o0[0] = i0[0] | i1[0] | i2[0]
            o0[1] = i0[1] | i1[1] | i2[1]
            o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2])|
                     i1[2]&(~i0[0]|~i0[1]|i0[2])&(~i2[0]|~i2[1]|i2[2])|
                     i2[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2]))
        elif op == sim.OR4 or op == sim.NOR4:
            o0[0] = i0[0] | i1[0] | i2[0] | i3[0]
            o0[1] = i0[1] | i1[1] | i2[1] | i3[1]
            o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2])&(~i3[0]|~i3[1]|i3[2])|
                     i1[2]&(~i0[0]|~i0[1]|i0[2])&(~i2[0]|~i2[1]|i2[2])&(~i3[0]|~i3[1]|i3[2])|
                     i2[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2])&(~i3[0]|~i3[1]|i3[2])|
                     i3[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2]))
        elif op == sim.XOR2 or op == sim.XNOR2:
            o0[0] = i0[0] ^ i1[0]
            o0[1] = i0[1] ^ i1[1]
            o0[2] = i0[2] | i1[2]
        elif op == sim.MUX21:
            # Composed as (i0 AND NOT i2) OR (i1 AND i2) using the scratch rows; i2 is the select input.
            # t1 = ~i2
            t1[...] = i2 ^ inv_op
            # t0 = i0 & t1
            t0[0] = i0[0] & t1[0]
            t0[1] = i0[1] & t1[1]
            t0[2] = (i0[2]&(t1[0]|t1[1]|t1[2])|
                     t1[2]&(i0[0]|i0[1]|i0[2]))
            # t1 = i1 & i2
            t1[0] = i1[0] & i2[0]
            t1[1] = i1[1] & i2[1]
            t1[2] = (i1[2]&(i2[0]|i2[1]|i2[2])|
                     i2[2]&(i1[0]|i1[1]|i1[2]))
            # o0 = t0 | t1
            o0[0] = t0[0] | t1[0]
            o0[1] = t0[1] | t1[1]
            o0[2] = (t0[2]&(~t1[0]|~t1[1]|t1[2])|
                     t1[2]&(~t0[0]|~t0[1]|t0[2]))
        else: print(f'unknown op {op}')
        # Second stage: apply the output inversion for all inverting gate types.
        if (op == sim.INV1 or
            op == sim.NAND2 or op == sim.NAND3 or op == sim.NAND4 or
            op == sim.NOR2 or op == sim.NOR3 or op == sim.NOR4 or
            op == sim.XNOR2):
            o0[...] = o0 ^ inv_op

37
src/kyupy/sdf.py

@ -61,7 +61,6 @@ class DelayFile: @@ -61,7 +61,6 @@ class DelayFile:
delays = np.zeros((len(circuit.lines), 2, 2, 3)) # dataset last during construction.
with log.limit(50):
for name, iopaths in self.cells.items():
name = name.replace('\\', '')
if cell := circuit.cells.get(name, None):
@ -103,12 +102,11 @@ class DelayFile: @@ -103,12 +102,11 @@ class DelayFile:
delays = np.zeros((len(circuit.lines), 2, 2, 3)) # dataset last during construction.
nonfork_annotations = 0
for n1, n2, *delvals in self._interconnects:
delvals = [d if len(d) > 0 else [0, 0, 0] for d in delvals]
if max(max(delvals)) == 0: continue
cn1, pn1 = (n1, None) if (slash := n1.rfind('/')) < 0 else (n1[:slash], n1[slash+1:])
cn2, pn2 = (n2, None) if (slash := n2.rfind('/')) < 0 else (n2[:slash], n2[slash+1:])
cn1, pn1 = n1.split('/') if '/' in n1 else (n1, None)
cn2, pn2 = n2.split('/') if '/' in n2 else (n2, None)
cn1 = cn1.replace('\\','')
cn2 = cn2.replace('\\','')
c1, c2 = circuit.cells[cn1], circuit.cells[cn2]
@ -121,27 +119,19 @@ class DelayFile: @@ -121,27 +119,19 @@ class DelayFile:
log.warn(f'No line to annotate pin {pn2} of {c2}')
continue
f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver # find the forks between cells.
if f1 == c2 and f2 == c1:
nonfork_annotations += 1
if nonfork_annotations < 10:
log.warn(f'No fork between {c1.name}/{p1} and {c2.name}/{p2}, using {c2.name}/{p2}')
line = c2.ins[p2]
else:
assert f1.kind == '__fork__'
assert f2.kind == '__fork__'
if len(f2.outs) == 1:
assert f1 == f2 or f1.outs[f2.ins[0].driver_pin] == f2.ins[0]
if f1 != f2: # at least two forks, make sure f2 is a branchfork connected to f1
assert len(f2.outs) == 1
assert f1.outs[f2.ins[0].driver_pin] == f2.ins[0]
line = f2.ins[0]
elif len(f2.outs) == 1: # f1==f2, only OK when there is no fanout.
line = f2.ins[0]
else:
nonfork_annotations += 1
if nonfork_annotations < 10:
log.warn(f'No branchfork between {c1.name}/{p1} and {c2.name}/{p2}, using {c2.name}/{p2}')
line = c2.ins[p2]
log.warn(f'No branchfork to annotate interconnect delay {c1.name}/{p1}->{c2.name}/{p2}')
continue
delays[line, :] = delvals
if nonfork_annotations > 0:
log.warn(f'{nonfork_annotations} interconnect annotations were moved to gate inputs due to missing forks.')
return np.moveaxis(delays, -1, 0)
@ -166,10 +156,6 @@ class SdfTransformer(Transformer): @@ -166,10 +156,6 @@ class SdfTransformer(Transformer):
entries = [e for a in args if hasattr(a, 'children') for e in a.children]
return name, entries
@staticmethod
def cond(args): # ignore conditions
    """Transformer callback for a ``cond`` rule: drops the condition.

    Returns the second child of the parsed rule and discards the rest.
    NOTE(review): presumably ``args[0]`` is the condition expression and
    ``args[1]`` the wrapped ``iopath`` result -- confirm against the grammar.
    """
    return args[1]
@staticmethod
def start(args):
name = next((a for a in args if isinstance(a, str)), None)
@ -194,12 +180,9 @@ GRAMMAR = r""" @@ -194,12 +180,9 @@ GRAMMAR = r"""
| "(INSTANCE" ID? ")"
| "(TIMINGCHECK" _ignore* ")"
| delay )* ")"
delay: "(DELAY" "(ABSOLUTE" (interconnect | iopath | cond)* ")" ")"
delay: "(DELAY" "(ABSOLUTE" (interconnect | iopath)* ")" ")"
interconnect: "(INTERCONNECT" ID ID triple* ")"
iopath: "(IOPATH" ID_OR_EDGE ID_OR_EDGE triple* ")"
cond: "(" "COND" cond_port_expr iopath ")"
?cond_port_expr: ID | "(" cond_port_expr ")" | cond_port_expr BINARY_OP cond_port_expr
BINARY_OP: /&&/ | /==/
NAME: /[^"]+/
ID_OR_EDGE: ( /[^() ]+/ | "(" /[^)]+/ ")" )
ID: ( /[^"() ]+/ | "\"" /[^"]+/ "\"" )

132
src/kyupy/sim.py

@ -4,14 +4,9 @@ from bisect import bisect, insort_left @@ -4,14 +4,9 @@ from bisect import bisect, insort_left
import numpy as np
from .circuit import Circuit
BUF1 = np.uint16(0b1010_1010_1010_1010)
INV1 = ~BUF1
__const0__ = BUF1
__const1__ = INV1
AND2 = np.uint16(0b1000_1000_1000_1000)
AND3 = np.uint16(0b1000_0000_1000_0000)
AND4 = np.uint16(0b1000_0000_0000_0000)
@ -44,10 +39,7 @@ AOI211, OAI211 = ~AO211, ~OA211 @@ -44,10 +39,7 @@ AOI211, OAI211 = ~AO211, ~OA211
MUX21 = np.uint16(0b1100_1010_1100_1010) # z = i1 if i2 else i0 (i2 is select)
names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k])
prim2name = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k])
name2prim = dict([(k, v) for k, v in globals().items() if isinstance(v, np.uint16)])
names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16)])
kind_prefixes = {
'nand': (NAND4, NAND3, NAND2),
@ -164,7 +156,7 @@ class SimOps: @@ -164,7 +156,7 @@ class SimOps:
:param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
memory footprint, but intermediate signal waveforms become unaccessible after a propagation.
"""
def __init__(self, circuit: Circuit, c_caps=1, c_caps_min=1, a_ctrl=None, c_reuse=False, strip_forks=False):
def __init__(self, circuit, c_caps=1, c_caps_min=1, a_ctrl=None, c_reuse=False, strip_forks=False):
self.circuit = circuit
self.s_len = len(circuit.s_nodes)
@ -183,74 +175,84 @@ class SimOps: @@ -183,74 +175,84 @@ class SimOps:
self.ppo_offset = self.ppi_offset + self.s_len
self.c_locs_len = self.ppo_offset + self.s_len
# ALAP-toposort the circuit into self.ops
levels = []
ppio2idx = dict((n, i) for i, n in enumerate(circuit.s_nodes))
ppos = set([n for n in circuit.s_nodes if len(n.ins) > 0])
readers = np.array([1 if l.reader in ppos else len(l.reader.outs) for l in circuit.lines], dtype=np.int32) # for ref-counting forks
level_lines = [n.ins[0] for n in ppos] # start from PPOs
# FIXME: Should probably instanciate buffers for PPOs and attach DFF clocks
while len(level_lines) > 0: # traverse the circuit level-wise back towards (P)PIs
level_ops = []
prev_level_lines = []
for l in level_lines:
n = l.driver
in_idxs = [n.ins[x].index if len(n.ins) > x and n.ins[x] is not None else self.zero_idx for x in [0,1,2,3]]
if n in ppio2idx:
in_idxs[0] = self.ppi_offset + ppio2idx[n]
if l.driver_pin == 1 and 'dff' in n.kind.lower(): # second output of DFF is inverted
level_ops.append((INV1, l.index, *in_idxs, *a_ctrl[l]))
else:
level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l]))
elif n.kind == '__fork__':
readers[n.ins[0]] -= 1
if readers[n.ins[0]] == 0: prev_level_lines.append(n.ins[0])
if not strip_forks: level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l]))
else:
prev_level_lines += n.ins
sp = None
# translate circuit structure into self.ops
ops = []
interface_dict = dict((n, i) for i, n in enumerate(circuit.s_nodes))
for n in circuit.topological_order():
if n in interface_dict:
inp_idx = self.ppi_offset + interface_dict[n]
if len(n.outs) > 0 and n.outs[0] is not None: # first output of a PI/PPI
ops.append((BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[0]]))
if 'dff' in n.kind.lower(): # second output of DFF is inverted
if len(n.outs) > 1 and n.outs[1] is not None:
ops.append((INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[1]]))
else: # if not DFF, no output is inverted.
for o_line in n.outs[1:]:
if o_line is not None:
ops.append((BUF1, o_line.index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[o_line]))
continue
# regular node, not PI/PPI or PO/PPO
o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx
i0_idx = n.ins[0].index if len(n.ins) > 0 and n.ins[0] is not None else self.zero_idx
i1_idx = n.ins[1].index if len(n.ins) > 1 and n.ins[1] is not None else self.zero_idx
i2_idx = n.ins[2].index if len(n.ins) > 2 and n.ins[2] is not None else self.zero_idx
i3_idx = n.ins[3].index if len(n.ins) > 3 and n.ins[3] is not None else self.zero_idx
kind = n.kind.lower()
if kind == '__fork__':
if not strip_forks:
for o_line in n.outs:
if o_line is not None:
ops.append((BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o_line]))
continue
sp = None
for prefix, prims in kind_prefixes.items():
if kind.startswith(prefix):
sp = prims[0]
if in_idxs[3] == self.zero_idx:
if i3_idx == self.zero_idx:
sp = prims[1]
if in_idxs[2] == self.zero_idx:
if i2_idx == self.zero_idx:
sp = prims[2]
break
if sp is None:
print('unknown cell type', kind)
else:
level_ops.append((sp, l.index, *in_idxs, *a_ctrl[l]))
if len(level_ops) > 0: levels.append(level_ops)
level_lines = prev_level_lines
ops.append((sp, o0_idx, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o0_idx]))
self.levels = [np.asarray(lv, dtype=np.int32) for lv in levels[::-1]]
level_sums = np.cumsum([0]+[len(lv) for lv in self.levels], dtype=np.int32)
self.level_starts, self.level_stops = level_sums[:-1], level_sums[1:]
self.ops = np.vstack(self.levels)
self.ops = np.asarray(ops, dtype='int32')
# create a map from fanout lines to stem lines for fork stripping
stems = np.full(self.c_locs_len, -1, dtype=np.int32) # default to -1: 'no fanout line'
stems = np.zeros(self.c_locs_len, dtype='int32') - 1 # default to -1: 'no fanout line'
if strip_forks:
for f in circuit.forks.values():
prev_line = f.ins[0]
while prev_line.driver.kind == '__fork__':
prev_line = prev_line.driver.ins[0]
stem_idx = prev_line.index
for ol in f.outs:
if ol is not None:
stems[ol] = prev_line.index
ref_count = np.zeros(self.c_locs_len, dtype=np.int32)
for op in self.ops:
for x in [2, 3, 4, 5]:
ref_count[stems[op[x]] if stems[op[x]] >= 0 else op[x]] += 1
stems[ol] = stem_idx
# calculate level (distance from PI/PPI) and reference count for each line
levels = np.zeros(self.c_locs_len, dtype='int32')
ref_count = np.zeros(self.c_locs_len, dtype='int32')
level_starts = [0]
current_level = 1
for i, op in enumerate(self.ops):
# if we fork-strip, always take the stems for determining fan-in level
i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
i2_idx = stems[op[4]] if stems[op[4]] >= 0 else op[4]
i3_idx = stems[op[5]] if stems[op[5]] >= 0 else op[5]
if levels[i0_idx] >= current_level or levels[i1_idx] >= current_level or levels[i2_idx] >= current_level or levels[i3_idx] >= current_level:
current_level += 1
level_starts.append(i)
levels[op[1]] = current_level # set level of the output line
ref_count[i0_idx] += 1
ref_count[i1_idx] += 1
ref_count[i2_idx] += 1
ref_count[i3_idx] += 1
self.level_starts = np.asarray(level_starts, dtype='int32')
self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32')
# combinational signal allocation table. maps line and interface indices to self.c memory locations
self.c_locs = np.full((self.c_locs_len,), -1, dtype=np.int32)
@ -276,9 +278,9 @@ class SimOps: @@ -276,9 +278,9 @@ class SimOps:
ref_count[i0_idx] += 1
# allocate memory for the rest of the circuit
for ops in self.levels:
for op_start, op_stop in zip(self.level_starts, self.level_stops):
free_set = set()
for op in ops:
for op in self.ops[op_start:op_stop]:
# if we fork-strip, always take the stems
i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
@ -297,7 +299,6 @@ class SimOps: @@ -297,7 +299,6 @@ class SimOps:
self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap
if c_reuse:
for loc in free_set:
if loc >= 0: # DFF clocks are not allocated. Ignore for now.
h.free(loc)
# copy memory location and capacity from stems to fanout lines
@ -310,15 +311,6 @@ class SimOps: @@ -310,15 +311,6 @@ class SimOps:
if len(n.ins) > 0:
self.c_locs[self.ppo_offset + i], self.c_caps[self.ppo_offset + i] = self.c_locs[n.ins[0]], self.c_caps[n.ins[0]]
# line use information
self.line_use_start = np.full(self.c_locs_len, -1, dtype=np.int32)
self.line_use_stop = np.full(self.c_locs_len, len(self.levels), dtype=np.int32)
for i, lv in enumerate(self.levels):
for op in lv:
self.line_use_start[op[1]] = i
for x in [2, 3, 4, 5]:
self.line_use_stop[op[x]] = i
self.c_len = h.max_size
d = defaultdict(int)

32
src/kyupy/stil.py

@ -41,7 +41,7 @@ class StilFile: @@ -41,7 +41,7 @@ class StilFile:
unload = {}
for so_port in self.so_ports:
if so_port in call.parameters:
unload[so_port] = call.parameters[so_port]
unload[so_port] = call.parameters[so_port].replace('\n', '').replace('N', '-')
if len(capture) > 0:
self.patterns.append(ScanPattern(sload, launch, capture, unload))
capture = {}
@ -49,9 +49,11 @@ class StilFile: @@ -49,9 +49,11 @@ class StilFile:
sload = {}
for si_port in self.si_ports:
if si_port in call.parameters:
sload[si_port] = call.parameters[si_port]
if call.name.endswith('_launch'): launch = call.parameters
if call.name.endswith('_capture'): capture = call.parameters
sload[si_port] = call.parameters[si_port].replace('\n', '').replace('N', '-')
if call.name.endswith('_launch'):
launch = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items())
if call.name.endswith('_capture'):
capture = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items())
def _maps(self, c):
interface = list(c.io_nodes) + [n for n in c.nodes if 'DFF' in n.kind]
@ -98,12 +100,12 @@ class StilFile: @@ -98,12 +100,12 @@ class StilFile:
tests = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
for i, p in enumerate(self.patterns):
for si_port in self.si_ports.keys():
pattern = logic.mvarray(p.load[si_port][0])
pattern = logic.mvarray(p.load[si_port])
inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN),
[scan_inversions[si_port], logic.ZERO]).astype(np.uint8)
np.bitwise_xor(pattern, inversions, out=pattern)
tests[scan_maps[si_port], i] = pattern
tests[pi_map, i] = logic.mvarray(p.capture['_pi'][0])
tests[pi_map, i] = logic.mvarray(p.capture['_pi'])
return tests
def tests_loc(self, circuit, init_filter=None, launch_filter=None):
@ -132,12 +134,12 @@ class StilFile: @@ -132,12 +134,12 @@ class StilFile:
for i, p in enumerate(self.patterns):
# init.set_values(i, '0' * len(interface))
for si_port in self.si_ports.keys():
pattern = logic.mvarray(p.load[si_port][0])
pattern = logic.mvarray(p.load[si_port])
inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN),
[scan_inversions[si_port], logic.ZERO]).astype(np.uint8)
np.bitwise_xor(pattern, inversions, out=pattern)
init[scan_maps[si_port], i] = pattern
init[pi_map, i] = logic.mvarray(p.launch['_pi'][0] if '_pi' in p.launch else p.capture['_pi'][0])
init[pi_map, i] = logic.mvarray(p.launch['_pi'] if '_pi' in p.launch else p.capture['_pi'])
if init_filter: init = init_filter(init)
sim8v = LogicSim(circuit, init.shape[-1], m=8)
sim8v.s[0] = logic.mv_to_bp(init)
@ -147,12 +149,12 @@ class StilFile: @@ -147,12 +149,12 @@ class StilFile:
launch = logic.bp_to_mv(sim8v.s[1])[..., :init.shape[-1]]
for i, p in enumerate(self.patterns):
# if there was no launch cycle or launch clock, then init = launch
if '_pi' not in p.launch or 'P' not in p.launch['_pi'][0] or 'P' not in p.capture['_pi'][0]:
if '_pi' not in p.launch or 'P' not in p.launch['_pi'] or 'P' not in p.capture['_pi']:
for si_port in self.si_ports.keys():
pattern = logic.mv_xor(logic.mvarray(p.load[si_port][0]), scan_inversions[si_port])
pattern = logic.mv_xor(logic.mvarray(p.load[si_port]), scan_inversions[si_port])
launch[scan_maps[si_port], i] = pattern
if '_pi' in p.capture and 'P' in p.capture['_pi'][0]:
launch[pi_map, i] = logic.mvarray(p.capture['_pi'][0])
if '_pi' in p.capture and 'P' in p.capture['_pi']:
launch[pi_map, i] = logic.mvarray(p.capture['_pi'])
launch[po_map, i] = logic.UNASSIGNED
if launch_filter: launch = launch_filter(launch)
@ -169,9 +171,9 @@ class StilFile: @@ -169,9 +171,9 @@ class StilFile:
interface, _, po_map, scan_maps, scan_inversions = self._maps(circuit)
resp = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
for i, p in enumerate(self.patterns):
resp[po_map, i] = logic.mvarray(p.capture['_po'][0] if len(p.capture) > 0 else p.launch['_po'][0])
resp[po_map, i] = logic.mvarray(p.capture['_po'] if len(p.capture) > 0 else p.launch['_po'])
for so_port in self.so_ports.keys():
pattern = logic.mv_xor(logic.mvarray(p.unload[so_port][0]), scan_inversions[so_port])
pattern = logic.mv_xor(logic.mvarray(p.unload[so_port]), scan_inversions[so_port])
resp[scan_maps[so_port], i] = pattern
return resp
@ -190,7 +192,7 @@ class StilTransformer(Transformer): @@ -190,7 +192,7 @@ class StilTransformer(Transformer):
def call(args): return Call(args[0], dict(args[1:]))
@staticmethod
def call_parameter(args): return args[0], (args[1].value.replace('\n', '').replace('N', '-'), args[1].start_pos)
def call_parameter(args): return args[0], args[1].value
@staticmethod
def signal_group(args): return args[0], args[1:]

168
src/kyupy/techlib.py

@ -11,6 +11,50 @@ from itertools import product @@ -11,6 +11,50 @@ from itertools import product
from . import bench
class TechLibOld:
@staticmethod
def pin_index(kind, pin):
if isinstance(pin, int):
return max(0, pin-1)
if kind[:3] in ('OAI', 'AOI'):
if pin[0] == 'A': return int(pin[1]) - 1
if pin == 'B': return int(kind[3])
if pin[0] == 'B': return int(pin[1]) - 1 + int(kind[3])
for prefix, pins, index in [('HADD', ('B0', 'SO'), 1),
('HADD', ('A0', 'C1'), 0),
('MUX21', ('S', 'S0'), 2),
('MX2', ('S0',), 2),
('TBUF', ('OE',), 1),
('TINV', ('OE',), 1),
('LATCH', ('D',), 0),
('LATCH', ('QN',), 1),
('DFF', ('D',), 0),
('DFF', ('QN',), 1),
('SDFF', ('D',), 0),
('SDFF', ('QN',), 1),
('SDFF', ('CLK',), 3),
('SDFF', ('RSTB', 'RN'), 4),
('SDFF', ('SETB',), 5),
('ISOL', ('ISO',), 0),
('ISOL', ('D',), 1)]:
if kind.startswith(prefix) and pin in pins: return index
for index, pins in enumerate([('A1', 'IN1', 'A', 'S', 'INP', 'I', 'Q', 'QN', 'Y', 'Z', 'ZN'),
('A2', 'IN2', 'B', 'CK', 'CLK', 'CO', 'SE'),
('A3', 'IN3', 'C', 'RN', 'RSTB', 'CI', 'SI'),
('A4', 'IN4', 'D', 'SN', 'SETB'),
('A5', 'IN5', 'E'),
('A6', 'IN6', 'F')]):
if pin in pins: return index
raise ValueError(f'Unknown pin index for {kind}.{pin}')
@staticmethod
def pin_is_output(kind, pin):
if isinstance(pin, int):
return pin == 0
if 'MUX' in kind and pin == 'S': return False
return pin in ('Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1')
class TechLib:
"""Class for standard cell library definitions.
@ -49,14 +93,6 @@ class TechLib: @@ -49,14 +93,6 @@ class TechLib:
assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}'
return self.cells[kind][1][pin][0]
def pin_name(self, kind, pos, output=False):
"""Returns the pin name for a given node kind, list position, and direction."""
assert kind in self.cells, f'Unknown cell: {kind}'
for name, (ppos, isout) in self.cells[kind][1].items():
if isout == output and ppos == pos:
return name
return None
def pin_is_output(self, kind, pin):
"""Returns True, if given pin name of a node kind is an output."""
assert kind in self.cells, f'Unknown cell: {kind}'
@ -102,92 +138,21 @@ TLATX1 input(C,D) output(Q,QN) Q=LATCH(D,C) QN=INV1(Q) ; @@ -102,92 +138,21 @@ TLATX1 input(C,D) output(Q,QN) Q=LATCH(D,C) QN=INV1(Q) ;
"""
NANGATE = TechLib(r"""
FILLTIE ;
FILL_X{1,2,4,8,16} ;
ANTENNA input(I) ;
TIEH output(Z) Z=__const1__() ;
TIEL output(ZN) ZN=__const0__() ;
BUF_X{1,2,4,8,12,16} input(I) output(Z) Z=BUF1(I) ;
INV_X{1,2,4,8,12,16} input(I) output(ZN) ZN=INV1(I) ;
CLKBUF_X{1,2,4,8,12,16} input(I) output(Z) Z=BUF1(I) ;
CLKGATETST_X1 input(CLK,E,TE) output(Q) Q=OA21(CLK,E,TE) ;
AND2_X{1,2} input(A1,A2) output(Z) Z=AND2(A1,A2) ;
AND3_X{1,2} input(A1,A2,A3) output(Z) Z=AND3(A1,A2,A3) ;
AND4_X{1,2} input(A1,A2,A3,A4) output(Z) Z=AND4(A1,A2,A3,A4) ;
NAND2_X{1,2} input(A1,A2) output(ZN) ZN=NAND2(A1,A2) ;
NAND3_X{1,2} input(A1,A2,A3) output(ZN) ZN=NAND3(A1,A2,A3) ;
NAND4_X{1,2} input(A1,A2,A3,A4) output(ZN) ZN=NAND4(A1,A2,A3,A4) ;
OR2_X{1,2} input(A1,A2) output(Z) Z=OR2(A1,A2) ;
OR3_X{1,2} input(A1,A2,A3) output(Z) Z=OR3(A1,A2,A3) ;
OR4_X{1,2} input(A1,A2,A3,A4) output(Z) Z=OR4(A1,A2,A3,A4) ;
NOR2_X{1,2} input(A1,A2) output(ZN) ZN=NOR2(A1,A2) ;
NOR3_X{1,2} input(A1,A2,A3) output(ZN) ZN=NOR3(A1,A2,A3) ;
NOR4_X{1,2} input(A1,A2,A3,A4) output(ZN) ZN=NOR4(A1,A2,A3,A4) ;
XOR2_X1 input(A1,A2) output(Z) Z=XOR2(A1,A2) ;
XNOR2_X1 input(A1,A2) output(ZN) ZN=XNOR2(A1,A2) ;
MUX2_X1 input(I0,I1,S) output(Z) Z=MUX21(I0,I1,S) ;
HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ;
AOI21_X{1,2} input(A1,A2,B) output(ZN) ZN=AOI21(A1,A2,B) ;
OAI21_X{1,2} input(A1,A2,B) output(ZN) ZN=OAI21(A1,A2,B) ;
AOI22_X{1,2} input(A1,A2,B1,B2) output(ZN) ZN=AOI22(A1,A2,B1,B2) ;
OAI22_X{1,2} input(A1,A2,B1,B2) output(ZN) ZN=OAI22(A1,A2,B1,B2) ;
DFFRNQ_X1 input(D,RN,CLK) output(Q) DR=AND2(D,RN) Q=DFF(DR,CLK) ;
DFFSNQ_X1 input(D,SN,CLK) output(Q) S=INV1(SN) DS=OR2(D,S) Q=DFF(DS,CLK) ;
SDFFRNQ_X1 input(D,RN,SE,SI,CLK) output(Q) DR=AND2(D,RN) DI=MUX21(DR,SI,SE) Q=DFF(DI,CLK) ;
SDFFSNQ_X1 input(D,SE,SI,SN,CLK) output(Q) S=INV1(SN) DS=OR2(D,S) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) ;
TBUF_X{1,2,4,8,12,16} input(EN,I) output(Z) Z=BUF1(I) ;
LHQ_X1 input(D,E) output(Q) Q=LATCH(D,E) ;
""")
"""Nangate 15nm Open Cell Library (NanGate_15nm_OCL_v0.1_2014_06.A).
"""
NANGATE45 = TechLib(r"""
_nangate_common = r"""
FILLCELL_X{1,2,4,8,16,32} ;
ANTENNA_X1 input(A) ;
LOGIC0_X1 output(Z) Z=__const0__() ;
LOGIC1_X1 output(Z) Z=__const1__() ;
BUF_X{1,2,4,8,16,32} input(A) output(Z) Z=BUF1(A) ;
INV_X{1,2,4,8,16,32} input(A) output(ZN) ZN=INV1(A) ;
CLKBUF_X{1,2,3} input(A) output(Z) Z=BUF1(A) ;
CLKGATETST_X{1,2,4,8} input(CK,E,SE) output(GCK) GCK=OA21(CK,E,SE) ;
CLKGATE_X{1,2,4,8} input(CK,E) output(GCK) GCK=AND2(CK,E) ;
AND2_X{1,2,4} input(A1,A2) output(ZN) ZN=AND2(A1,A2) ;
AND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=AND3(A1,A2,A3) ;
AND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4) ;
NAND2_X{1,2,4} input(A1,A2) output(ZN) ZN=NAND2(A1,A2) ;
NAND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=NAND3(A1,A2,A3) ;
NAND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=NAND4(A1,A2,A3,A4) ;
OR2_X{1,2,4} input(A1,A2) output(ZN) ZN=OR2(A1,A2) ;
OR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=OR3(A1,A2,A3) ;
OR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4) ;
NOR2_X{1,2,4} input(A1,A2) output(ZN) ZN=NOR2(A1,A2) ;
NOR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=NOR3(A1,A2,A3) ;
NOR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=NOR4(A1,A2,A3,A4) ;
XOR2_X{1,2} input(A,B) output(Z) Z=XOR2(A,B) ;
XNOR2_X{1,2} input(A,B) output(ZN) ZN=XNOR2(A,B) ;
MUX2_X{1,2} input(A,B,S) output(Z) Z=MUX21(A,B,S) ;
HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ;
AOI21_X{1,2,4} input(A,B1,B2) output(ZN) ZN=AOI21(B1,B2,A) ;
OAI21_X{1,2,4} input(A,B1,B2) output(ZN) ZN=OAI21(B1,B2,A) ;
@ -197,6 +162,8 @@ OAI22_X{1,2,4} input(A1,A2,B1,B2) output(ZN) ZN=OAI22(A1,A2,B1,B2) ; @@ -197,6 +162,8 @@ OAI22_X{1,2,4} input(A1,A2,B1,B2) output(ZN) ZN=OAI22(A1,A2,B1,B2) ;
OAI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=OAI211(C1,C2,A,B) ;
AOI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=AOI211(C1,C2,A,B) ;
MUX2_X{1,2} input(A,B,S) output(Z) Z=MUX21(A,B,S) ;
AOI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=AO22(B1,B2,C1,C2) ZN=NOR2(BC,A) ;
OAI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=NAND2(BC,A) ;
@ -205,6 +172,14 @@ OAI222_X{1,2,4} input(A1,A2,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=OAI2 @@ -205,6 +172,14 @@ OAI222_X{1,2,4} input(A1,A2,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=OAI2
OAI33_X1 input(A1,A2,A3,B1,B2,B3) output(ZN) AA=OR2(A1,A2) BB=OR2(B1,B2) ZN=OAI22(AA,A3,BB,B3) ;
HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ;
CLKGATE_X{1,2,4,8} input(CK,E) output(GCK) GCK=AND2(CK,E) ;
CLKGATETST_X{1,2,4,8} input(CK,E,SE) output(GCK) GCK=OA21(CK,E,SE) ;
DFF_X{1,2} input(D,CK) output(Q,QN) Q=DFF(D,CK) QN=INV1(Q) ;
DFFR_X{1,2} input(D,RN,CK) output(Q,QN) DR=AND2(D,RN) Q=DFF(DR,CK) QN=INV1(Q) ;
DFFS_X{1,2} input(D,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) Q=DFF(DS,CK) QN=INV1(Q) ;
@ -216,16 +191,43 @@ SDFFS_X{1,2} input(D,SE,SI,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) DI=MU @@ -216,16 +191,43 @@ SDFFS_X{1,2} input(D,SE,SI,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) DI=MU
SDFFRS_X{1,2} input(D,RN,SE,SI,SN,CK) output(Q,QN) S=INV1(SN) DS=OR2(D,S) DRS=AND2(DS,RN) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;
TBUF_X{1,2,4,8,16} input(A,EN) output(Z) Z=BUF1(A) ;
TINV_X1 input(I,EN) output(ZN) ZN=INV1(I) ;
TLAT_X1 input(D,G,OE) output(Q) Q=LATCH(D,G) ;
DLH_X{1,2} input(D,G) output(Q) Q=LATCH(D,G) ;
DLL_X{1,2} input(D,GN) output(Q) G=INV1(GN) Q=LATCH(D,G) ;
"""
NANGATE = TechLib(_nangate_common + r"""
INV_X{1,2,4,8,16,32} input(I) output(ZN) ZN=INV1(I) ;
AND2_X{1,2,4} input(A1,A2) output(Z) Z=AND2(A1,A2) ;
AND3_X{1,2,4} input(A1,A2,A3) output(Z) Z=AND3(A1,A2,A3) ;
AND4_X{1,2,4} input(A1,A2,A3,A4) output(Z) Z=AND4(A1,A2,A3,A4) ;
OR2_X{1,2,4} input(A1,A2) output(Z) Z=OR2(A1,A2) ;
OR3_X{1,2,4} input(A1,A2,A3) output(Z) Z=OR3(A1,A2,A3) ;
OR4_X{1,2,4} input(A1,A2,A3,A4) output(Z) Z=OR4(A1,A2,A3,A4) ;
XOR2_X{1,2} input(A1,A2) output(Z) Z=XOR2(A1,A2) ;
XNOR2_X{1,2} input(A1,A2) output(ZN) ZN=XNOR2(A1,A2) ;
""")
"""An newer NANGATE-variant that uses 'Z' as output pin names for AND and OR gates.
"""
NANGATE_ZN = TechLib(_nangate_common + r"""
INV_X{1,2,4,8,16,32} input(A) output(ZN) ZN=INV1(A) ;
AND2_X{1,2,4} input(A1,A2) output(ZN) ZN=AND2(A1,A2) ;
AND3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=AND3(A1,A2,A3) ;
AND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4) ;
OR2_X{1,2,4} input(A1,A2) output(ZN) ZN=OR2(A1,A2) ;
OR3_X{1,2,4} input(A1,A2,A3) output(ZN) ZN=OR3(A1,A2,A3) ;
OR4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4) ;
XOR2_X{1,2} input(A,B) output(Z) Z=XOR2(A,B) ;
XNOR2_X{1,2} input(A,B) output(ZN) ZN=XNOR2(A,B) ;
""")
"""Nangate 45nm Open Cell Library (NangateOpenCellLibrary_PDKv1_3_v2010_12).
This NANGATE-variant that uses 'ZN' as output pin names for AND and OR gates.
"""An older NANGATE-variant that uses 'ZN' as output pin names for AND and OR gates.
"""

9
src/kyupy/verilog.py

@ -123,9 +123,6 @@ class VerilogTransformer(Transformer): @@ -123,9 +123,6 @@ class VerilogTransformer(Transformer):
assignments = []
for stmt in args[2:]: # pass 1: instantiate cells and driven signals
if isinstance(stmt, Instantiation):
if stmt.type not in self.tlib.cells:
log.warn(f'Ignoring cell of unknown kind "{stmt.type}"')
continue
n = Node(c, stmt.name, kind=stmt.type)
for p, s in stmt.pins.items():
if self.tlib.pin_is_output(n.kind, p):
@ -144,8 +141,6 @@ class VerilogTransformer(Transformer): @@ -144,8 +141,6 @@ class VerilogTransformer(Transformer):
c.io_nodes[positions[name]] = n
if sd.kind == 'input':
Line(c, n, Node(c, name))
while len(assignments) > 0:
more_assignments = []
for target, source in assignments: # pass 1.5: process signal assignments
target_sigs = []
if not isinstance(target, list): target = [target]
@ -172,13 +167,9 @@ class VerilogTransformer(Transformer): @@ -172,13 +167,9 @@ class VerilogTransformer(Transformer):
cnode = Node(c, f'__const{s[3]}_{const_count}__', f'__const{s[3]}__')
const_count += 1
Line(c, cnode, Node(c, t))
else:
more_assignments.append((target, source))
assignments = more_assignments
for stmt in args[2:]: # pass 2: connect signals to readers
if isinstance(stmt, Instantiation):
for p, s in stmt.pins.items():
if stmt.name not in c.cells: continue
n = c.cells[stmt.name]
if self.tlib.pin_is_output(n.kind, p): continue
if s.startswith("1'b"):

173
src/kyupy/wave_sim.py

@ -13,11 +13,10 @@ Two simulators are available: :py:class:`WaveSim` runs on the CPU, and the deriv @@ -13,11 +13,10 @@ Two simulators are available: :py:class:`WaveSim` runs on the CPU, and the deriv
"""
import math
from collections import defaultdict
import numpy as np
from . import log, numba, cuda, sim, cdiv, eng
from . import numba, cuda, sim, cdiv
TMAX = np.float32(2 ** 127)
@ -60,8 +59,8 @@ class WaveSim(sim.SimOps): @@ -60,8 +59,8 @@ class WaveSim(sim.SimOps):
self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype)
self.delays[:, :delays.shape[1]] = delays
self.c = np.full((self.c_len, self.sims), TMAX, dtype=np.float32)
self.s = np.zeros((11, self.s_len, self.sims), dtype=np.float32)
self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX
self.s = np.zeros((11, self.s_len, sims), dtype=np.float32)
"""Information about the logic values and transitions around the sequential elements (flip-flops) and ports.
The first 3 values are read by :py:func:`s_to_c`.
@ -99,18 +98,12 @@ class WaveSim(sim.SimOps): @@ -99,18 +98,12 @@ class WaveSim(sim.SimOps):
self.simctl_int[0] = range(sims) # unique seed for each sim by default, zero this to pick same delays for all sims.
self.simctl_int[1] = 2 # random picking by default.
self.e = np.zeros((self.c_locs_len, sims, 2), dtype=np.uint8) # aux data for each line and sim
self.error_counts = np.zeros(self.s_len, dtype=np.uint32) # number of capture errors by PPO
self.lsts = np.zeros(self.s_len, dtype=np.float32) # LST by PPO
self.overflows = np.zeros(self.s_len, dtype=np.uint32) # Overflows by PPO
self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.e, self.c_locs, self.c_caps, self.ops, self.simctl_int)])
self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.simctl_int)])
def __repr__(self):
dev = 'GPU' if hasattr(self.c, 'copy_to_host') else 'CPU'
return f'{{name: "{self.circuit.name}", device: "{dev}", sims: {self.sims}, ops: {len(self.ops)}, ' + \
f'levels: {len(self.level_starts)}, nbytes: {eng(self.nbytes)}}}'
f'levels: {len(self.level_starts)}, nbytes: {self.nbytes}}}'
def s_to_c(self):
"""Transfers values of sequential elements and primary inputs to the combinational portion.
@ -123,7 +116,7 @@ class WaveSim(sim.SimOps): @@ -123,7 +116,7 @@ class WaveSim(sim.SimOps):
self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX])
self.c[self.pippi_c_locs+2] = TMAX
def c_prop(self, sims=None, seed=1, delta=0):
def c_prop(self, sims=None, seed=1):
"""Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs.
:param sims: Number of parallel simulations to execute. If None, all available simulations are performed.
@ -131,7 +124,7 @@ class WaveSim(sim.SimOps): @@ -131,7 +124,7 @@ class WaveSim(sim.SimOps):
"""
sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops):
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, 0, sims, self.delays, self.simctl_int, seed, delta)
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, 0, sims, self.delays, self.simctl_int, seed)
def c_to_s(self, time=TMAX, sd=0.0, seed=1):
"""Simulates a capture operation at all sequential elements and primary outputs.
@ -159,7 +152,7 @@ class WaveSim(sim.SimOps): @@ -159,7 +152,7 @@ class WaveSim(sim.SimOps):
self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs]
def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, delta):
def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
overflows = int(0)
lut = op[0]
@ -169,18 +162,6 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de @@ -169,18 +162,6 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
c_idx = op[4]
d_idx = op[5]
input_epoch = (ebuf[a_idx, sim, 1]|
ebuf[b_idx, sim, 1]|
ebuf[c_idx, sim, 1]|
ebuf[d_idx, sim, 1])
output_epoch = ebuf[z_idx, sim, 1]
if (delta):
if input_epoch == 0 and output_epoch == 0: return 0, 0
out_changed = output_epoch
if len(delays) > 1:
if simctl_int[1] == 0:
delays = delays[seed]
@ -225,25 +206,25 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de @@ -225,25 +206,25 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
if a == current_t:
a_cur += 1
inputs ^= 1
thresh = delays[a_idx, a_cur & 1 ^ 1, z_val]
thresh = delays[a_idx, a_cur & 1, z_val]
a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val]
next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, (a_cur & 1) ^ 1, z_val ^ 1]
elif b == current_t:
b_cur += 1
inputs ^= 2
thresh = delays[b_idx, b_cur & 1 ^ 1, z_val]
thresh = delays[b_idx, b_cur & 1, z_val]
b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val]
next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, (b_cur & 1) ^ 1, z_val ^ 1]
elif c == current_t:
c_cur += 1
inputs ^= 4
thresh = delays[c_idx, c_cur & 1 ^ 1, z_val]
thresh = delays[c_idx, c_cur & 1, z_val]
c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val]
next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, (c_cur & 1) ^ 1, z_val ^ 1]
else:
d_cur += 1
inputs ^= 8
thresh = delays[d_idx, d_cur & 1 ^ 1, z_val]
thresh = delays[d_idx, d_cur & 1, z_val]
d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val]
next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1]
@ -254,15 +235,13 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de @@ -254,15 +235,13 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
or (current_t - previous_t) > thresh # -OR- the generated hazard is wider than pulse threshold.
):
if z_cur < (z_cap - 1): # enough space in z_mem?
if delta and (cbuf[z_mem + z_cur, sim] != current_t):
out_changed = 1
cbuf[z_mem + z_cur, sim] = current_t
previous_t = current_t
z_cur += 1
else:
overflows += 1
previous_t = cbuf[z_mem + z_cur - 1, sim]
z_cur -= 1
previous_t = cbuf[z_mem + z_cur, sim]
else:
z_cur -= 1
previous_t = cbuf[z_mem + z_cur - 1, sim] if z_cur > 0 else TMIN
@ -276,23 +255,12 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de @@ -276,23 +255,12 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
current_t = min(a, b, c, d)
if delta and (cbuf[z_mem + z_cur, sim] != TMAX):
out_changed = 1
# generate or propagate overflow flag
cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d)
nrise = max(0, (z_cur+1) // 2 - (cbuf[z_mem, sim] == TMIN))
nfall = z_cur // 2
e = int(((cbuf[z_mem, sim] == TMIN) << 1) & 2) # initial value
e |= z_val # final value
e |= (nrise + nfall)<<2 # number of transitions
ebuf[z_idx, sim, 0] = e
ebuf[z_idx, sim, 1] = input_epoch & out_changed
return nrise, nfall
@ -300,11 +268,11 @@ wave_eval_cpu = numba.njit(_wave_eval) @@ -300,11 +268,11 @@ wave_eval_cpu = numba.njit(_wave_eval)
@numba.njit
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, seed, delta):
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed):
for op_idx in range(op_start, op_stop):
op = ops[op_idx]
for sim in range(sim_start, sim_stop):
nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], seed, delta)
nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed)
a_loc = op[6]
a_wr = op[7]
a_wf = op[8]
@ -377,18 +345,12 @@ class WaveSimCuda(WaveSim): @@ -377,18 +345,12 @@ class WaveSimCuda(WaveSim):
self.delays = cuda.to_device(self.delays)
self.simctl_int = cuda.to_device(self.simctl_int)
self.abuf = cuda.to_device(self.abuf)
self.e = cuda.to_device(self.e)
self.error_counts = cuda.to_device(self.error_counts)
self.lsts = cuda.to_device(self.lsts)
self.overflows = cuda.to_device(self.overflows)
self.aux = cuda.to_device(np.zeros(8*1024, dtype=np.int32))
self._block_dim = (32, 16)
def __getstate__(self):
state = self.__dict__.copy()
del state['c']
state['c'] = np.array(self.c)
state['s'] = np.array(self.s)
state['ops'] = np.array(self.ops)
state['c_locs'] = np.array(self.c_locs)
@ -396,16 +358,11 @@ class WaveSimCuda(WaveSim): @@ -396,16 +358,11 @@ class WaveSimCuda(WaveSim):
state['delays'] = np.array(self.delays)
state['simctl_int'] = np.array(self.simctl_int)
state['abuf'] = np.array(self.abuf)
state['e'] = np.array(self.e)
state['error_counts'] = np.array(self.error_counts)
state['lsts'] = np.array(self.lsts)
state['overflows'] = np.array(self.overflows)
state['aux'] = np.array(self.aux)
return state
def __setstate__(self, state):
self.__dict__.update(state)
self.c = cuda.to_device(np.full((self.c_len, self.sims), TMAX, dtype=np.float32))
self.c = cuda.to_device(self.c)
self.s = cuda.to_device(self.s)
self.ops = cuda.to_device(self.ops)
self.c_locs = cuda.to_device(self.c_locs)
@ -413,11 +370,6 @@ class WaveSimCuda(WaveSim): @@ -413,11 +370,6 @@ class WaveSimCuda(WaveSim):
self.delays = cuda.to_device(self.delays)
self.simctl_int = cuda.to_device(self.simctl_int)
self.abuf = cuda.to_device(self.abuf)
self.e = cuda.to_device(self.e)
self.error_counts = cuda.to_device(self.error_counts)
self.lsts = cuda.to_device(self.lsts)
self.overflows = cuda.to_device(self.overflows)
self.aux = cuda.to_device(self.aux)
def s_to_c(self):
grid_dim = self._grid_dim(self.sims, self.s_len)
@ -425,24 +377,14 @@ class WaveSimCuda(WaveSim): @@ -425,24 +377,14 @@ class WaveSimCuda(WaveSim):
def _grid_dim(self, x, y): return cdiv(x, self._block_dim[0]), cdiv(y, self._block_dim[1])
def c_prop(self, sims=None, seed=1, op_from=0, op_to=None, delta=0):
def c_prop(self, sims=None, seed=1):
sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops):
if op_from > op_start: continue
if op_to is not None and op_to <= op_start: break
grid_dim = self._grid_dim(sims, op_stop - op_start)
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0),
sims, self.delays, self.simctl_int, seed, delta)
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, int(0),
sims, self.delays, self.simctl_int, seed)
cuda.synchronize()
def c_prop_level(self, level, sims=None, seed=1, delta=0):
sims = min(sims or self.sims, self.sims)
op_start = self.level_starts[level]
op_stop = self.level_stops[level]
grid_dim = self._grid_dim(sims, op_stop - op_start)
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0),
sims, self.delays, self.simctl_int, seed, delta)
def c_to_s(self, time=TMAX, sd=0.0, seed=1):
grid_dim = self._grid_dim(self.sims, self.s_len)
wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.c_caps, self.ppo_offset,
@ -452,77 +394,6 @@ class WaveSimCuda(WaveSim): @@ -452,77 +394,6 @@ class WaveSimCuda(WaveSim):
grid_dim = self._grid_dim(self.sims, self.s_len)
ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset)
def acc_error_counts(self, sims=None):
sims = min(sims or self.sims, self.sims)
grid_dim = cdiv(self.s_len, 256)
acc_error_counts_gpu[grid_dim, 256](self.s, sims, self.error_counts)
def reset_error_counts(self):
self.error_counts[:] = 0
def get_error_counts(self):
return np.array(self.error_counts)
def acc_overflows(self, sims=None):
sims = min(sims or self.sims, self.sims)
grid_dim = cdiv(self.s_len, 256)
acc_overflows_gpu[grid_dim, 256](self.s, sims, self.overflows)
def reset_overflows(self):
self.overflows[:] = 0
def get_overflows(self):
return np.array(self.overflows)
def acc_lsts(self, sims=None):
sims = min(sims or self.sims, self.sims)
grid_dim = cdiv(self.s_len, 256)
acc_lsts_gpu[grid_dim, 256](self.s, sims, self.lsts)
def reset_lsts(self):
self.lsts[:] = 0.0
def get_lsts(self):
return np.array(self.lsts)
@cuda.jit()
def memcpy_gpu (src, dst, nitems):
tid = cuda.grid(1)
stride = cuda.gridDim.x * cuda.blockDim.x
for i in range(tid, nitems, stride):
dst.flat[i] = src.flat[i]
@cuda.jit()
def acc_error_counts_gpu(s, sims, error_counts):
x = cuda.grid(1)
if x >= s.shape[1]: return
cnt = 0
for i in range(sims):
cnt += (s[6,x,i] != s[8,x,i])
error_counts[x] += cnt
@cuda.jit()
def acc_overflows_gpu(s, sims, overflows):
x = cuda.grid(1)
if x >= s.shape[1]: return
cnt = 0
for i in range(sims):
cnt += s[10,x,i]
overflows[x] += cnt
@cuda.jit()
def acc_lsts_gpu(s, sims, lsts):
x = cuda.grid(1)
if x >= s.shape[1]: return
lst = 0
for i in range(sims):
lst = max(lst, s[5,x,i])
lsts[x] = max(lsts[x], lst)
@cuda.jit()
def wave_assign_gpu(c, s, c_locs, ppi_offset):
@ -552,7 +423,7 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True) @@ -552,7 +423,7 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True)
@cuda.jit()
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, seed, delta):
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed):
x, y = cuda.grid(2)
sim = sim_start + x
op_idx = op_start + y
@ -564,7 +435,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_ @@ -564,7 +435,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_
a_wr = op[7]
a_wf = op[8]
nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], seed, delta)
nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed)
# accumulate WSA into abuf
if a_loc >= 0:

BIN
tests/b15_4ig.sa_rf.stil.gz

Binary file not shown.

36
tests/conftest.py

@ -13,44 +13,8 @@ def b15_2ig_circuit(mydir): @@ -13,44 +13,8 @@ def b15_2ig_circuit(mydir):
from kyupy.techlib import SAED32
return verilog.load(mydir / 'b15_2ig.v.gz', branchforks=True, tlib=SAED32)
@pytest.fixture(scope='session')
def b15_2ig_circuit_resolved(b15_2ig_circuit):
from kyupy.techlib import SAED32
cr = b15_2ig_circuit.copy()
cr.resolve_tlib_cells(SAED32)
return cr
@pytest.fixture(scope='session')
def b15_4ig_circuit(mydir):
from kyupy import verilog
from kyupy.techlib import SAED32
return verilog.load(mydir / 'b15_4ig.v.gz', branchforks=True, tlib=SAED32)
@pytest.fixture(scope='session')
def b15_4ig_circuit_resolved(b15_4ig_circuit):
from kyupy.techlib import SAED32
cr = b15_4ig_circuit.copy()
cr.resolve_tlib_cells(SAED32)
return cr
@pytest.fixture(scope='session')
def b15_2ig_delays(mydir, b15_2ig_circuit):
from kyupy import sdf
from kyupy.techlib import SAED32
return sdf.load(mydir / 'b15_2ig.sdf.gz').iopaths(b15_2ig_circuit, tlib=SAED32)[1:2]
@pytest.fixture(scope='session')
def b15_2ig_sa_nf_test_resp(mydir, b15_2ig_circuit_resolved):
from kyupy import stil
s = stil.load(mydir / 'b15_2ig.sa_nf.stil.gz')
tests = s.tests(b15_2ig_circuit_resolved)[:,1:]
resp = s.responses(b15_2ig_circuit_resolved)[:,1:]
return (tests, resp)
@pytest.fixture(scope='session')
def b15_4ig_sa_rf_test_resp(mydir, b15_4ig_circuit_resolved):
from kyupy import stil
s = stil.load(mydir / 'b15_4ig.sa_rf.stil.gz')
tests = s.tests(b15_4ig_circuit_resolved)[:,1:]
resp = s.responses(b15_4ig_circuit_resolved)[:,1:]
return (tests, resp)

39
tests/gates.sdf

@ -7,49 +7,22 @@ @@ -7,49 +7,22 @@
(TEMPERATURE 25.00:25.00:25.00)
(TIMESCALE 1ns)
(CELL
(CELLTYPE "NAND2_X1")
(CELLTYPE "NAND2X1")
(INSTANCE nandgate)
(DELAY
(ABSOLUTE
(IOPATH A1 ZN (0.099:0.103:0.103) (0.122:0.127:0.127))
(IOPATH A2 ZN (0.083:0.086:0.086) (0.100:0.104:0.104))
(IOPATH IN1 QN (0.099:0.103:0.103) (0.122:0.127:0.127))
(IOPATH IN2 QN (0.083:0.086:0.086) (0.100:0.104:0.104))
)
)
)
(CELL
(CELLTYPE "AND2_X1")
(CELLTYPE "AND2X1")
(INSTANCE andgate)
(DELAY
(ABSOLUTE
(IOPATH A1 ZN (0.367:0.378:0.378) (0.351:0.377:0.377))
(IOPATH A2 ZN (0.366:0.375:0.375) (0.359:0.370:0.370))
)
)
)
(CELL
(CELLTYPE "OAI21_X1")
(INSTANCE oai21gate)
(DELAY
(ABSOLUTE
(IOPATH B1 ZN (0.000:0.025:0.025) (0.000:0.013:0.013))
(IOPATH B2 ZN (0.000:0.030:0.030) (0.000:0.016:0.016))
( COND (B1 == 1'b0) && (B2 == 1'b1) (IOPATH A ZN (0.000:0.018:0.018)))
(COND (B1 == 1'b1) && (B2 == 1'b0) (IOPATH A ZN (0.000:0.018:0.018) (0.000:0.016:0.016)))
(COND (B1 == 1'b1) && (B2 == 1'b1) (IOPATH A ZN (0.000:0.019:0.019) (0.000:0.014:0.014)))
)
)
)
(CELL
(CELLTYPE "MUX2_X1")
(INSTANCE mux2gate)
(DELAY
(ABSOLUTE
(COND (B == 1'b0) && (S == 1'b0) (IOPATH A Z (0.000:0.037:0.037) (0.000:0.058:0.058)))
(COND (B == 1'b1) && (S == 1'b0) (IOPATH A Z (0.000:0.037:0.037) (0.000:0.058:0.058)))
(COND (A == 1'b0) && (S == 1'b1) (IOPATH B Z (0.000:0.035:0.035) (0.000:0.056:0.056)))
(COND (A == 1'b1) && (S == 1'b1) (IOPATH B Z (0.000:0.035:0.035) (0.000:0.056:0.056)))
(COND (A == 1'b0) && (B == 1'b1) (IOPATH S Z (0.000:0.047:0.047) (0.000:0.073:0.073)))
(COND (A == 1'b1) && (B == 1'b0) (IOPATH S Z (0.000:0.072:0.072) (0.000:0.064:0.064)))
(IOPATH IN1 Q (0.367:0.378:0.378) (0.351:0.377:0.377))
(IOPATH IN2 Q (0.366:0.375:0.375) (0.359:0.370:0.370))
)
)
)

12
tests/gates.v

@ -1,15 +1,11 @@ @@ -1,15 +1,11 @@
module gates (a, b, c, o0, o1, o2, o3 );
module gates (a, b, o0, o1 );
input a;
input b;
input c;
output o0;
output o1;
output o2;
output o3;
AND2_X1 andgate (.A1 ( a ) , .A2 ( b ) , .ZN ( o0 ) ) ;
NAND2_X1 nandgate (.A1 ( a ) , .A2 ( b ) , .ZN ( o1 ) ) ;
OAI21_X1 oai21gate (.B1(a), .B2(b), .A(c), .ZN(o2) ) ;
MUX2_X1 mux2gate (.A(a), .B(b), .S(c), .Z(o3)) ;
AND2X1 andgate (.IN1 ( a ) , .IN2 ( b ) , .Q ( o0 ) ) ;
NAND2X1 nandgate (.IN1 ( a ) , .IN2 ( b ) , .QN ( o1 ) ) ;
endmodule

23
tests/test_circuit.py

@ -1,30 +1,9 @@ @@ -1,30 +1,9 @@
import pickle
from kyupy.circuit import GrowingList, Circuit, Node, Line
from kyupy.circuit import Circuit, Node, Line
from kyupy import verilog, bench
from kyupy.techlib import SAED32
def test_growing_list():
gl = GrowingList()
assert gl.free_idx == 0
gl[0] = 1
assert gl.free_idx == 1
gl[2] = 1
assert gl.free_idx == 1
gl[0] = None
assert gl.free_idx == 0
gl[0] = 1
assert gl.free_idx == 1
gl[1] = 1
assert gl.free_idx == 3
gl.append(1)
assert gl.free_idx == 4
gl[2] = None
assert gl.free_idx == 2
gl[2] = 1
gl[1] = None
assert gl.free_idx == 1
def test_lines():
c = Circuit()
n1 = Node(c, 'n1')

87
tests/test_logic_sim.py

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
import numpy as np
from kyupy.logic_sim import LogicSim, LogicSim6V
from kyupy.logic_sim import LogicSim
from kyupy import bench, logic, sim
from kyupy.logic import mvarray, bparray, bp_to_mv, mv_to_bp
@ -94,30 +94,6 @@ def test_4v(): @@ -94,30 +94,6 @@ def test_4v():
'--0XX', '--X1X', '--XXX', '--XXX'))
def test_6v():
c = bench.parse('input(x, y) output(a, o, n, xo, no) a=AND2(x,y) o=OR2(x,y) n=INV1(x) xo=XOR2(x,y) no=NOR2(x,y)')
s = LogicSim6V(c, 36)
assert s.s_len == 7
mva = mvarray(
'0000101', '0101110', '0R0R1RF', '0F0F1FR', '0P0P1PN', '0N0N1NP',
'1001010', '1111000', '1RR10F0', '1FF10R0', '1PP10N0', '1NN10P0',
'R00RFRF', 'R1R1FF0', 'RRRRFPF', 'RFPNFNP', 'RPPRFRF', 'RNRNFFP',
'F00FRFR', 'F1F1RR0', 'FRPNRNP', 'FFFFRPR', 'FPPFRFR', 'FNFNRRP',
'P00PNPN', 'P1P1NN0', 'PRPRNRF', 'PFPFNFR', 'PPPPNPN', 'PNPNNNP',
'N00NPNP', 'N1N1PP0', 'NRRNPFP', 'NFFNPRP', 'NPPNPNP', 'NNNNPPP')
tests = np.copy(mva)
tests[2:] = logic.ZERO
s.s[0] = tests
s.s_to_c()
s.c_prop()
s.c_to_s()
resp = s.s[1].copy()
exp_resp = np.copy(mva)
exp_resp[:2] = logic.ZERO
np.testing.assert_allclose(resp, exp_resp)
def test_8v():
c = bench.parse('input(x, y) output(a, o, n, xo) a=and(x,y) o=or(x,y) n=not(x) xo=xor(x,y)')
s = LogicSim(c, 64, m=8)
@ -197,64 +173,3 @@ def test_b01(mydir): @@ -197,64 +173,3 @@ def test_b01(mydir):
s.c_prop()
s.c_to_s()
bp_to_mv(s.s[1])
def sim_and_compare(c, test_resp, m=8):
tests, resp = test_resp
lsim = LogicSim(c, m=m, sims=tests.shape[1])
lsim.s[0] = logic.mv_to_bp(tests)
lsim.s_to_c()
lsim.c_prop()
lsim.c_to_s()
resp_sim = logic.bp_to_mv(lsim.s[1])[:,:tests.shape[1]]
idxs, pats = np.nonzero(((resp == logic.ONE) & (resp_sim != logic.ONE)) | ((resp == logic.ZERO) & (resp_sim != logic.ZERO)))
for i, (idx, pat) in enumerate(zip(idxs, pats)):
if i >= 10:
print(f'...')
break
print(f'mismatch pattern:{pat} ppio:{idx} exp:{logic.mv_str(resp[idx,pat])} act:{logic.mv_str(resp_sim[idx,pat])}')
assert len(idxs) == 0
def sim_and_compare_6v(c, test_resp):
tests, resp = test_resp
lsim = LogicSim6V(c, sims=tests.shape[1])
lsim.s[0] = tests
lsim.s_to_c()
lsim.c_prop()
lsim.c_to_s()
resp_sim = lsim.s[1]
idxs, pats = np.nonzero(((resp == logic.ONE) & (resp_sim != logic.ONE)) | ((resp == logic.ZERO) & (resp_sim != logic.ZERO)))
for i, (idx, pat) in enumerate(zip(idxs, pats)):
if i >= 10:
print(f'...')
break
print(f'mismatch pattern:{pat} ppio:{idx} exp:{logic.mv_str(resp[idx,pat])} act:{logic.mv_str(resp_sim[idx,pat])}')
assert len(idxs) == 0
def test_b15_2ig_sa_2v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=2)
def test_b15_2ig_sa_4v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=4)
def test_b15_2ig_sa_6v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
sim_and_compare_6v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp)
def test_b15_2ig_sa_8v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=8)
def test_b15_4ig_sa_2v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp):
sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=2)
def test_b15_4ig_sa_4v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp):
sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=4)
def test_b15_4ig_sa_8v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp):
sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=8)

6
tests/test_sdf.py

@ -2,7 +2,7 @@ import numpy as np @@ -2,7 +2,7 @@ import numpy as np
from kyupy import sdf, verilog, bench
from kyupy.wave_sim import WaveSim, TMAX, TMIN
from kyupy.techlib import SAED32, NANGATE45
from kyupy.techlib import SAED32, SAED90
def test_parse():
test = '''
@ -80,9 +80,9 @@ def test_b15(mydir): @@ -80,9 +80,9 @@ def test_b15(mydir):
def test_gates(mydir):
c = verilog.load(mydir / 'gates.v', tlib=NANGATE45)
c = verilog.load(mydir / 'gates.v', tlib=SAED90)
df = sdf.load(mydir / 'gates.sdf')
lt = df.iopaths(c, tlib=NANGATE45)[1]
lt = df.iopaths(c, tlib=SAED90)[1]
nand_a = c.cells['nandgate'].ins[0]
nand_b = c.cells['nandgate'].ins[1]
and_a = c.cells['andgate'].ins[0]

12
tests/test_verilog.py

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
from kyupy import verilog
from kyupy.techlib import SAED90, SAED32, NANGATE45
from kyupy.techlib import SAED90, SAED32
def test_b01(mydir):
with open(mydir / 'b01.v', 'r') as f:
@ -26,12 +26,12 @@ def test_b15(mydir): @@ -26,12 +26,12 @@ def test_b15(mydir):
def test_gates(mydir):
c = verilog.load(mydir / 'gates.v', tlib=NANGATE45)
assert len(c.nodes) == 18
assert len(c.lines) == 21
c = verilog.load(mydir / 'gates.v', tlib=SAED90)
assert len(c.nodes) == 10
assert len(c.lines) == 10
stats = c.stats
assert stats['input'] == 3
assert stats['output'] == 4
assert stats['input'] == 2
assert stats['output'] == 2
assert stats['__seq__'] == 0

60
tests/test_wave_sim.py

@ -5,56 +5,22 @@ from kyupy.logic_sim import LogicSim @@ -5,56 +5,22 @@ from kyupy.logic_sim import LogicSim
from kyupy import logic, bench, sim
from kyupy.logic import mvarray
def test_xnor2_delays():
op = (sim.XNOR2, 2, 0, 1, 3, 3, -1, 0, 0)
#op = (0b0111, 4, 0, 1)
c = np.full((4*16, 1), TMAX, dtype=np.float32) # 4 waveforms of capacity 16
c_locs = np.zeros((4,), dtype='int')
c_caps = np.zeros((4,), dtype='int')
ebuf = np.zeros((4, 1, 2), dtype=np.int32)
for i in range(4): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping
delays = np.zeros((1, 4, 2, 2))
delays[0, 0, 0, 0] = 0.031 # A rise -> Z rise
delays[0, 0, 0, 1] = 0.027 # A rise -> Z fall
delays[0, 0, 1, 0] = 0.033 # A fall -> Z rise
delays[0, 0, 1, 1] = 0.037 # A fall -> Z fall
delays[0, 1, 0, 0] = 0.032 # B rise -> Z rise
delays[0, 1, 0, 1] = 0.030 # B rise -> Z fall
delays[0, 1, 1, 0] = 0.038 # B fall -> Z rise
delays[0, 1, 1, 1] = 0.036 # B fall -> Z fall
simctl_int = np.asarray([0], dtype=np.int32)
def wave_assert(inputs, output):
for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i
wave_eval_cpu(op, c, c_locs, c_caps, ebuf, 0, delays, simctl_int, 0, 0)
for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[2,i], v)
wave_assert([[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # XNOR(1,1) => 1
wave_assert([[TMAX,TMAX],[TMIN,TMAX]], [TMAX]) # XNOR(0,1) => 0
# using Afall/Zfall for pulse length, bug: was using Arise/Zfall
#wave_assert([[0.07, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, 0.101, 0.137, TMAX])
wave_assert([[0.07, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, TMAX])
wave_assert([[0.06, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, 0.091, 0.137, TMAX])
def test_nand_delays():
op = (sim.NAND4, 4, 0, 1, 2, 3, -1, 0, 0)
#op = (0b0111, 4, 0, 1)
c = np.full((5*16, 1), TMAX, dtype=np.float32) # 5 waveforms of capacity 16
c = np.full((5*16, 1), TMAX) # 5 waveforms of capacity 16
c_locs = np.zeros((5,), dtype='int')
c_caps = np.zeros((5,), dtype='int')
ebuf = np.zeros((4, 1, 2), dtype=np.int32)
for i in range(5): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping
# SDF specifies IOPATH delays with respect to output polarity
# SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition
delays = np.zeros((1, 5, 2, 2))
delays[0, 0, 0, 0] = 0.1 # A rise -> Z rise
delays[0, 0, 0, 1] = 0.2 # A rise -> Z fall
delays[0, 0, 1, 0] = 0.1 # A fall -> Z rise
delays[0, 0, 1, 1] = 0.2 # A fall -> Z fall
delays[0, 0, 0, 0] = 0.1 # A -> Z rise delay
delays[0, 0, 0, 1] = 0.2 # A -> Z fall delay
delays[0, 0, 1, 0] = 0.1 # A -> Z negative pulse limit (terminate in rising Z)
delays[0, 0, 1, 1] = 0.2 # A -> Z positive pulse limit
delays[0, 1, :, 0] = 0.3 # as above for B -> Z
delays[0, 1, :, 1] = 0.4
delays[0, 2, :, 0] = 0.5 # as above for C -> Z
@ -66,7 +32,7 @@ def test_nand_delays(): @@ -66,7 +32,7 @@ def test_nand_delays():
def wave_assert(inputs, output):
for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i
wave_eval_cpu(op, c, c_locs, c_caps, ebuf, 0, delays, simctl_int, 0, 0)
wave_eval_cpu(op, c, c_locs, c_caps, 0, delays, simctl_int)
for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v)
wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1
@ -179,7 +145,7 @@ def compare_to_logic_sim(wsim: WaveSim): @@ -179,7 +145,7 @@ def compare_to_logic_sim(wsim: WaveSim):
lsim.s_to_c()
lsim.c_prop()
lsim.c_to_s()
exp = logic.bp_to_mv(lsim.s[1])[:,:tests.shape[-1]]
exp = logic.bp_to_mv(lsim.s[1])
resp[resp == logic.PPULSE] = logic.ZERO
resp[resp == logic.NPULSE] = logic.ONE
@ -190,13 +156,13 @@ def compare_to_logic_sim(wsim: WaveSim): @@ -190,13 +156,13 @@ def compare_to_logic_sim(wsim: WaveSim):
np.testing.assert_allclose(resp, exp)
def test_b15(b15_2ig_circuit_resolved, b15_2ig_delays):
compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8))
def test_b15(b15_2ig_circuit, b15_2ig_delays):
compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8))
def test_b15_strip_forks(b15_2ig_circuit_resolved, b15_2ig_delays):
compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True))
def test_b15_strip_forks(b15_2ig_circuit, b15_2ig_delays):
compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))
def test_b15_cuda(b15_2ig_circuit_resolved, b15_2ig_delays):
compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True))
def test_b15_cuda(b15_2ig_circuit, b15_2ig_delays):
compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))

Loading…
Cancel
Save