diff --git a/Demo.ipynb b/Demo.ipynb index 805c60c..6ba4c1f 100644 --- a/Demo.ipynb +++ b/Demo.ipynb @@ -1009,7 +1009,7 @@ { "data": { "text/plain": [ - "119676" + "120628" ] }, "execution_count": 36, @@ -1195,7 +1195,7 @@ { "data": { "text/plain": [ - "2.0610005855560303" + "2.17240047454834" ] }, "execution_count": 42, @@ -1222,7 +1222,7 @@ { "data": { "text/plain": [ - "0.0" + "2.0" ] }, "execution_count": 43, @@ -1286,13 +1286,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Found 1 CUDA devices\n", - "id 0 b'TITAN V' [SUPPORTED]\n", + "Found 2 CUDA devices\n", + "id 0 b'NVIDIA GeForce RTX 3090' [SUPPORTED]\n", + " compute capability: 8.6\n", + " pci device id: 0\n", + " pci bus id: 3\n", + "id 1 b'NVIDIA TITAN V' [SUPPORTED]\n", " compute capability: 7.0\n", " pci device id: 0\n", " pci bus id: 2\n", "Summary:\n", - "\t1/1 devices are supported\n" + "\t2/2 devices are supported\n" ] }, { @@ -1322,9 +1326,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "worker", "language": "python", - "name": "python3" + "name": "worker" }, "language_info": { "codemirror_mode": { @@ -1336,7 +1340,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.6.13" } }, "nbformat": 4, diff --git a/LICENSE.txt b/LICENSE.txt index 293fa79..c0da9ca 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020-2021 Stefan Holst +Copyright (c) 2020-2022 Stefan Holst Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/setup.py b/setup.py index efb49ec..0f443ff 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ with open('README.rst', 'r') as f: setup( name='kyupy', - version='0.0.3', + version='0.0.4', description='High-performance processing and analysis of non-hierarchical VLSI designs', long_description=long_description, long_description_content_type='text/x-rst', diff --git a/src/kyupy/__init__.py b/src/kyupy/__init__.py index 8bbfc9f..07b15e0 100644 --- a/src/kyupy/__init__.py +++ b/src/kyupy/__init__.py @@ -78,6 +78,7 @@ class Log: """A very simple logger that formats the messages with the number of seconds since program start. """ + def __init__(self): self.start = time.perf_counter() self.logfile = None @@ -85,6 +86,13 @@ class Log: After each write, ``flush()`` is called as well. """ + def __getstate__(self): + return {'elapsed': time.perf_counter() - self.start} + + def __setstate__(self, state): + self.logfile = None + self.start = time.perf_counter() - state['elapsed'] + def log(self, level, message): t = time.perf_counter() - self.start if self.logfile is None: @@ -120,7 +128,8 @@ class Log: elapsed_time = current_time - start_time total_time = elapsed_time / done rem_time = total_time - elapsed_time - self.log(':', f'{done*100:.0f}% done {hr_time(elapsed_time)} elapsed {hr_time(rem_time)} remaining') + self.log( + ':', f'{done*100:.0f}% done {hr_time(elapsed_time)} elapsed {hr_time(rem_time)} remaining') log_interval = min(600, int(log_interval*1.5)) lastlog_time = current_time @@ -167,8 +176,10 @@ class MockCuda: for grid_y in range(grid_dim[1]): for block_x in range(block_dim[0]): for block_y in range(block_dim[1]): - outer.x = grid_x * block_dim[0] + block_x - outer.y = grid_y * block_dim[1] + block_y + outer.x = grid_x * \ + block_dim[0] + block_x + outer.y = grid_y * \ + block_dim[1] + block_y self.func(*args, **kwargs) return inner return Launcher(func) diff --git a/src/kyupy/circuit.py b/src/kyupy/circuit.py index 5801f0c..53b2e51 100644 --- a/src/kyupy/circuit.py +++ b/src/kyupy/circuit.py @@ -99,6 +99,16 @@ class Node: del self.circuit.cells[self.name] self.circuit = None + def __eq__(self, other): + """Checks equality of node name and kind. Does not check pin connections. + + This is ok, because (name, kind) is unique within a circuit. + """ + return self.name == other.name and self.kind == other.kind + + def __hash__(self): + return hash((self.name, self.kind)) + class Line: """A line is a directional 1:1 connection between two nodes. @@ -172,6 +182,13 @@ class Line: def __lt__(self, other): return self.index < other.index + def __eq__(self, other): + return self.driver == other.driver and self.driver_pin == other.driver_pin and \ + self.reader == other.reader and self.reader_pin == other.reader_pin + + def __hash__(self): + return hash((self.driver, self.driver_pin, self.reader, self.reader_pin)) + class Circuit: """A Circuit is a container for interconnected nodes and lines. @@ -238,6 +255,32 @@ class Circuit: c.interface.append(n) return c + def __getstate__(self): + nodes = [(node.name, node.kind) for node in self.nodes] + lines = [(line.driver.index, line.driver_pin, line.reader.index, line.reader_pin) for line in self.lines] + interface = [n.index for n in self.interface] + return {'name': self.name, + 'nodes': nodes, + 'lines': lines, + 'interface': interface } + + def __setstate__(self, state): + self.name = state['name'] + self.nodes = IndexList() + self.lines = IndexList() + self.interface = GrowingList() + self.cells = {} + self.forks = {} + for s in state['nodes']: + Node(self, *s) + for driver, driver_pin, reader, reader_pin in state['lines']: + Line(self, (self.nodes[driver], driver_pin), (self.nodes[reader], reader_pin)) + for n in state['interface']: + self.interface.append(self.nodes[n]) + + def __eq__(self, other): + return self.nodes == other.nodes and self.lines == other.lines and self.interface == other.interface + def dump(self): """Returns a string representation of the circuit and all its nodes. """ @@ -256,14 +299,14 @@ class Circuit: yielded first. """ visit_count = [0] * len(self.nodes) - queue = deque(n for n in self.nodes if len(n.ins) == 0 or 'DFF' in n.kind) + queue = deque(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower()) while len(queue) > 0: n = queue.popleft() for line in n.outs: if line is None: continue succ = line.reader visit_count[succ] += 1 - if visit_count[succ] == len(succ.ins) and 'DFF' not in succ.kind: + if visit_count[succ] == len(succ.ins) and 'dff' not in succ.kind.lower(): queue.append(succ) yield n @@ -282,13 +325,13 @@ class Circuit: yielded first. """ visit_count = [0] * len(self.nodes) - queue = deque(n for n in self.nodes if len(n.outs) == 0 or 'DFF' in n.kind) + queue = deque(n for n in self.nodes if len(n.outs) == 0 or 'dff' in n.kind.lower()) while len(queue) > 0: n = queue.popleft() for line in n.ins: pred = line.driver visit_count[pred] += 1 - if visit_count[pred] == len(pred.outs) and 'DFF' not in pred.kind: + if visit_count[pred] == len(pred.outs) and 'dff' not in pred.kind.lower(): queue.append(pred) yield n @@ -310,21 +353,21 @@ class Circuit: def fanout_free_regions(self): for stem in self.reversed_topological_order(): - if len(stem.outs) == 1 and 'DFF' not in stem.kind: continue + if len(stem.outs) == 1 and 'dff' not in stem.kind.lower(): continue region = [] - if 'DFF' in stem.kind: + if 'dff' in stem.kind.lower(): n = stem.ins[0] - if len(n.driver.outs) == 1 and 'DFF' not in n.driver.kind: + if len(n.driver.outs) == 1 and 'dff' not in n.driver.kind.lower(): queue = deque([n.driver]) else: queue = deque() else: queue = deque(n.driver for n in stem.ins - if len(n.driver.outs) == 1 and 'DFF' not in n.driver.kind) + if len(n.driver.outs) == 1 and 'dff' not in n.driver.kind.lower()) while len(queue) > 0: n = queue.popleft() preds = [pred.driver for pred in n.ins - if len(pred.driver.outs) == 1 and 'DFF' not in pred.driver.kind] + if len(pred.driver.outs) == 1 and 'dff' not in pred.driver.kind.lower()] queue.extend(preds) region.append(n) yield stem, region diff --git a/src/kyupy/logic.py b/src/kyupy/logic.py index 7b0c149..e78357e 100644 --- a/src/kyupy/logic.py +++ b/src/kyupy/logic.py @@ -291,6 +291,23 @@ def mv_xor(x1, x2, out=None): return out +def mv_latch(d, t, q_prev, out=None): + """A latch that is transparent if `t` is high. `q_prev` has to be the output value from the previous clock cycle. + """ + m = mv_getm(d, t, q_prev) + d, t, q_prev = mv_cast(d, t, q_prev, m=m) + out = out or MVArray(np.broadcast(d.data, t.data, q_prev).shape, m=m) + out.data[...] = t.data & d.data & 0b011 + out.data[...] |= ~t.data & 0b010 & (q_prev.data << 1) + out.data[...] |= ~t.data & 0b001 & (out.data >> 1) + out.data[...] |= ((out.data << 1) ^ (out.data << 2)) & 0b100 + unknown = (t.data == UNKNOWN) \ + | (t.data == UNASSIGNED) \ + | (((d.data == UNKNOWN) | (d.data == UNASSIGNED)) & (t.data != ZERO)) + np.putmask(out.data, unknown, UNKNOWN) + return out + + def mv_transition(init, final, out=None): """Computes the logic transitions from the initial values of ``init`` to the final values of ``final``. Pulses in the input data are ignored. If any of the inputs are ``UNKNOWN``, the result is ``UNKNOWN``. @@ -460,3 +477,27 @@ def bp_xor(out, *ins): out[..., 0, :] |= any_unknown out[..., 1, :] &= ~any_unknown out[..., 2, :] &= ~any_unknown + + +def bp_latch(out, d, t, q_prev): + md = out.shape[-2] + assert md == d.shape[-2] + assert md == t.shape[-2] + assert md == q_prev.shape[-2] + if md == 1: + out[...] = (d & t) | (q_prev & ~t) + elif md == 2: + any_unknown = t[..., 0, :] ^ t[..., 1, :] + any_unknown |= (d[..., 0, :] ^ d[..., 1, :]) & (t[..., 0, :] | t[..., 1, :]) + out[...] = (d & t) | (q_prev & ~t) + out[..., 0, :] |= any_unknown + out[..., 1, :] &= ~any_unknown + else: + any_unknown = (t[..., 0, :] ^ t[..., 1, :]) & ~t[..., 2, :] + any_unknown |= ((d[..., 0, :] ^ d[..., 1, :]) & ~d[..., 2, :]) & (t[..., 0, :] | t[..., 1, :] | t[..., 2, :]) + out[..., 1, :] = (d[..., 1, :] & t[..., 1, :]) | (q_prev[..., 0, :] & ~t[..., 1, :]) + out[..., 0, :] = (d[..., 0, :] & t[..., 0, :]) | (out[..., 1, :] & ~t[..., 0, :]) + out[..., 2, :] = out[..., 1, :] ^ out[..., 0, :] + out[..., 0, :] |= any_unknown + out[..., 1, :] &= ~any_unknown + out[..., 2, :] &= ~any_unknown diff --git a/src/kyupy/logic_sim.py b/src/kyupy/logic_sim.py index 92641f2..29be2b6 100644 --- a/src/kyupy/logic_sim.py +++ b/src/kyupy/logic_sim.py @@ -30,15 +30,22 @@ class LogicSim: self.circuit = circuit self.sims = sims nbytes = (sims - 1) // 8 + 1 - self.interface = list(circuit.interface) + [n for n in circuit.nodes if 'dff' in n.kind.lower()] + dffs = [n for n in circuit.nodes if 'dff' in n.kind.lower()] + latches = [n for n in circuit.nodes if 'latch' in n.kind.lower()] + self.interface = list(circuit.interface) + dffs + latches + self.width = len(self.interface) """The number of bits in the circuit state (number of ports + number of state-elements).""" + self.state = np.zeros((len(circuit.lines), mdim, nbytes), dtype='uint8') self.state_epoch = np.zeros(len(circuit.nodes), dtype='int8') - 1 self.tmp = np.zeros((5, mdim, nbytes), dtype='uint8') self.zero = np.zeros((mdim, nbytes), dtype='uint8') self.epoch = 0 + self.latch_dict = dict((n.index, i) for i, n in enumerate(latches)) + self.latch_state = np.zeros((len(latches), mdim, nbytes), dtype='uint8') + known_fct = [(f[:-4], getattr(self, f)) for f in dir(self) if f.endswith('_fct')] self.node_fct = [] for n in circuit.nodes: @@ -69,8 +76,11 @@ class LogicSim: """ for node, stim in zip(self.interface, stimuli.data if hasattr(stimuli, 'data') else stimuli): if len(node.outs) == 0: continue - outputs = [self.state[line] if line else self.tmp[3] for line in node.outs] - self.node_fct[node]([stim], outputs) + if node.index in self.latch_dict: + self.latch_state[self.latch_dict[node.index]] = stim + else: + outputs = [self.state[line] if line else self.tmp[3] for line in node.outs] + self.node_fct[node]([stim], outputs) for line in node.outs: if line is not None: self.state_epoch[line.reader] = self.epoch for n in self.circuit.nodes: @@ -83,13 +93,29 @@ class LogicSim: def capture(self, responses): """Capture the current values at the primary outputs and in the state-elements (flip-flops). + For primary outputs, the logic value is stored unmodified in the given target array. + For flip-flops, the logic value is constructed from the previous state and the new state. :param responses: A bit-parallel storage target for the responses in a compatible shape. :type responses: :py:class:`~kyupy.logic.BPArray` :returns: The given responses object. """ for node, resp in zip(self.interface, responses.data if hasattr(responses, 'data') else responses): - if len(node.ins) > 0: resp[...] = self.state[node.ins[0]] + if len(node.ins) == 0: continue + if node.index in self.latch_dict: + resp[...] = self.state[node.outs[0]] + else: + resp[...] = self.state[node.ins[0]] + # FIXME: unclear why we should use outs for DFFs + #if self.m > 2 and 'dff' in node.kind.lower() and len(node.outs) > 0: + # if node.outs[0] is None: + # resp[1, :] = ~self.state[node.outs[1], 0, :] # assume QN is connected, take inverse of that. + # else: + # resp[1, :] = self.state[node.outs[0], 0, :] + # if self.m > 4: + # resp[..., 2, :] = resp[..., 0, :] ^ resp[..., 1, :] + # # We don't handle X or - correctly. + return responses def propagate(self, inject_cb=None): @@ -116,7 +142,8 @@ class LogicSim: if self.state_epoch[node] != self.epoch: continue inputs = [self.state[line] if line else self.zero for line in node.ins] outputs = [self.state[line] if line else self.tmp[3] for line in node.outs] - # print('sim', node) + if node.index in self.latch_dict: + inputs.append(self.latch_state[self.latch_dict[node.index]]) self.node_fct[node](inputs, outputs) for line in node.outs: if inject_cb is not None: inject_cb(line, self.state[line]) @@ -137,59 +164,57 @@ class LogicSim: self.propagate(inject_cb) return self.capture(state) - @staticmethod - def fork_fct(inputs, outputs): + def fork_fct(self, inputs, outputs): for o in outputs: o[...] = inputs[0] - @staticmethod - def const0_fct(_, outputs): + def const0_fct(self, _, outputs): for o in outputs: o[...] = 0 - @staticmethod - def const1_fct(_, outputs): + def const1_fct(self, _, outputs): for o in outputs: o[...] = 0 logic.bp_not(o, o) - @staticmethod - def not_fct(inputs, outputs): + def not_fct(self, inputs, outputs): logic.bp_not(outputs[0], inputs[0]) - @staticmethod - def and_fct(inputs, outputs): + def and_fct(self, inputs, outputs): logic.bp_and(outputs[0], *inputs) - @staticmethod - def or_fct(inputs, outputs): + def or_fct(self, inputs, outputs): logic.bp_or(outputs[0], *inputs) - @staticmethod - def xor_fct(inputs, outputs): + def xor_fct(self, inputs, outputs): logic.bp_xor(outputs[0], *inputs) - @staticmethod - def sdff_fct(inputs, outputs): + def sdff_fct(self, inputs, outputs): logic.bp_buf(outputs[0], inputs[0]) if len(outputs) > 1: logic.bp_not(outputs[1], inputs[0]) - @staticmethod - def dff_fct(inputs, outputs): + def dff_fct(self, inputs, outputs): logic.bp_buf(outputs[0], inputs[0]) if len(outputs) > 1: logic.bp_not(outputs[1], inputs[0]) - @staticmethod - def nand_fct(inputs, outputs): + def latch_fct(self, inputs, outputs): + logic.bp_latch(outputs[0], inputs[0], inputs[1], inputs[2]) + if len(outputs) > 1: + logic.bp_not(outputs[1], inputs[0]) + + def nand_fct(self, inputs, outputs): logic.bp_and(outputs[0], *inputs) logic.bp_not(outputs[0], outputs[0]) - @staticmethod - def nor_fct(inputs, outputs): + def nor_fct(self, inputs, outputs): logic.bp_or(outputs[0], *inputs) logic.bp_not(outputs[0], outputs[0]) - @staticmethod - def xnor_fct(inputs, outputs): + def xnor_fct(self, inputs, outputs): logic.bp_xor(outputs[0], *inputs) logic.bp_not(outputs[0], outputs[0]) + + def aoi21_fct(self, inputs, outputs): + logic.bp_and(self.tmp[0], inputs[0], inputs[1]) + logic.bp_or(outputs[0], self.tmp[0], inputs[2]) + logic.bp_not(outputs[0], outputs[0]) \ No newline at end of file diff --git a/src/kyupy/sdf.py b/src/kyupy/sdf.py index 78715f7..c1e3ebf 100644 --- a/src/kyupy/sdf.py +++ b/src/kyupy/sdf.py @@ -92,7 +92,7 @@ class DelayFile: continue cell = find_cell(cn) if cell is None: - log.warn(f'Cell from SDF not found in circuit: {cn}') + #log.warn(f'Cell from SDF not found in circuit: {cn}') continue ipn = re.sub(r'\((neg|pos)edge ([^)]+)\)', r'\2', ipn) ipin = tlib.pin_index(cell.kind, ipn) @@ -111,12 +111,15 @@ class DelayFile: if ffdelays and (len(cell.outs) > opin): add_delays(cell.outs[opin]) else: - if kind.startswith(('xor', 'xnor')): - # print(ipn, ipin, times[cell.i_lines[ipin], 0, 0]) - take_avg = timing[cell.ins[ipin]].sum() > 0 - add_delays(cell.ins[ipin]) - if take_avg: - timing[cell.ins[ipin]] /= 2 + if ipin < len(cell.ins): + if kind.startswith(('xor', 'xnor')): + # print(ipn, ipin, times[cell.i_lines[ipin], 0, 0]) + take_avg = timing[cell.ins[ipin]].sum() > 0 + add_delays(cell.ins[ipin]) + if take_avg: + timing[cell.ins[ipin]] /= 2 + else: + log.warn(f'No line to annotate pin {ipn} of {cell}') if not interconnect or self.interconnects is None: return timing @@ -139,14 +142,17 @@ class DelayFile: cn2, pn2 = (n2, 'IN') c1 = find_cell(cn1) if c1 is None: - log.warn(f'Cell from SDF not found in circuit: {cn1}') + #log.warn(f'Cell from SDF not found in circuit: {cn1}') continue c2 = find_cell(cn2) if c2 is None: - log.warn(f'Cell from SDF not found in circuit: {cn2}') + #log.warn(f'Cell from SDF not found in circuit: {cn2}') continue p1, p2 = tlib.pin_index(c1.kind, pn1), tlib.pin_index(c2.kind, pn2) line = None + if len(c2.ins) <= p2: + log.warn(f'No line to annotate pin {pn2} of {c2}') + continue f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver if f1 != f2: # possible branchfork assert len(f2.ins) == 1 diff --git a/src/kyupy/stil.py b/src/kyupy/stil.py index c0c789d..7cacc5b 100644 --- a/src/kyupy/stil.py +++ b/src/kyupy/stil.py @@ -40,19 +40,19 @@ class StilFile: unload = {} for so_port in self.so_ports: if so_port in call.parameters: - unload[so_port] = call.parameters[so_port].replace('\n', '') - if len(launch) > 0: + unload[so_port] = call.parameters[so_port].replace('\n', '').replace('N', '-') + if len(capture) > 0: self.patterns.append(ScanPattern(sload, launch, capture, unload)) capture = {} launch = {} sload = {} for si_port in self.si_ports: if si_port in call.parameters: - sload[si_port] = call.parameters[si_port].replace('\n', '') + sload[si_port] = call.parameters[si_port].replace('\n', '').replace('N', '-') if call.name.endswith('_launch'): - launch = dict((k, v.replace('\n', '')) for k, v in call.parameters.items()) + launch = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items()) if call.name.endswith('_capture'): - capture = dict((k, v.replace('\n', '')) for k, v in call.parameters.items()) + capture = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items()) def _maps(self, c): interface = list(c.interface) + [n for n in c.nodes if 'DFF' in n.kind] @@ -96,15 +96,15 @@ class StilFile: for si_port in self.si_ports.keys(): pattern = logic.mv_xor(p.load[si_port], scan_inversions[si_port]) tests.data[scan_maps[si_port], i] = pattern.data[:, 0] - tests.data[pi_map, i] = logic.MVArray(p.launch['_pi']).data[:, 0] + tests.data[pi_map, i] = logic.MVArray(p.capture['_pi']).data[:, 0] return tests def tests_loc(self, circuit): """Assembles and returns a LoC scan test pattern set for given circuit. This function assumes a launch-on-capture (LoC) delay test. - It performs a logic simulation to obtain the first capture pattern (the one that launches the - delay test) and assembles the test pattern set from from pairs for initialization- and launch-patterns. + It performs a logic simulation to obtain the first capture pattern (the one that launches the delay + test) and assembles the test pattern set from from pairs for initialization- and launch-patterns. """ interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(circuit) init = logic.MVArray((len(interface), len(self.patterns)), m=4) @@ -114,7 +114,7 @@ class StilFile: for si_port in self.si_ports.keys(): pattern = logic.mv_xor(p.load[si_port], scan_inversions[si_port]) init.data[scan_maps[si_port], i] = pattern.data[:, 0] - init.data[pi_map, i] = logic.MVArray(p.launch['_pi']).data[:, 0] + init.data[pi_map, i] = logic.MVArray(p.launch['_pi'] if '_pi' in p.launch else p.capture['_pi']).data[:, 0] launch_bp = logic.BPArray(init) sim4v = LogicSim(circuit, len(init), m=4) sim4v.assign(launch_bp) @@ -122,8 +122,8 @@ class StilFile: sim4v.capture(launch_bp) launch = logic.MVArray(launch_bp) for i, p in enumerate(self.patterns): - # if there was no launch clock, then init = launch - if ('P' not in p.launch['_pi']) or ('P' not in p.capture['_pi']): + # if there was no launch cycle or launch clock, then init = launch + if '_pi' not in p.launch or 'P' not in p.launch['_pi'] or 'P' not in p.capture['_pi']: for si_port in self.si_ports.keys(): pattern = logic.mv_xor(p.load[si_port], scan_inversions[si_port]) launch.data[scan_maps[si_port], i] = pattern.data[:, 0] diff --git a/src/kyupy/techlib.py b/src/kyupy/techlib.py index e4c4955..21c82a6 100644 --- a/src/kyupy/techlib.py +++ b/src/kyupy/techlib.py @@ -30,22 +30,24 @@ class TechLib: def pin_index(kind, pin): """Returns a pin list position for a given node kind and pin name.""" if kind[:3] in ('OAI', 'AOI'): - if pin[0] == 'A': return int(pin[1]) - if pin[0] == 'B': return int(pin[1]) + int(kind[4]) + if pin[0] == 'A': return int(pin[1]) - 1 + if pin[0] == 'B': return int(pin[1]) + int(kind[4]) - 1 for prefix, pins, index in [('HADD', ('B0', 'SO'), 1), - ('MUX21', ('S',), 2), + ('MUX21', ('S', 'S0'), 2), ('MX2', ('S0',), 2), ('TBUF', ('OE',), 1), ('TINV', ('OE',), 1), - ('DFF', ('QN',), 1), + ('LATCH', ('D',), 0), + ('LATCH', ('QN',), 1), ('DFF', ('D',), 0), + ('DFF', ('QN',), 1), ('SDFF', ('D',), 0), ('SDFF', ('QN',), 1), ('SDFF', ('CLK',), 3), - ('SDFF', ('RSTB',), 4), + ('SDFF', ('RSTB', 'RN'), 4), ('SDFF', ('SETB',), 5)]: if kind.startswith(prefix) and pin in pins: return index - for index, pins in enumerate([('A1', 'IN1', 'A', 'S', 'INP', 'Q', 'QN', 'Y', 'Z', 'ZN'), + for index, pins in enumerate([('A1', 'IN1', 'A', 'S', 'INP', 'I', 'Q', 'QN', 'Y', 'Z', 'ZN'), ('A2', 'IN2', 'B', 'CK', 'CLK', 'CO', 'SE'), ('A3', 'IN3', 'C', 'RN', 'RSTB', 'CI', 'SI'), ('A4', 'IN4', 'D', 'SN', 'SETB'), diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py index bd04f10..763e39f 100644 --- a/src/kyupy/wave_sim.py +++ b/src/kyupy/wave_sim.py @@ -122,6 +122,9 @@ class WaveSim: self.lst_eat_valid = False self.cdata = np.zeros((len(self.interface), sims, 7), dtype='float32') + + self.sdata = np.zeros((sims, 4), dtype='float32') + self.sdata[...,0] = 1.0 if isinstance(wavecaps, int): wavecaps = [wavecaps] * len(circuit.lines) @@ -158,7 +161,8 @@ class WaveSim: if kind == '__fork__': if not strip_forks: for o_line in n.outs: - ops.append((0b1010, o_line.index, i0_idx, i1_idx)) + if o_line is not None: + ops.append((0b1010, o_line.index, i0_idx, i1_idx)) elif kind.startswith('nand'): ops.append((0b0111, o0_idx, i0_idx, i1_idx)) elif kind.startswith('nor'): @@ -328,7 +332,7 @@ class WaveSim: sims = min(sims or self.sims, self.sims) for op_start, op_stop in zip(self.level_starts, self.level_stops): self.overflows += level_eval(self.ops, op_start, op_stop, self.state, self.sat, 0, sims, - self.timing, sd, seed) + self.timing, self.sdata, sd, seed) self.lst_eat_valid = False def wave(self, line, vector): @@ -521,12 +525,12 @@ class WaveSim: @numba.njit -def level_eval(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sd, seed): +def level_eval(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sdata, sd, seed): overflows = 0 for op_idx in range(op_start, op_stop): op = ops[op_idx] for st_idx in range(st_start, st_stop): - overflows += wave_eval(op, state, sat, st_idx, line_times, sd, seed) + overflows += wave_eval(op, state, sat, st_idx, line_times, sdata[st_idx], sd, seed) return overflows @@ -547,7 +551,7 @@ def rand_gauss(seed, sd): @numba.njit -def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0): +def wave_eval(op, state, sat, st_idx, line_times, sdata, sd=0.0, seed=0): lut, z_idx, a_idx, b_idx = op overflows = int(0) @@ -563,9 +567,11 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0): if z_cur == 1: state[z_mem, st_idx] = TMIN - a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd) - b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd) - + a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd) * sdata[0] + if int(sdata[1]) == a_idx: a += sdata[2+z_cur] + b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd) * sdata[0] + if int(sdata[1]) == b_idx: b += sdata[2+z_cur] + previous_t = TMIN current_t = min(a, b) @@ -576,15 +582,21 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0): if b < a: b_cur += 1 b = state[b_mem + b_cur, st_idx] - b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ b_mem ^ z_val ^ 1, sd) - thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd) + b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ b_mem ^ z_val ^ 1, sd) * sdata[0] + thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd) * sdata[0] + if int(sdata[1]) == b_idx: + b += sdata[2+(z_val^1)] + thresh += sdata[2+z_val] inputs ^= 2 next_t = b else: a_cur += 1 a = state[a_mem + a_cur, st_idx] - a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ a_mem ^ z_val ^ 1, sd) - thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd) + a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ a_mem ^ z_val ^ 1, sd) * sdata[0] + thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd) * sdata[0] + if int(sdata[1]) == a_idx: + a += sdata[2+(z_val^1)] + thresh += sdata[2+z_val] inputs ^= 1 next_t = a @@ -618,6 +630,7 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0): return overflows + class WaveSimCuda(WaveSim): """A GPU-accelerated waveform-based combinational logic timing simulator. @@ -636,6 +649,7 @@ class WaveSimCuda(WaveSim): self.d_timing = cuda.to_device(self.timing) self.d_tdata = cuda.to_device(self.tdata) self.d_cdata = cuda.to_device(self.cdata) + self.d_sdata = cuda.to_device(self.sdata) self._block_dim = (32, 16) @@ -650,6 +664,9 @@ class WaveSimCuda(WaveSim): def set_line_delay(self, line, polarity, delay): self.d_timing[line, 0, polarity] = delay + + def sdata_to_device(self): + cuda.to_device(self.sdata, to=self.d_sdata) def assign(self, vectors, time=0.0, offset=0): assert (offset % 8) == 0 @@ -676,7 +693,7 @@ class WaveSimCuda(WaveSim): for op_start, op_stop in zip(self.level_starts, self.level_stops): grid_dim = self._grid_dim(sims, op_stop - op_start) wave_kernel[grid_dim, self._block_dim](self.d_ops, op_start, op_stop, self.d_state, self.sat, int(0), - sims, self.d_timing, sd, seed) + sims, self.d_timing, self.d_sdata, sd, seed) cuda.synchronize() self.lst_eat_valid = False @@ -858,7 +875,7 @@ def rand_gauss_dev(seed, sd): @cuda.jit() -def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sd, seed): +def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sdata, sd, seed): x, y = cuda.grid(2) st_idx = st_start + x op_idx = op_start + y @@ -869,6 +886,7 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time a_idx = ops[op_idx, 2] b_idx = ops[op_idx, 3] overflows = int(0) + sdata = sdata[st_idx] _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) @@ -882,9 +900,11 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time if z_cur == 1: state[z_mem, st_idx] = TMIN - a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss_dev(_seed ^ a_mem ^ z_cur, sd) - b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss_dev(_seed ^ b_mem ^ z_cur, sd) - + a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss_dev(_seed ^ a_mem ^ z_cur, sd) * sdata[0] + if int(sdata[1]) == a_idx: a += sdata[2+z_cur] + b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss_dev(_seed ^ b_mem ^ z_cur, sd) * sdata[0] + if int(sdata[1]) == b_idx: b += sdata[2+z_cur] + previous_t = TMIN current_t = min(a, b) @@ -895,15 +915,21 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time if b < a: b_cur += 1 b = state[b_mem + b_cur, st_idx] - b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss_dev(_seed ^ b_mem ^ z_val ^ 1, sd) - thresh = line_times[b_idx, 1, z_val] * rand_gauss_dev(_seed ^ b_mem ^ z_val, sd) + b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss_dev(_seed ^ b_mem ^ z_val ^ 1, sd) * sdata[0] + thresh = line_times[b_idx, 1, z_val] * rand_gauss_dev(_seed ^ b_mem ^ z_val, sd) * sdata[0] + if int(sdata[1]) == b_idx: + b += sdata[2+(z_val^1)] + thresh += sdata[2+z_val] inputs ^= 2 next_t = b else: a_cur += 1 a = state[a_mem + a_cur, st_idx] - a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss_dev(_seed ^ a_mem ^ z_val ^ 1, sd) - thresh = line_times[a_idx, 1, z_val] * rand_gauss_dev(_seed ^ a_mem ^ z_val, sd) + a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss_dev(_seed ^ a_mem ^ z_val ^ 1, sd) * sdata[0] + thresh = line_times[a_idx, 1, z_val] * rand_gauss_dev(_seed ^ a_mem ^ z_val, sd) * sdata[0] + if int(sdata[1]) == a_idx: + a += sdata[2+(z_val^1)] + thresh += sdata[2+z_val] inputs ^= 1 next_t = a diff --git a/tests/test_circuit.py b/tests/test_circuit.py index b5d6055..446ba90 100644 --- a/tests/test_circuit.py +++ b/tests/test_circuit.py @@ -1,5 +1,7 @@ -from kyupy.circuit import Circuit, Node, Line +import pickle +from kyupy.circuit import Circuit, Node, Line +from kyupy import verilog def test_lines(): c = Circuit() @@ -99,3 +101,12 @@ def test_circuit(): for n in c.topological_order(): repr(n) + + +def test_pickle(mydir): + c = verilog.load(mydir / 'b14.v.gz') + assert c is not None + cs = pickle.dumps(c) + assert cs is not None + c2 = pickle.loads(cs) + assert c == c2 diff --git a/tests/test_logic.py b/tests/test_logic.py index 8fb933a..27b61ae 100644 --- a/tests/test_logic.py +++ b/tests/test_logic.py @@ -145,6 +145,20 @@ def test_mv_operations(): assert lg.mv_xor(x1_4v, x2_4v)[0] == '0XX1XXXXXXXX1XX0' assert lg.mv_xor(x1_8v, x2_8v)[0] == '0XX1PRFNXXXXXXXXXXXXXXXX1XX0NFRPPXXNPRFNRXXFRPNFFXXRFNPRNXXPNFRP' + x30_2v = lg.MVArray("0000", m=2) + x31_2v = lg.MVArray("1111", m=2) + x30_4v = lg.MVArray("0000000000000000", m=4) + x31_4v = lg.MVArray("1111111111111111", m=4) + x30_8v = lg.MVArray("0000000000000000000000000000000000000000000000000000000000000000", m=8) + x31_8v = lg.MVArray("1111111111111111111111111111111111111111111111111111111111111111", m=8) + + assert lg.mv_latch(x1_2v, x2_2v, x30_2v)[0] == '0001' + assert lg.mv_latch(x1_2v, x2_2v, x31_2v)[0] == '1011' + assert lg.mv_latch(x1_4v, x2_4v, x30_4v)[0] == '0XX00XXX0XXX0XX1' + assert lg.mv_latch(x1_4v, x2_4v, x31_4v)[0] == '1XX01XXX1XXX1XX1' + assert lg.mv_latch(x1_8v, x2_8v, x30_8v)[0] == '0XX000000XXXXXXX0XXXXXXX0XX10R110XX000000XXR0R0R0XXF001F0XX10R11' + assert lg.mv_latch(x1_8v, x2_8v, x31_8v)[0] == '1XX01F001XXXXXXX1XXXXXXX1XX111111XX01F001XXR110R1XXF1F1F1XX11111' + def test_bparray(): @@ -212,3 +226,27 @@ def test_bparray(): assert lg.MVArray(out_2v)[0] == '0110' assert lg.MVArray(out_4v)[0] == '0XX1XXXXXXXX1XX0' assert lg.MVArray(out_8v)[0] == '0XX1PRFNXXXXXXXXXXXXXXXX1XX0NFRPPXXNPRFNRXXFRPNFFXXRFNPRNXXPNFRP' + + x30_2v = lg.BPArray("0000", m=2) + x30_4v = lg.BPArray("0000000000000000", m=4) + x30_8v = lg.BPArray("0000000000000000000000000000000000000000000000000000000000000000", m=8) + + lg.bp_latch(out_2v.data, x1_2v.data, x2_2v.data, x30_2v.data) + lg.bp_latch(out_4v.data, x1_4v.data, x2_4v.data, x30_4v.data) + lg.bp_latch(out_8v.data, x1_8v.data, x2_8v.data, x30_8v.data) + + assert lg.MVArray(out_2v)[0] == '0001' + assert lg.MVArray(out_4v)[0] == '0XX00XXX0XXX0XX1' + assert lg.MVArray(out_8v)[0] == '0XX000000XXXXXXX0XXXXXXX0XX10R110XX000000XXR0R0R0XXF001F0XX10R11' + + x31_2v = lg.BPArray("1111", m=2) + x31_4v = lg.BPArray("1111111111111111", m=4) + x31_8v = lg.BPArray("1111111111111111111111111111111111111111111111111111111111111111", m=8) + + lg.bp_latch(out_2v.data, x1_2v.data, x2_2v.data, x31_2v.data) + lg.bp_latch(out_4v.data, x1_4v.data, x2_4v.data, x31_4v.data) + lg.bp_latch(out_8v.data, x1_8v.data, x2_8v.data, x31_8v.data) + + assert lg.MVArray(out_2v)[0] == '1011' + assert lg.MVArray(out_4v)[0] == '1XX01XXX1XXX1XX1' + assert lg.MVArray(out_8v)[0] == '1XX01F001XXXXXXX1XXXXXXX1XX111111XX01F001XXR110R1XXF1F1F1XX11111' diff --git a/tests/test_logic_sim.py b/tests/test_logic_sim.py index 76edb95..b581cb6 100644 --- a/tests/test_logic_sim.py +++ b/tests/test_logic_sim.py @@ -73,6 +73,45 @@ def test_8v(): assert resp[i] == mva[i] +def test_loop(): + c = bench.parse('q=dff(d) d=not(q)') + s = LogicSim(c, 4, m=8) + assert len(s.interface) == 1 + mva = MVArray([['0'], ['1'], ['R'], ['F']], m=8) + + s.assign(BPArray(mva)) + s.propagate() + resp_bp = BPArray((len(s.interface), s.sims)) + s.capture(resp_bp) + resp = MVArray(resp_bp) + + assert resp[0] == '1' + assert resp[1] == '0' + assert resp[2] == 'F' + assert resp[3] == 'R' + + resp_bp = s.cycle(resp_bp) + resp = MVArray(resp_bp) + + assert resp[0] == '0' + assert resp[1] == '1' + assert resp[2] == 'R' + assert resp[3] == 'F' + + +def test_latch(): + c = bench.parse('input(d, t) output(q) q=latch(d, t)') + s = LogicSim(c, 8, m=8) + assert len(s.interface) == 4 + mva = MVArray(['00-0', '00-1', '01-0', '01-1', '10-0', '10-1', '11-0', '11-1'], m=8) + exp = MVArray(['0000', '0011', '0100', '0100', '1000', '1011', '1111', '1111'], m=8) + + resp = MVArray(s.cycle(BPArray(mva))) + + for i in range(len(mva)): + assert resp[i] == exp[i] + + def test_b01(mydir): c = bench.load(mydir / 'b01.bench') diff --git a/tests/test_stil.py b/tests/test_stil.py index 63f19e4..3bb0182 100644 --- a/tests/test_stil.py +++ b/tests/test_stil.py @@ -1,8 +1,21 @@ -from kyupy import stil +from kyupy import stil, verilog def test_b14(mydir): + b14 = verilog.load(mydir / 'b14.v.gz') + s = stil.load(mydir / 'b14.stuck.stil.gz') assert len(s.signal_groups) == 10 assert len(s.scan_chains) == 1 assert len(s.calls) == 2163 + tests = s.tests(b14) + resp = s.responses(b14) + assert len(tests) > 0 + assert len(resp) > 0 + + s2 = stil.load(mydir / 'b14.transition.stil.gz') + tests = s2.tests_loc(b14) + resp = s2.responses(b14) + assert len(tests) > 0 + assert len(resp) > 0 + diff --git a/tests/test_wave_sim.py b/tests/test_wave_sim.py index 8ddb94d..724a415 100644 --- a/tests/test_wave_sim.py +++ b/tests/test_wave_sim.py @@ -29,20 +29,22 @@ def test_wave_eval(): sat[1] = 16, 16, 0 sat[2] = 32, 16, 0 - wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) + sdata = np.asarray([1, -1, 0, 0], dtype='float32') + + wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata) assert z[0] == TMIN a[0] = TMIN - wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) + wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata) assert z[0] == TMIN b[0] = TMIN - wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) + wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata) assert z[0] == TMAX a[0] = 1 # A _/^^^ b[0] = 2 # B __/^^ - wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) + wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata) assert z[0] == TMIN # ^^^\___ B -> Z fall delay assert z[1] == 2.4 assert z[2] == TMAX @@ -50,7 +52,7 @@ def test_wave_eval(): a[0] = TMIN # A ^^^^^^ b[0] = TMIN # B ^^^\__ b[1] = 2 - wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) + wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata) assert z[0] == 2.3 # ___/^^^ B -> Z rise delay assert z[1] == TMAX @@ -59,7 +61,7 @@ def test_wave_eval(): b[0] = TMIN b[1] = 2 # B ^^\__/^^ b[2] = 2.35 - wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) + wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata) assert z[0] == 2.3 # __/^^\__ assert z[1] == 2.75 assert z[2] == TMAX @@ -69,7 +71,7 @@ def test_wave_eval(): b[0] = 2 # B __/^^\__ b[1] = 2.45 b[2] = TMAX - wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) + wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata) assert z[0] == TMIN # ^^\__/^^ assert z[1] == 2.4 assert z[2] == 2.75 @@ -80,7 +82,7 @@ def test_wave_eval(): b[0] = 2 # B __/^^\__ b[1] = 2.35 b[2] = TMAX - wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) + wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata) assert z[0] == TMIN # ^^^^^^ assert z[1] == TMAX @@ -89,7 +91,7 @@ def test_wave_eval(): b[0] = TMIN b[1] = 2 # B ^^\__/^^ b[2] = 2.25 - wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times) + wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata) assert z[0] == TMAX # ______