diff --git a/src/kyupy/__init__.py b/src/kyupy/__init__.py index 897e832..9be6485 100644 --- a/src/kyupy/__init__.py +++ b/src/kyupy/__init__.py @@ -76,6 +76,11 @@ def hr_time(seconds): return s +def batchrange(nitems, maxsize): + for offset in range(0, nitems, maxsize): + yield offset, min(nitems-offset, maxsize) + + class Timer: def __init__(self, s=0): self.s = s def __enter__(self): self.start_time = time.perf_counter(); return self diff --git a/src/kyupy/logic_sim.py b/src/kyupy/logic_sim.py index c1f9d45..422cc91 100644 --- a/src/kyupy/logic_sim.py +++ b/src/kyupy/logic_sim.py @@ -11,10 +11,8 @@ import math import numpy as np from . import numba, logic, hr_bytes, sim -from .sim import SimOps, SimPrim - -class LogicSim(SimOps): +class LogicSim(sim.SimOps): """A bit-parallel naïve combinational simulator for 2-, 4-, or 8-valued logic. :param circuit: The circuit to simulate. @@ -36,17 +34,17 @@ class LogicSim(SimOps): self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8) self.s[:,:,1,:] = 255 # unassigned - self.pi_s_locs = np.flatnonzero(self.vat[self.ppi_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) - self.po_s_locs = np.flatnonzero(self.vat[self.ppo_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) + self.pi_s_locs = np.flatnonzero(self.c_locs[self.ppi_offset+np.arange(len(self.circuit.io_nodes))] >= 0) + self.po_s_locs = np.flatnonzero(self.c_locs[self.ppo_offset+np.arange(len(self.circuit.io_nodes))] >= 0) self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes)) self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs]) self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs]) - self.pi_c_locs = self.vat[self.ppi_offset+self.pi_s_locs, 0] - self.po_c_locs = self.vat[self.ppo_offset+self.po_s_locs, 0] - self.ppi_c_locs = self.vat[self.ppi_offset+self.ppio_s_locs, 0] - self.ppo_c_locs = self.vat[self.ppo_offset+self.ppio_s_locs, 0] + self.pi_c_locs = self.c_locs[self.ppi_offset+self.pi_s_locs] 
+ self.po_c_locs = self.c_locs[self.ppo_offset+self.po_s_locs] + self.ppi_c_locs = self.c_locs[self.ppi_offset+self.ppio_s_locs] + self.ppo_c_locs = self.c_locs[self.ppo_offset+self.ppio_s_locs] self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs]) self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs]) @@ -103,34 +101,34 @@ class LogicSim(SimOps): nbytes = (sims - 1) // 8 + 1 if self.m == 2: if inject_cb is None: - _prop_cpu(self.ops, self.vat, self.c[...,:nbytes]) + _prop_cpu(self.ops, self.c_locs, self.c[...,:nbytes]) else: for op, o0, i0, i1, i2, i3 in self.ops: - o0, i0, i1, i2, i3 = [self.vat[x,0] for x in (o0, i0, i1, i2, i3)] - if op == SimPrim.BUF1: self.c[o0]=self.c[i0] - elif op == SimPrim.INV1: self.c[o0] = ~self.c[i0] - elif op == SimPrim.AND2: self.c[o0] = self.c[i0] & self.c[i1] - elif op == SimPrim.NAND2: self.c[o0] = ~(self.c[i0] & self.c[i1]) - elif op == SimPrim.OR2: self.c[o0] = self.c[i0] | self.c[i1] - elif op == SimPrim.NOR2: self.c[o0] = ~(self.c[i0] | self.c[i1]) - elif op == SimPrim.XOR2: self.c[o0] = self.c[i0] ^ self.c[i1] - elif op == SimPrim.XNOR2: self.c[o0] = ~(self.c[i0] ^ self.c[i1]) - else: print(f'unknown SimPrim {op}') + o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)] + if op == sim.BUF1: self.c[o0]=self.c[i0] + elif op == sim.INV1: self.c[o0] = ~self.c[i0] + elif op == sim.AND2: self.c[o0] = self.c[i0] & self.c[i1] + elif op == sim.NAND2: self.c[o0] = ~(self.c[i0] & self.c[i1]) + elif op == sim.OR2: self.c[o0] = self.c[i0] | self.c[i1] + elif op == sim.NOR2: self.c[o0] = ~(self.c[i0] | self.c[i1]) + elif op == sim.XOR2: self.c[o0] = self.c[i0] ^ self.c[i1] + elif op == sim.XNOR2: self.c[o0] = ~(self.c[i0] ^ self.c[i1]) + else: print(f'unknown sim {op}') inject_cb(o0, self.s[o0]) elif self.m == 4: pass else: for op, o0, i0, i1, i2, i3 in self.ops: - o0, i0, i1, i2, i3 = [self.vat[x,0] for x in (o0, i0, i1, i2, i3)] - if op == SimPrim.BUF1: self.c[o0]=self.c[i0] - elif op == 
SimPrim.INV1: logic.bp_not(self.c[o0], self.c[i0]) - elif op == SimPrim.AND2: logic.bp_and(self.c[o0], self.c[i0], self.c[i1]) - elif op == SimPrim.NAND2: logic.bp_and(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0]) - elif op == SimPrim.OR2: logic.bp_or(self.c[o0], self.c[i0], self.c[i1]) - elif op == SimPrim.NOR2: logic.bp_or(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0]) - elif op == SimPrim.XOR2: logic.bp_xor(self.c[o0], self.c[i0], self.c[i1]) - elif op == SimPrim.XNOR2: logic.bp_xor(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0]) - else: print(f'unknown SimPrim {op}') + o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)] + if op == sim.BUF1: self.c[o0]=self.c[i0] + elif op == sim.INV1: logic.bp_not(self.c[o0], self.c[i0]) + elif op == sim.AND2: logic.bp_and(self.c[o0], self.c[i0], self.c[i1]) + elif op == sim.NAND2: logic.bp_and(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0]) + elif op == sim.OR2: logic.bp_or(self.c[o0], self.c[i0], self.c[i1]) + elif op == sim.NOR2: logic.bp_or(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0]) + elif op == sim.XOR2: logic.bp_xor(self.c[o0], self.c[i0], self.c[i1]) + elif op == sim.XNOR2: logic.bp_xor(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0]) + else: print(f'unknown sim {op}') if inject_cb is not None: inject_cb(o0, self.s[o0]) def s_ppo_to_ppi(self): @@ -159,9 +157,9 @@ class LogicSim(SimOps): @numba.njit -def _prop_cpu(ops, vat, c): +def _prop_cpu(ops, c_locs, c): for op, o0, i0, i1, i2, i3 in ops: - o0, i0, i1, i2, i3 = [vat[x,0] for x in (o0, i0, i1, i2, i3)] + o0, i0, i1, i2, i3 = [c_locs[x] for x in (o0, i0, i1, i2, i3)] if op == sim.BUF1: c[o0]=c[i0] elif op == sim.INV1: c[o0] = ~c[i0] elif op == sim.AND2: c[o0] = c[i0] & c[i1] @@ -170,4 +168,4 @@ def _prop_cpu(ops, vat, c): elif op == sim.NOR2: c[o0] = ~(c[i0] | c[i1]) elif op == sim.XOR2: c[o0] 
= c[i0] ^ c[i1] elif op == sim.XNOR2: c[o0] = ~(c[i0] ^ c[i1]) - else: print(f'unknown SimPrim {op}') + else: print(f'unknown sim {op}') diff --git a/src/kyupy/sim.py b/src/kyupy/sim.py index f0a9116..29da8fc 100644 --- a/src/kyupy/sim.py +++ b/src/kyupy/sim.py @@ -4,117 +4,70 @@ from bisect import bisect, insort_left import numpy as np -BUF1 = 0b1010_1010_1010_1010 -INV1 = 0b0101_0101_0101_0101 - -NAND4 = 0b0111_1111_1111_1111 -NAND3 = 0b0111_1111_0111_1111 -NAND2 = 0b0111_0111_0111_0111 - -NOR4 = 0b0000_0000_0000_0001 -NOR3 = 0b0000_0001_0000_0001 -NOR2 = 0b0001_0001_0001_0001 - -AND4 = 0b1000_0000_0000_0000 -AND3 = 0b1000_0000_1000_0000 -AND2 = 0b1000_1000_1000_1000 - -OR4 = 0b1111_1111_1111_1110 -OR3 = 0b1111_1110_1111_1110 -OR2 = 0b1110_1110_1110_1110 - -XOR4 = 0b0110_1001_1001_0110 -XOR3 = 0b1001_0110_1001_0110 -XOR2 = 0b0110_0110_0110_0110 - -XNOR4 = 0b1001_0110_0110_1001 -XNOR3 = 0b0110_1001_0110_1001 -XNOR2 = 0b1001_1001_1001_1001 - -AO22 = 0b1111_1000_1000_1000 -AOI22 = 0b0000_0111_0111_0111 -AO21 = 0b1110_1010_1110_1010 -AOI21 = 0b0001_0101_0001_0101 -OA22 = 0b1110_1110_1110_0000 -OAI22 = 0b0001_0001_0001_1111 -OA21 = 0b1010_1000_1010_1000 -OAI21 = 0b0101_0111_0101_0111 -MUX21 = 0b1110_0100_1110_0100 - -class SimPrim: - BUF1 = 0b1010_1010_1010_1010 - INV1 = 0b0101_0101_0101_0101 - - NAND4 = 0b0111_1111_1111_1111 - NAND3 = 0b0111_1111_0111_1111 - NAND2 = 0b0111_0111_0111_0111 - - NOR4 = 0b0000_0000_0000_0001 - NOR3 = 0b0000_0001_0000_0001 - NOR2 = 0b0001_0001_0001_0001 - - AND4 = 0b1000_0000_0000_0000 - AND3 = 0b1000_0000_1000_0000 - AND2 = 0b1000_1000_1000_1000 - - OR4 = 0b1111_1111_1111_1110 - OR3 = 0b1111_1110_1111_1110 - OR2 = 0b1110_1110_1110_1110 - - XOR4 = 0b0110_1001_1001_0110 - XOR3 = 0b1001_0110_1001_0110 - XOR2 = 0b0110_0110_0110_0110 - - XNOR4 = 0b1001_0110_0110_1001 - XNOR3 = 0b0110_1001_0110_1001 - XNOR2 = 0b1001_1001_1001_1001 - - AO22 = 0b1111_1000_1000_1000 - AOI22 = 0b0000_0111_0111_0111 - AO21 = 0b1110_1010_1110_1010 - AOI21 = 
0b0001_0101_0001_0101 - OA22 = 0b1110_1110_1110_0000 - OAI22 = 0b0001_0001_0001_1111 - OA21 = 0b1010_1000_1010_1000 - OAI21 = 0b0101_0111_0101_0111 - MUX21 = 0b1110_0100_1110_0100 - - kind_prefixes = { - 'nand': (NAND4, NAND3, NAND2), - 'nor': (NOR4, NOR3, NOR2), - 'and': (AND4, AND3, AND2), - 'or': (OR4, OR3, OR2), - 'xor': (XOR4, XOR3, XOR2), - 'xnor': (XNOR4, XNOR3, XNOR2), - - 'not': (INV1, INV1, INV1), - 'inv': (INV1, INV1, INV1), - 'ibuf': (INV1, INV1, INV1), - '__const1__': (INV1, INV1, INV1), - 'tieh': (INV1, INV1, INV1), - - 'buf': (BUF1, BUF1, BUF1), - 'nbuf': (BUF1, BUF1, BUF1), - 'delln': (BUF1, BUF1, BUF1), - '__const0__': (BUF1, BUF1, BUF1), - 'tiel': (BUF1, BUF1, BUF1), - - 'ao22': (AO22, AO22, AO22), - 'aoi22': (AOI22, AOI22, AOI22), - 'ao21': (AO21, AO21, AO21), - 'aoi21': (AOI21, AOI21, AOI21), - - 'oa22': (OA22, OA22, OA22), - 'oai22': (OAI22, OAI22, OAI22), - 'oa21': (OA21, OA21, OA21), - 'oai21': (OAI21, OAI21, OAI21), - - 'mux21': (MUX21, MUX21, MUX21), - } - - @classmethod - def names(cls): - return dict([(v, k) for k, v in cls.__dict__.items() if isinstance(v, int)]) +BUF1 = np.uint16(0b1010_1010_1010_1010) +INV1 = ~BUF1 + +AND2 = np.uint16(0b1000_1000_1000_1000) +AND3 = np.uint16(0b1000_0000_1000_0000) +AND4 = np.uint16(0b1000_0000_0000_0000) + +NAND2, NAND3, NAND4 = ~AND2, ~AND3, ~AND4 + +OR2 = np.uint16(0b1110_1110_1110_1110) +OR3 = np.uint16(0b1111_1110_1111_1110) +OR4 = np.uint16(0b1111_1111_1111_1110) + +NOR2, NOR3, NOR4 = ~OR2, ~OR3, ~OR4 + +XOR2 = np.uint16(0b0110_0110_0110_0110) +XOR3 = np.uint16(0b1001_0110_1001_0110) +XOR4 = np.uint16(0b0110_1001_1001_0110) + +XNOR2, XNOR3, XNOR4 = ~XOR2, ~XOR3, ~XOR4 + +AO21 = np.uint16(0b1110_1010_1110_1010) +AO22 = np.uint16(0b1111_1000_1000_1000) +OA21 = np.uint16(0b1010_1000_1010_1000) +OA22 = np.uint16(0b1110_1110_1110_0000) + +AOI21, AOI22, OAI21, OAI22 = ~AO21, ~AO22, ~OA21, ~OA22 + +MUX21 = np.uint16(0b1110_0100_1110_0100) + +names = dict([(v, k) for k, v in globals().items() if 
isinstance(v, np.uint16)]) + +kind_prefixes = { + 'nand': (NAND4, NAND3, NAND2), + 'nor': (NOR4, NOR3, NOR2), + 'and': (AND4, AND3, AND2), + 'or': (OR4, OR3, OR2), + 'xor': (XOR4, XOR3, XOR2), + 'xnor': (XNOR4, XNOR3, XNOR2), + + 'not': (INV1, INV1, INV1), + 'inv': (INV1, INV1, INV1), + 'ibuf': (INV1, INV1, INV1), + '__const1__': (INV1, INV1, INV1), + 'tieh': (INV1, INV1, INV1), + + 'buf': (BUF1, BUF1, BUF1), + 'nbuf': (BUF1, BUF1, BUF1), + 'delln': (BUF1, BUF1, BUF1), + '__const0__': (BUF1, BUF1, BUF1), + 'tiel': (BUF1, BUF1, BUF1), + + 'ao22': (AO22, AO22, AO22), + 'aoi22': (AOI22, AOI22, AOI22), + 'ao21': (AO21, AO21, AO21), + 'aoi21': (AOI21, AOI21, AOI21), + + 'oa22': (OA22, OA22, OA22), + 'oai22': (OAI22, OAI22, OAI22), + 'oa21': (OA21, OA21, OA21), + 'oai21': (OAI21, OAI21, OAI21), + + 'mux21': (MUX21, MUX21, MUX21), +} class Heap: def __init__(self): @@ -184,7 +137,7 @@ class Heap: class SimOps: """A static scheduler that translates a Circuit into a topologically sorted list of basic logic operations (self.ops) and - a value allocation table (self.vat) for use in simulators. + a memory mapping (self.c_locs, self.c_caps) for use in simulators. :param circuit: The circuit to create a schedule for. :param strip_forks: If enabled, the scheduler will not include fork nodes to safe simulation time. 
@@ -203,12 +156,12 @@ class SimOps: if isinstance(c_caps, int): c_caps = [c_caps] * len(circuit.lines) - # indices for state allocation table (sat) + # special locations and offsets in c_locs/c_caps self.zero_idx = len(circuit.lines) self.tmp_idx = self.zero_idx + 1 self.ppi_offset = self.tmp_idx + 1 self.ppo_offset = self.ppi_offset + len(self.s_nodes) - self.vat_len = self.ppo_offset + len(self.s_nodes) + self.c_locs_len = self.ppo_offset + len(self.s_nodes) # translate circuit structure into self.ops ops = [] @@ -217,14 +170,14 @@ class SimOps: if n in interface_dict: inp_idx = self.ppi_offset + interface_dict[n] if len(n.outs) > 0 and n.outs[0] is not None: # first output of a PI/PPI - ops.append((SimPrim.BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) + ops.append((BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) if 'dff' in n.kind.lower(): # second output of DFF is inverted if len(n.outs) > 1 and n.outs[1] is not None: - ops.append((SimPrim.INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) + ops.append((INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) else: # if not DFF, no output is inverted. 
for o_line in n.outs[1:]: if o_line is not None: - ops.append((SimPrim.BUF1, o_line.index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) + ops.append((BUF1, o_line.index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) continue # regular node, not PI/PPI or PO/PPO o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx @@ -237,10 +190,10 @@ class SimOps: if not strip_forks: for o_line in n.outs: if o_line is not None: - ops.append((SimPrim.BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx)) + ops.append((BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx)) continue sp = None - for prefix, prims in SimPrim.kind_prefixes.items(): + for prefix, prims in kind_prefixes.items(): if kind.startswith(prefix): sp = prims[0] if i3_idx == self.zero_idx: @@ -256,7 +209,7 @@ class SimOps: self.ops = np.asarray(ops, dtype='int32') # create a map from fanout lines to stem lines for fork stripping - stems = np.zeros(self.vat_len, dtype='int32') - 1 # default to -1: 'no fanout line' + stems = np.zeros(self.c_locs_len, dtype='int32') - 1 # default to -1: 'no fanout line' if strip_forks: for f in circuit.forks.values(): prev_line = f.ins[0] @@ -267,8 +220,8 @@ class SimOps: stems[ol] = stem_idx # calculate level (distance from PI/PPI) and reference count for each line - levels = np.zeros(self.vat_len, dtype='int32') - ref_count = np.zeros(self.vat_len, dtype='int32') + levels = np.zeros(self.c_locs_len, dtype='int32') + ref_count = np.zeros(self.c_locs_len, dtype='int32') level_starts = [0] current_level = 1 for i, op in enumerate(self.ops): @@ -289,21 +242,21 @@ class SimOps: self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32') # state allocation table. 
maps line and interface indices to self.state memory locations - self.vat = np.zeros((self.vat_len, 3), dtype='int') - self.vat[:, 0] = -1 - + self.c_locs = np.full((self.c_locs_len,), -1, dtype=np.int32) + self.c_caps = np.zeros((self.c_locs_len,), dtype=np.int32) + h = Heap() # allocate and keep memory for special fields - self.vat[self.zero_idx] = h.alloc(1), 1, 0 - self.vat[self.tmp_idx] = h.alloc(1), 1, 0 + self.c_locs[self.zero_idx], self.c_caps[self.zero_idx] = h.alloc(1), 1 + self.c_locs[self.tmp_idx], self.c_caps[self.tmp_idx] = h.alloc(1), 1 ref_count[self.zero_idx] += 1 ref_count[self.tmp_idx] += 1 # allocate and keep memory for PI/PPI, keep memory for PO/PPO (allocated later) for i, n in enumerate(self.s_nodes): if len(n.outs) > 0: - self.vat[self.ppi_offset + i] = h.alloc(1), 1, 0 + self.c_locs[self.ppi_offset + i], self.c_caps[self.ppi_offset + i] = h.alloc(1), 1 ref_count[self.ppi_offset + i] += 1 if len(n.ins) > 0: i0_idx = stems[n.ins[0]] if stems[n.ins[0]] >= 0 else n.ins[0] @@ -322,13 +275,13 @@ class SimOps: ref_count[i1_idx] -= 1 ref_count[i2_idx] -= 1 ref_count[i3_idx] -= 1 - if ref_count[i0_idx] <= 0: free_list.append(self.vat[i0_idx, 0]) - if ref_count[i1_idx] <= 0: free_list.append(self.vat[i1_idx, 0]) - if ref_count[i2_idx] <= 0: free_list.append(self.vat[i2_idx, 0]) - if ref_count[i3_idx] <= 0: free_list.append(self.vat[i3_idx, 0]) + if ref_count[i0_idx] <= 0: free_list.append(self.c_locs[i0_idx]) + if ref_count[i1_idx] <= 0: free_list.append(self.c_locs[i1_idx]) + if ref_count[i2_idx] <= 0: free_list.append(self.c_locs[i2_idx]) + if ref_count[i3_idx] <= 0: free_list.append(self.c_locs[i3_idx]) o_idx = op[1] cap = c_caps[o_idx] - self.vat[o_idx] = h.alloc(cap), cap, 0 + self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap if not keep_signals: for loc in free_list: h.free(loc) @@ -336,16 +289,15 @@ class SimOps: # copy memory location and capacity from stems to fanout lines for lidx, stem in enumerate(stems): if stem >= 0: # if at a 
fanout line - self.vat[lidx] = self.vat[stem] + self.c_locs[lidx], self.c_caps[lidx] = self.c_locs[stem], self.c_caps[stem] # copy memory location to PO/PPO area for i, n in enumerate(self.s_nodes): if len(n.ins) > 0: - self.vat[self.ppo_offset + i] = self.vat[n.ins[0]] + self.c_locs[self.ppo_offset + i], self.c_caps[self.ppo_offset + i] = self.c_locs[n.ins[0]], self.c_caps[n.ins[0]] self.c_len = h.max_size from collections import defaultdict self.prim_counts = defaultdict(int) - names_dict = SimPrim.names() - for op, _, _, _, _, _ in self.ops: self.prim_counts[names_dict[op]] += 1 + for op, _, _, _, _, _ in self.ops: self.prim_counts[names[op]] += 1 diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py index af1f02b..61054fc 100644 --- a/src/kyupy/wave_sim.py +++ b/src/kyupy/wave_sim.py @@ -16,8 +16,7 @@ import math import numpy as np -from . import numba, cuda, hr_bytes -from .sim import SimOps +from . import numba, cuda, hr_bytes, sim TMAX = np.float32(2 ** 127) @@ -29,7 +28,7 @@ TMIN = np.float32(-2 ** 127) """A large negative 32-bit floating point value used at the beginning of waveforms that start with logic-1.""" -class WaveSim(SimOps): +class WaveSim(sim.SimOps): """A waveform-based combinational logic timing simulator running on CPU. :param circuit: The circuit to simulate. @@ -54,30 +53,31 @@ class WaveSim(SimOps): self.sims = sims self.c_len *= 4 - self.vat[...,0:2] *= 4 + self.c_locs[...] *= 4 + self.c_caps[...] *= 4 - self.timing = np.zeros((self.vat_len, 2, 2)) + self.timing = np.zeros((self.c_locs_len, 2, 2)) self.timing[:len(timing)] = timing self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX - self.s = np.zeros((len(self.s_nodes), sims, 11), dtype=np.float32) + self.s = np.zeros((11, self.s_len, sims), dtype=np.float32) """Information about the logic values and transitions around the sequential elements (flip-flops) and ports. The first 3 values are read by ``s_to_c()``. The remaining values are written by ``c_to_s()``. 
The elements are as follows: - * ``s[..., 0]`` (P)PI initial value - * ``s[..., 1]`` (P)PI transition time - * ``s[..., 2]`` (P)PI final value - * ``s[..., 3]`` (P)PO initial value - * ``s[..., 4]`` (P)PO earliest arrival time (EAT): The time at which the output transitioned from its initial value. - * ``s[..., 5]`` (P)PO latest stabilization time (LST): The time at which the output settled to its final value. - * ``s[..., 6]`` (P)PO final value - * ``s[..., 7]`` (P)PO capture value: probability of capturing a 1 at a given capture time - * ``s[..., 8]`` (P)PO sampled capture value: decided by random sampling according to a given seed. - * ``s[..., 9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed. - * ``s[..., 10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more + * ``s[0]`` (P)PI initial value + * ``s[1]`` (P)PI transition time + * ``s[2]`` (P)PI final value + * ``s[3]`` (P)PO initial value + * ``s[4]`` (P)PO earliest arrival time (EAT): The time at which the output transitioned from its initial value. + * ``s[5]`` (P)PO latest stabilization time (LST): The time at which the output settled to its final value. + * ``s[6]`` (P)PO final value + * ``s[7]`` (P)PO capture value: probability of capturing a 1 at a given capture time + * ``s[8]`` (P)PO sampled capture value: decided by random sampling according to a given seed. + * ``s[9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed. + * ``s[10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more transitions than specified in ``c_caps``. Some transitions have been discarded, the final values in the waveforms are still valid. 
""" @@ -85,19 +85,19 @@ class WaveSim(SimOps): self.params = np.zeros((sims, 4), dtype=np.float32) self.params[...,0] = 1.0 - self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.vat, self.ops, self.params)]) + self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.params)]) - self.pi_s_locs = np.flatnonzero(self.vat[self.ppi_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) - self.po_s_locs = np.flatnonzero(self.vat[self.ppo_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) + self.pi_s_locs = np.flatnonzero(self.c_locs[self.ppi_offset+np.arange(len(self.circuit.io_nodes))] >= 0) + self.po_s_locs = np.flatnonzero(self.c_locs[self.ppo_offset+np.arange(len(self.circuit.io_nodes))] >= 0) self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes)) self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs]) self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs]) - self.pi_c_locs = self.vat[self.ppi_offset+self.pi_s_locs, 0] - self.po_c_locs = self.vat[self.ppo_offset+self.po_s_locs, 0] - self.ppi_c_locs = self.vat[self.ppi_offset+self.ppio_s_locs, 0] - self.ppo_c_locs = self.vat[self.ppo_offset+self.ppio_s_locs, 0] + self.pi_c_locs = self.c_locs[self.ppi_offset+self.pi_s_locs] + self.po_c_locs = self.c_locs[self.ppo_offset+self.po_s_locs] + self.ppi_c_locs = self.c_locs[self.ppi_offset+self.ppio_s_locs] + self.ppo_c_locs = self.c_locs[self.ppo_offset+self.ppio_s_locs] self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs]) self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs]) @@ -112,7 +112,7 @@ class WaveSim(SimOps): Based on the data in ``self.s``, waveforms are generated on the input lines of the circuit. It modifies ``self.c``. 
""" - sins = np.moveaxis(self.s[self.pippi_s_locs], -1, 0) + sins = self.s[:, self.pippi_s_locs] cond = (sins[2] != 0) + 2*(sins[0] != 0) # choices order: 0 R F 1 self.c[self.pippi_c_locs] = np.choose(cond, [TMAX, sins[1], TMIN, TMIN]) self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX]) @@ -127,7 +127,7 @@ class WaveSim(SimOps): """ sims = min(sims or self.sims, self.sims) for op_start, op_stop in zip(self.level_starts, self.level_stops): - level_eval_cpu(self.ops, op_start, op_stop, self.c, self.vat, 0, sims, + level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims, self.timing, self.params, sd, seed) def c_to_s(self, time=TMAX, sd=0.0, seed=1): @@ -140,9 +140,9 @@ class WaveSim(SimOps): :param sd: A standard deviation for uncertainty in the actual capture time. :param seed: The random seed for a capture with uncertainty. """ - for s_loc, (c_loc, c_len, _) in zip(self.poppo_s_locs, self.vat[self.ppo_offset+self.poppo_s_locs]): + for s_loc, c_loc, c_len in zip(self.poppo_s_locs, self.c_locs[self.ppo_offset+self.poppo_s_locs], self.c_caps[self.ppo_offset+self.poppo_s_locs]): for vector in range(self.sims): - self.s[s_loc, vector, 3:] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed) + self.s[3:, s_loc, vector] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed) def s_ppo_to_ppi(self, time=0.0): """Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI). @@ -151,9 +151,9 @@ class WaveSim(SimOps): :param time: The transition time at the inputs (usually 0.0). 
""" - self.s[self.ppio_s_locs, :, 0] = self.s[self.ppio_s_locs, :, 2] - self.s[self.ppio_s_locs, :, 1] = time - self.s[self.ppio_s_locs, :, 2] = self.s[self.ppio_s_locs, :, 8] + self.s[0, self.ppio_s_locs] = self.s[2, self.ppio_s_locs] + self.s[1, self.ppio_s_locs] = time + self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] @numba.njit @@ -173,7 +173,7 @@ def rand_gauss_cpu(seed, sd): @numba.njit -def wave_eval_cpu(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): +def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, line_times, param, sd=0.0, seed=0): lut, z_idx, a_idx, b_idx, c_idx, d_idx = op # >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>> @@ -181,11 +181,12 @@ def wave_eval_cpu(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) - a_mem = vat[a_idx, 0] - b_mem = vat[b_idx, 0] - c_mem = vat[c_idx, 0] - d_mem = vat[d_idx, 0] - z_mem, z_cap, _ = vat[z_idx] + a_mem = c_locs[a_idx] + b_mem = c_locs[b_idx] + c_mem = c_locs[c_idx] + d_mem = c_locs[d_idx] + z_mem = c_locs[z_idx] + z_cap = c_caps[z_idx] a_cur = int(0) b_cur = int(0) @@ -280,12 +281,12 @@ def wave_eval_cpu(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): @numba.njit -def level_eval_cpu(ops, op_start, op_stop, c, vat, st_start, st_stop, line_times, params, sd, seed): +def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, st_start, st_stop, line_times, params, sd, seed): overflows = 0 for op_idx in range(op_start, op_stop): op = ops[op_idx] for st_idx in range(st_start, st_stop): - wave_eval_cpu(op, c, vat, st_idx, line_times, params[st_idx], sd, seed) + wave_eval_cpu(op, c, c_locs, c_caps, st_idx, line_times, params[st_idx], sd, seed) @numba.njit @@ -347,7 +348,8 @@ class WaveSimCuda(WaveSim): self.c = cuda.to_device(self.c) self.s = cuda.to_device(self.s) self.ops = cuda.to_device(self.ops) - self.vat = cuda.to_device(self.vat) + self.c_locs = cuda.to_device(self.c_locs) + self.c_caps = 
cuda.to_device(self.c_caps) self.timing = cuda.to_device(self.timing) self.params = cuda.to_device(self.params) @@ -355,7 +357,7 @@ class WaveSimCuda(WaveSim): def s_to_c(self): grid_dim = self._grid_dim(self.sims, self.s_len) - wave_assign_gpu[grid_dim, self._block_dim](self.c, self.s, self.vat, self.ppi_offset) + wave_assign_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.ppi_offset) def _grid_dim(self, x, y): gx = math.ceil(x / self._block_dim[0]) @@ -366,29 +368,29 @@ class WaveSimCuda(WaveSim): sims = min(sims or self.sims, self.sims) for op_start, op_stop in zip(self.level_starts, self.level_stops): grid_dim = self._grid_dim(sims, op_stop - op_start) - wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.vat, int(0), + wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, int(0), sims, self.timing, self.params, sd, seed) cuda.synchronize() def c_to_s(self, time=TMAX, sd=0.0, seed=1): grid_dim = self._grid_dim(self.sims, self.s_len) - wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.vat, self.ppo_offset, + wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.c_caps, self.ppo_offset, time, sd * math.sqrt(2), seed) def s_ppo_to_ppi(self, time=0.0): grid_dim = self._grid_dim(self.sims, self.s_len) - ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.vat, time, self.ppi_offset, self.ppo_offset) + ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset) @cuda.jit() -def wave_assign_gpu(c, s, vat, ppi_offset): +def wave_assign_gpu(c, s, c_locs, ppi_offset): x, y = cuda.grid(2) - if y >= len(s): return - c_loc, c_len, _ = vat[ppi_offset + y] + if y >= s.shape[1]: return + c_loc = c_locs[ppi_offset + y] if c_loc < 0: return if x >= c.shape[-1]: return - value = int(s[y, x, 2] >= 0.5) | (2*int(s[y, x, 0] >= 0.5)) - ttime = s[y, x, 1] + value = int(s[2, y, x] >= 0.5) | (2*int(s[0, y, 
x] >= 0.5)) + ttime = s[1, y, x] if value == 0: c[c_loc, x] = TMAX c[c_loc+1, x] = TMAX @@ -421,7 +423,7 @@ def rand_gauss_gpu(seed, sd): @cuda.jit() -def wave_eval_gpu(ops, op_start, op_stop, cbuf, vat, st_start, st_stop, line_times, param, sd, seed): +def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, line_times, param, sd, seed): x, y = cuda.grid(2) st_idx = st_start + x op_idx = op_start + y @@ -442,11 +444,12 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, vat, st_start, st_stop, line_tim _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) - a_mem = vat[a_idx, 0] - b_mem = vat[b_idx, 0] - c_mem = vat[c_idx, 0] - d_mem = vat[d_idx, 0] - z_mem, z_cap, _ = vat[z_idx] + a_mem = c_locs[a_idx] + b_mem = c_locs[b_idx] + c_mem = c_locs[c_idx] + d_mem = c_locs[d_idx] + z_mem = c_locs[z_idx] + z_cap = c_caps[z_idx] a_cur = int(0) b_cur = int(0) @@ -541,10 +544,11 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, vat, st_start, st_stop, line_tim @cuda.jit() -def wave_capture_gpu(c, s, vat, ppo_offset, time, s_sqrt2, seed): +def wave_capture_gpu(c, s, c_locs, c_caps, ppo_offset, time, s_sqrt2, seed): x, y = cuda.grid(2) - if ppo_offset + y >= len(vat): return - line, tdim, _ = vat[ppo_offset + y] + if ppo_offset + y >= len(c_locs): return + line = c_locs[ppo_offset + y] + tdim = c_caps[ppo_offset + y] if line < 0: return if x >= c.shape[-1]: return vector = x @@ -588,25 +592,25 @@ def wave_capture_gpu(c, s, vat, ppo_offset, time, s_sqrt2, seed): else: acc = val - s[y, vector, 3] = (c[line, vector] <= TMIN) - s[y, vector, 4] = eat - s[y, vector, 5] = lst - s[y, vector, 6] = final - s[y, vector, 7] = acc - s[y, vector, 8] = val - s[y, vector, 9] = 0 # TODO - s[y, vector, 10] = ovl + s[3, y, vector] = (c[line, vector] <= TMIN) + s[4, y, vector] = eat + s[5, y, vector] = lst + s[6, y, vector] = final + s[7, y, vector] = acc + s[8, y, vector] = val + s[9, y, vector] = 0 # TODO + s[10, y, vector] = ovl @cuda.jit() -def ppo_to_ppi_gpu(s, vat, 
time, ppi_offset, ppo_offset): +def ppo_to_ppi_gpu(s, c_locs, time, ppi_offset, ppo_offset): x, y = cuda.grid(2) if y >= s.shape[0]: return if x >= s.shape[1]: return - if vat[ppi_offset + y, 0] < 0: return - if vat[ppo_offset + y, 0] < 0: return + if c_locs[ppi_offset + y] < 0: return + if c_locs[ppo_offset + y] < 0: return - s[y, x, 0] = s[y, x, 2] - s[y, x, 1] = time - s[y, x, 2] = s[y, x, 8] + s[0, y, x] = s[2, y, x] + s[1, y, x] = time + s[2, y, x] = s[8, y, x] diff --git a/tests/test_wave_sim.py b/tests/test_wave_sim.py index b8fc18c..6a4bad3 100644 --- a/tests/test_wave_sim.py +++ b/tests/test_wave_sim.py @@ -2,17 +2,17 @@ import numpy as np from kyupy.wave_sim import WaveSim, WaveSimCuda, wave_eval_cpu, TMIN, TMAX from kyupy.logic_sim import LogicSim -from kyupy import verilog, sdf, logic, bench +from kyupy import logic, bench, sim from kyupy.logic import mvarray -from kyupy.sim import SimPrim - def test_nand_delays(): - op = (SimPrim.NAND4, 4, 0, 1, 2, 3) + op = (sim.NAND4, 4, 0, 1, 2, 3) #op = (0b0111, 4, 0, 1) c = np.full((5*16, 1), TMAX) # 5 waveforms of capacity 16 - vat = np.zeros((5, 3), dtype='int') - for i in range(5): vat[i] = i*16, 16, 0 # 1:1 mapping + c_locs = np.zeros((5,), dtype='int') + c_caps = np.zeros((5,), dtype='int') + + for i in range(5): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping # SDF specifies IOPATH delays with respect to output polarity # SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition @@ -32,7 +32,7 @@ def test_nand_delays(): def wave_assert(inputs, output): for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i - wave_eval_cpu(op, c, vat, 0, line_times, sdata) + wave_eval_cpu(op, c, c_locs, c_caps, 0, line_times, sdata) for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v) wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1 @@ -53,29 +53,29 @@ def test_tiny_circuit(): lt = 
np.zeros((len(c.lines), 2, 2)) lt[:,0,:] = 1.0 # unit delay for all lines wsim = WaveSim(c, lt) - assert len(wsim.s) == 5 + assert wsim.s.shape[1] == 5 # values for x - wsim.s[0,0,:3] = 0, 0.1, 0 - wsim.s[0,1,:3] = 0, 0.2, 1 - wsim.s[0,2,:3] = 1, 0.3, 0 - wsim.s[0,3,:3] = 1, 0.4, 1 - + wsim.s[:3,0,0] = 0, 0.1, 0 + wsim.s[:3,0,1] = 0, 0.2, 1 + wsim.s[:3,0,2] = 1, 0.3, 0 + wsim.s[:3,0,3] = 1, 0.4, 1 + # values for y - wsim.s[1,0,:3] = 1, 0.5, 0 - wsim.s[1,1,:3] = 1, 0.6, 0 - wsim.s[1,2,:3] = 1, 0.7, 0 - wsim.s[1,3,:3] = 0, 0.8, 1 + wsim.s[:3,1,0] = 1, 0.5, 0 + wsim.s[:3,1,1] = 1, 0.6, 0 + wsim.s[:3,1,2] = 1, 0.7, 0 + wsim.s[:3,1,3] = 0, 0.8, 1 wsim.s_to_c() - x_c_loc = wsim.vat[wsim.ppi_offset+0, 0] # check x waveforms + x_c_loc = wsim.c_locs[wsim.ppi_offset+0] # check x waveforms np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 0], [TMAX, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 1], [0.2, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 2], [TMIN, 0.3, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 3], [TMIN, TMAX, TMAX]) - y_c_loc = wsim.vat[wsim.ppi_offset+1, 0] # check y waveforms + y_c_loc = wsim.c_locs[wsim.ppi_offset+1] # check y waveforms np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 0], [TMIN, 0.5, TMAX]) np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 1], [TMIN, 0.6, TMAX]) np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 2], [TMIN, 0.7, TMAX]) @@ -83,19 +83,19 @@ def test_tiny_circuit(): wsim.c_prop() - a_c_loc = wsim.vat[wsim.ppo_offset+2, 0] # check a waveforms + a_c_loc = wsim.c_locs[wsim.ppo_offset+2] # check a waveforms np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 0], [TMAX, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 1], [1.2, 1.6, TMAX]) np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 2], [TMIN, 1.3, TMAX]) np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 3], [1.8, TMAX, TMAX]) - o_c_loc = wsim.vat[wsim.ppo_offset+3, 0] # check o 
waveforms + o_c_loc = wsim.c_locs[wsim.ppo_offset+3] # check o waveforms np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 0], [TMIN, 1.5, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 1], [TMIN, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 2], [TMIN, 1.7, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 3], [TMIN, TMAX, TMAX]) - n_c_loc = wsim.vat[wsim.ppo_offset+4, 0] # check n waveforms + n_c_loc = wsim.c_locs[wsim.ppo_offset+4] # check n waveforms np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 0], [TMIN, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 1], [TMIN, 1.2, TMAX]) np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 2], [1.3, TMAX, TMAX]) @@ -104,22 +104,22 @@ def test_tiny_circuit(): wsim.c_to_s() # check a captures - np.testing.assert_allclose(wsim.s[2, 0, 3:7], [0, TMAX, TMIN, 0]) - np.testing.assert_allclose(wsim.s[2, 1, 3:7], [0, 1.2, 1.6, 0]) - np.testing.assert_allclose(wsim.s[2, 2, 3:7], [1, 1.3, 1.3, 0]) - np.testing.assert_allclose(wsim.s[2, 3, 3:7], [0, 1.8, 1.8, 1]) - - # check o captures - np.testing.assert_allclose(wsim.s[3, 0, 3:7], [1, 1.5, 1.5, 0]) - np.testing.assert_allclose(wsim.s[3, 1, 3:7], [1, TMAX, TMIN, 1]) - np.testing.assert_allclose(wsim.s[3, 2, 3:7], [1, 1.7, 1.7, 0]) - np.testing.assert_allclose(wsim.s[3, 3, 3:7], [1, TMAX, TMIN, 1]) - - # check o captures - np.testing.assert_allclose(wsim.s[4, 0, 3:7], [1, TMAX, TMIN, 1]) - np.testing.assert_allclose(wsim.s[4, 1, 3:7], [1, 1.2, 1.2, 0]) - np.testing.assert_allclose(wsim.s[4, 2, 3:7], [0, 1.3, 1.3, 1]) - np.testing.assert_allclose(wsim.s[4, 3, 3:7], [0, TMAX, TMIN, 0]) + np.testing.assert_allclose(wsim.s[3:7, 2, 0], [0, TMAX, TMIN, 0]) + np.testing.assert_allclose(wsim.s[3:7, 2, 1], [0, 1.2, 1.6, 0]) + np.testing.assert_allclose(wsim.s[3:7, 2, 2], [1, 1.3, 1.3, 0]) + np.testing.assert_allclose(wsim.s[3:7, 2, 3], [0, 1.8, 1.8, 1]) + + # check o captures + np.testing.assert_allclose(wsim.s[3:7, 3, 0], 
[1, 1.5, 1.5, 0]) + np.testing.assert_allclose(wsim.s[3:7, 3, 1], [1, TMAX, TMIN, 1]) + np.testing.assert_allclose(wsim.s[3:7, 3, 2], [1, 1.7, 1.7, 0]) + np.testing.assert_allclose(wsim.s[3:7, 3, 3], [1, TMAX, TMIN, 1]) + + # check n captures + np.testing.assert_allclose(wsim.s[3:7, 4, 0], [1, TMAX, TMIN, 1]) + np.testing.assert_allclose(wsim.s[3:7, 4, 1], [1, 1.2, 1.2, 0]) + np.testing.assert_allclose(wsim.s[3:7, 4, 2], [0, 1.3, 1.3, 1]) + np.testing.assert_allclose(wsim.s[3:7, 4, 3], [0, TMAX, TMIN, 0]) def compare_to_logic_sim(wsim: WaveSim): @@ -127,17 +127,17 @@ def compare_to_logic_sim(wsim: WaveSim): rng = np.random.default_rng(10) tests = rng.choice(choices, (wsim.s_len, wsim.sims)) - wsim.s[:, :, 0] = (tests & 2) >> 1 - wsim.s[:, :, 3] = (tests & 2) >> 1 - wsim.s[:, :, 1] = 0.0 - wsim.s[:, :, 2] = tests & 1 - wsim.s[:, :, 6] = tests & 1 + wsim.s[0] = (tests & 2) >> 1 + wsim.s[3] = (tests & 2) >> 1 + wsim.s[1] = 0.0 + wsim.s[2] = tests & 1 + wsim.s[6] = tests & 1 wsim.s_to_c() wsim.c_prop() wsim.c_to_s() - resp = np.array(wsim.s[:, :, 6], dtype=np.uint8) | (np.array(wsim.s[:, :, 3], dtype=np.uint8)<<1) + resp = np.array(wsim.s[6], dtype=np.uint8) | (np.array(wsim.s[3], dtype=np.uint8)<<1) resp |= ((resp ^ (resp >> 1)) & 1) << 2 # transitions resp[wsim.pi_s_locs] = logic.UNASSIGNED