diff --git a/src/kyupy/__init__.py b/src/kyupy/__init__.py index ea967b4..06c9ab8 100644 --- a/src/kyupy/__init__.py +++ b/src/kyupy/__init__.py @@ -84,7 +84,7 @@ def batchrange(nitems, maxsize): for offset in range(0, nitems, maxsize): yield offset, min(nitems-offset, maxsize) - + class Timer: def __init__(self, s=0): self.s = s def __enter__(self): self.start_time = time.perf_counter(); return self @@ -107,6 +107,8 @@ class Timers: tmr = Timers(self.timers) for k, v in t.timers.items(): tmr.timers[k] += v return tmr + def dict(self): + return dict([(k, v.s) for k, v in self.timers.items()]) class Log: diff --git a/src/kyupy/sim.py b/src/kyupy/sim.py index 6bc5961..8808d9d 100644 --- a/src/kyupy/sim.py +++ b/src/kyupy/sim.py @@ -1,5 +1,5 @@ -import math +from collections import defaultdict from bisect import bisect, insort_left import numpy as np @@ -147,11 +147,7 @@ class SimOps: """ def __init__(self, circuit, c_caps=1, c_caps_min=1, c_reuse=False, strip_forks=False): self.circuit = circuit - dffs = [n for n in circuit.nodes if 'dff' in n.kind.lower()] - latches = [n for n in circuit.nodes if 'latch' in n.kind.lower()] - self.s_nodes = list(circuit.io_nodes) + dffs + latches - self.s_len = len(self.s_nodes) - keep_signals = not c_reuse + self.s_len = len(circuit.s_nodes) if isinstance(c_caps, int): c_caps = [c_caps] * len(circuit.lines) @@ -160,17 +156,17 @@ class SimOps: self.zero_idx = len(circuit.lines) self.tmp_idx = self.zero_idx + 1 self.ppi_offset = self.tmp_idx + 1 - self.ppo_offset = self.ppi_offset + len(self.s_nodes) - self.c_locs_len = self.ppo_offset + len(self.s_nodes) + self.ppo_offset = self.ppi_offset + self.s_len + self.c_locs_len = self.ppo_offset + self.s_len # translate circuit structure into self.ops ops = [] - interface_dict = dict((n, i) for i, n in enumerate(self.s_nodes)) + interface_dict = dict((n, i) for i, n in enumerate(circuit.s_nodes)) for n in circuit.topological_order(): if n in interface_dict: inp_idx = self.ppi_offset + interface_dict[n] if len(n.outs) > 0 and n.outs[0] is not None: # first output of a PI/PPI - ops.append((BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) + ops.append((BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) if 'dff' in n.kind.lower(): # second output of DFF is inverted if len(n.outs) > 1 and n.outs[1] is not None: ops.append((INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) @@ -254,7 +250,7 @@ class SimOps: ref_count[self.tmp_idx] += 1 # allocate and keep memory for PI/PPI, keep memory for PO/PPO (allocated later) - for i, n in enumerate(self.s_nodes): + for i, n in enumerate(circuit.s_nodes): if len(n.outs) > 0: self.c_locs[self.ppi_offset + i], self.c_caps[self.ppi_offset + i] = h.alloc(c_caps_min), c_caps_min ref_count[self.ppi_offset + i] += 1 @@ -282,7 +278,7 @@ class SimOps: o_idx = op[1] cap = max(c_caps_min, c_caps[o_idx]) self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap - if not keep_signals: + if c_reuse: for loc in free_list: h.free(loc) @@ -292,19 +288,19 @@ class SimOps: self.c_locs[lidx], self.c_caps[lidx] = self.c_locs[stem], self.c_caps[stem] # copy memory location to PO/PPO area - for i, n in enumerate(self.s_nodes): + for i, n in enumerate(circuit.s_nodes): if len(n.ins) > 0: self.c_locs[self.ppo_offset + i], self.c_caps[self.ppo_offset + i] = self.c_locs[n.ins[0]], self.c_caps[n.ins[0]] self.c_len = h.max_size - from collections import defaultdict - self.prim_counts = defaultdict(int) - for op, _, _, _, _, _ in self.ops: self.prim_counts[names[op]] += 1 + d = defaultdict(int) + for op, _, _, _, _, _ in self.ops: d[names[op]] += 1 + self.prim_counts = dict(d) self.pi_s_locs = np.flatnonzero(self.c_locs[self.ppi_offset+np.arange(len(self.circuit.io_nodes))] >= 0) self.po_s_locs = np.flatnonzero(self.c_locs[self.ppo_offset+np.arange(len(self.circuit.io_nodes))] >= 0) - self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes)) + self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), self.s_len) self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs]) self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs]) diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py index 006b52e..297fd4d 100644 --- a/src/kyupy/wave_sim.py +++ b/src/kyupy/wave_sim.py @@ -311,6 +311,27 @@ class WaveSimCuda(WaveSim): self._block_dim = (32, 16) + def __getstate__(self): + state = self.__dict__.copy() + state['c'] = np.array(self.c) + state['s'] = np.array(self.s) + state['ops'] = np.array(self.ops) + state['c_locs'] = np.array(self.c_locs) + state['c_caps'] = np.array(self.c_caps) + state['delays'] = np.array(self.delays) + state['params'] = np.array(self.params) + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self.c = cuda.to_device(self.c) + self.s = cuda.to_device(self.s) + self.ops = cuda.to_device(self.ops) + self.c_locs = cuda.to_device(self.c_locs) + self.c_caps = cuda.to_device(self.c_caps) + self.delays = cuda.to_device(self.delays) + self.params = cuda.to_device(self.params) + def s_to_c(self): grid_dim = self._grid_dim(self.sims, self.s_len) wave_assign_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.ppi_offset)