Browse Source

make sims pickleable

devel
Stefan Holst 2 years ago
parent
commit
03802ac9f8
  1. 2
      src/kyupy/__init__.py
  2. 28
      src/kyupy/sim.py
  3. 21
      src/kyupy/wave_sim.py

2
src/kyupy/__init__.py

@@ -107,6 +107,8 @@ class Timers:
tmr = Timers(self.timers)
for k, v in t.timers.items(): tmr.timers[k] += v
return tmr
def dict(self):
    """Return the timers as a plain mapping of timer name to its ``s`` value.

    Every entry of ``self.timers`` is reduced to its ``s`` attribute
    (presumably the accumulated seconds -- confirm against the timer class).
    The result is a new built-in dictionary, so it contains no timer objects.
    """
    # A comprehension avoids the intermediate list of tuples and does not
    # depend on the name ``dict`` resolving to the builtin.
    return {k: v.s for k, v in self.timers.items()}
class Log:

28
src/kyupy/sim.py

@@ -1,5 +1,5 @@
import math
from collections import defaultdict
from bisect import bisect, insort_left
import numpy as np
@@ -147,11 +147,7 @@ class SimOps:
"""
def __init__(self, circuit, c_caps=1, c_caps_min=1, c_reuse=False, strip_forks=False):
self.circuit = circuit
dffs = [n for n in circuit.nodes if 'dff' in n.kind.lower()]
latches = [n for n in circuit.nodes if 'latch' in n.kind.lower()]
self.s_nodes = list(circuit.io_nodes) + dffs + latches
self.s_len = len(self.s_nodes)
keep_signals = not c_reuse
self.s_len = len(circuit.s_nodes)
if isinstance(c_caps, int):
c_caps = [c_caps] * len(circuit.lines)
@@ -160,12 +156,12 @@ class SimOps:
self.zero_idx = len(circuit.lines)
self.tmp_idx = self.zero_idx + 1
self.ppi_offset = self.tmp_idx + 1
self.ppo_offset = self.ppi_offset + len(self.s_nodes)
self.c_locs_len = self.ppo_offset + len(self.s_nodes)
self.ppo_offset = self.ppi_offset + self.s_len
self.c_locs_len = self.ppo_offset + self.s_len
# translate circuit structure into self.ops
ops = []
interface_dict = dict((n, i) for i, n in enumerate(self.s_nodes))
interface_dict = dict((n, i) for i, n in enumerate(circuit.s_nodes))
for n in circuit.topological_order():
if n in interface_dict:
inp_idx = self.ppi_offset + interface_dict[n]
@@ -254,7 +250,7 @@ class SimOps:
ref_count[self.tmp_idx] += 1
# allocate and keep memory for PI/PPI, keep memory for PO/PPO (allocated later)
for i, n in enumerate(self.s_nodes):
for i, n in enumerate(circuit.s_nodes):
if len(n.outs) > 0:
self.c_locs[self.ppi_offset + i], self.c_caps[self.ppi_offset + i] = h.alloc(c_caps_min), c_caps_min
ref_count[self.ppi_offset + i] += 1
@@ -282,7 +278,7 @@ class SimOps:
o_idx = op[1]
cap = max(c_caps_min, c_caps[o_idx])
self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap
if not keep_signals:
if c_reuse:
for loc in free_list:
h.free(loc)
@@ -292,19 +288,19 @@ class SimOps:
self.c_locs[lidx], self.c_caps[lidx] = self.c_locs[stem], self.c_caps[stem]
# copy memory location to PO/PPO area
for i, n in enumerate(self.s_nodes):
for i, n in enumerate(circuit.s_nodes):
if len(n.ins) > 0:
self.c_locs[self.ppo_offset + i], self.c_caps[self.ppo_offset + i] = self.c_locs[n.ins[0]], self.c_caps[n.ins[0]]
self.c_len = h.max_size
from collections import defaultdict
self.prim_counts = defaultdict(int)
for op, _, _, _, _, _ in self.ops: self.prim_counts[names[op]] += 1
d = defaultdict(int)
for op, _, _, _, _, _ in self.ops: d[names[op]] += 1
self.prim_counts = dict(d)
self.pi_s_locs = np.flatnonzero(self.c_locs[self.ppi_offset+np.arange(len(self.circuit.io_nodes))] >= 0)
self.po_s_locs = np.flatnonzero(self.c_locs[self.ppo_offset+np.arange(len(self.circuit.io_nodes))] >= 0)
self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes))
self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), self.s_len)
self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs])
self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs])

21
src/kyupy/wave_sim.py

@@ -311,6 +311,27 @@ class WaveSimCuda(WaveSim):
self._block_dim = (32, 16)
def __getstate__(self):
    """Return a copy of the instance state with array attributes as NumPy arrays.

    The instance ``__dict__`` is shallow-copied, then the entries ``c``,
    ``s``, ``ops``, ``c_locs``, ``c_caps``, ``delays`` and ``params`` are
    replaced by ``np.array(...)`` of the current attribute values. The
    instance itself is left untouched.

    NOTE(review): these attributes are presumably device-resident arrays
    that cannot be pickled directly -- confirm against the constructor.
    """
    state = self.__dict__.copy()
    # One loop instead of seven copy-pasted assignments; the order matches
    # the original statement order.
    for name in ('c', 's', 'ops', 'c_locs', 'c_caps', 'delays', 'params'):
        state[name] = np.array(getattr(self, name))
    return state
def __setstate__(self, state):
    """Restore the instance from ``state`` and re-upload its array attributes.

    ``state`` is merged into the instance ``__dict__``; afterwards the
    attributes ``c``, ``s``, ``ops``, ``c_locs``, ``c_caps``, ``delays``
    and ``params`` are each replaced by ``cuda.to_device(...)`` of their
    restored value.

    NOTE(review): ``cuda`` is presumably ``numba.cuda``, so this needs a
    usable CUDA device at unpickling time -- confirm.
    """
    self.__dict__.update(state)
    # One loop instead of seven copy-pasted assignments; the order matches
    # the original statement order.
    for name in ('c', 's', 'ops', 'c_locs', 'c_caps', 'delays', 'params'):
        setattr(self, name, cuda.to_device(getattr(self, name)))
def s_to_c(self):
grid_dim = self._grid_dim(self.sims, self.s_len)
wave_assign_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.ppi_offset)

Loading…
Cancel
Save