|
|
@ -1,5 +1,7 @@ |
|
|
|
import numpy as np |
|
|
|
|
|
|
|
import math |
|
|
|
import math |
|
|
|
|
|
|
|
from bisect import bisect, insort_left |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np |
|
|
|
from . import numba |
|
|
|
from . import numba |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -8,8 +10,74 @@ TMAX_OVL = np.float32(1.1 * 2 ** 127) # almost np.PINF with overflow mark |
|
|
|
TMIN = np.float32(-2 ** 127) # almost np.NINF for 32-bit floating point values |
|
|
|
TMIN = np.float32(-2 ** 127) # almost np.NINF for 32-bit floating point values |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Heap:
    """First-fit bookkeeping allocator for offsets within one linear region.

    The heap never touches real memory; it only tracks integer start
    locations and sizes. Released chunks are reused by later allocations,
    neighbouring released chunks are merged, and released chunks at the very
    end of the region shrink the region again.

    Attributes:
        chunks: maps the start location of every tracked chunk (used or
            released) to its size.
        released: start locations of released chunks, kept in ascending order.
        current_size: current extent of the managed region.
        max_size: largest value ``current_size`` has ever reached.
    """

    def __init__(self):
        self.chunks = {}  # map start location to chunk size
        self.released = []  # sorted start locations of chunks that were released
        self.current_size = 0
        self.max_size = 0

    def alloc(self, size):
        """Reserve a chunk of ``size`` units and return its start location.

        The first released chunk that is large enough is reused (and split if
        it is larger than requested); otherwise the region grows at its end.

        NOTE(review): sizes are presumably positive; a zero-sized request is
        not tracked as a distinct chunk -- confirm callers never pass 0.
        """
        for idx, loc in enumerate(self.released):
            chunksize = self.chunks[loc]
            if chunksize == size:  # exact fit, take the whole chunk
                del self.released[idx]
                return loc
            if chunksize > size:  # split chunk, the tail stays released
                self.chunks[loc] = size
                self.chunks[loc + size] = chunksize - size
                self.released[idx] = loc + size  # move released pointer: loc -> loc+size
                return loc
        # no suitable previously released chunk; make a new one at the end
        loc = self.current_size
        self.chunks[loc] = size
        self.current_size += size
        self.max_size = max(self.max_size, self.current_size)
        return loc

    def free(self, loc):
        """Release the chunk starting at ``loc``.

        Raises:
            KeyError: if no chunk starts at ``loc`` or if that chunk has
                already been released (double free). Accepting a double free
                would list the chunk twice in ``released`` and let ``alloc``
                hand out the same location to two owners.
        """
        if loc not in self.chunks:
            raise KeyError(f'no chunk starts at location {loc}')
        released_idx = bisect(self.released, loc)
        if released_idx > 0 and self.released[released_idx - 1] == loc:
            raise KeyError(f'chunk at location {loc} is already free')

        size = self.chunks[loc]
        if loc + size == self.current_size:  # end of managed area, remove chunk
            del self.chunks[loc]
            self.current_size -= size
            # a released chunk that now touches the end is removed as well
            if self.released:
                prev = self.released[-1]
                prev_size = self.chunks[prev]
                if prev + prev_size == self.current_size:
                    del self.chunks[prev]
                    del self.released[-1]
                    self.current_size -= prev_size
            return

        next_loc = loc + size
        if released_idx < len(self.released) and self.released[released_idx] == next_loc:
            # next chunk is free, merge it into the one being released
            size += self.chunks.pop(next_loc)
            self.chunks[loc] = size
            self.released[released_idx] = loc
        else:
            # loc is known not to be listed yet, so released_idx is its sorted slot
            self.released.insert(released_idx, loc)

        if released_idx > 0:  # check if previous chunk is free
            prev = self.released[released_idx - 1]
            if prev + self.chunks[prev] == loc:  # previous chunk is adjacent, merge
                self.chunks[prev] += size
                del self.chunks[loc]
                del self.released[released_idx]

    def __repr__(self):
        """Return one line per chunk: start location, 'free' or 'used', size."""
        free_locs = set(self.released)
        return "\n".join(
            f'{loc:5d}: {"free" if loc in free_locs else "used"} {size}'
            for loc, size in sorted(self.chunks.items())
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class WaveSim: |
|
|
|
class WaveSim: |
|
|
|
def __init__(self, circuit, timing, sims=8, wavecaps=16): |
|
|
|
def __init__(self, circuit, timing, sims=8, wavecaps=16, strip_forks=False, keep_waveforms=True): |
|
|
|
self.circuit = circuit |
|
|
|
self.circuit = circuit |
|
|
|
self.sims = sims |
|
|
|
self.sims = sims |
|
|
|
self.overflows = 0 |
|
|
|
self.overflows = 0 |
|
|
@ -24,67 +92,37 @@ class WaveSim: |
|
|
|
|
|
|
|
|
|
|
|
intf_wavecap = 4 # sufficient for storing only 1 transition. |
|
|
|
intf_wavecap = 4 # sufficient for storing only 1 transition. |
|
|
|
|
|
|
|
|
|
|
|
# state allocation table. maps line and interface indices to self.state memory locations |
|
|
|
# indices for state allocation table (sat) |
|
|
|
|
|
|
|
|
|
|
|
self.sat = np.zeros((len(circuit.lines) + 2 + 2 * len(self.interface), 3), dtype='int') |
|
|
|
|
|
|
|
self.sat[:, 0] = -1 |
|
|
|
|
|
|
|
filled = 0 |
|
|
|
|
|
|
|
for lidx, cap in enumerate(wavecaps): |
|
|
|
|
|
|
|
self.sat[lidx] = filled, cap, 0 |
|
|
|
|
|
|
|
filled += cap |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.zero_idx = len(circuit.lines) |
|
|
|
self.zero_idx = len(circuit.lines) |
|
|
|
self.sat[self.zero_idx] = filled, intf_wavecap, 0 |
|
|
|
|
|
|
|
filled += intf_wavecap |
|
|
|
|
|
|
|
self.tmp_idx = self.zero_idx + 1 |
|
|
|
self.tmp_idx = self.zero_idx + 1 |
|
|
|
self.sat[self.tmp_idx] = filled, intf_wavecap, 0 |
|
|
|
|
|
|
|
filled += intf_wavecap |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.ppi_offset = self.tmp_idx + 1 |
|
|
|
self.ppi_offset = self.tmp_idx + 1 |
|
|
|
self.ppo_offset = self.ppi_offset + len(self.interface) |
|
|
|
self.ppo_offset = self.ppi_offset + len(self.interface) |
|
|
|
for i, n in enumerate(self.interface): |
|
|
|
self.sat_length = self.ppo_offset + len(self.interface) |
|
|
|
if len(n.outs) > 0: |
|
|
|
|
|
|
|
self.sat[self.ppi_offset + i] = filled, intf_wavecap, 0 |
|
|
|
|
|
|
|
filled += intf_wavecap |
|
|
|
|
|
|
|
if len(n.ins) > 0: |
|
|
|
|
|
|
|
self.sat[self.ppo_offset + i] = self.sat[n.ins[0].index] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# pad timing |
|
|
|
|
|
|
|
self.timing = np.zeros((len(self.sat), 2, 2)) |
|
|
|
|
|
|
|
self.timing[:len(timing)] = timing |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# allocate self.state |
|
|
|
# translate circuit structure into self.ops |
|
|
|
self.state = np.zeros((filled, sims), dtype='float32') + TMAX |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# generate self.ops |
|
|
|
|
|
|
|
ops = [] |
|
|
|
ops = [] |
|
|
|
interface_dict = dict([(n, i) for i, n in enumerate(self.interface)]) |
|
|
|
interface_dict = dict([(n, i) for i, n in enumerate(self.interface)]) |
|
|
|
for n in circuit.topological_order(): |
|
|
|
for n in circuit.topological_order(): |
|
|
|
if n in interface_dict: |
|
|
|
if n in interface_dict: |
|
|
|
inp_idx = self.ppi_offset + interface_dict[n] |
|
|
|
inp_idx = self.ppi_offset + interface_dict[n] |
|
|
|
if len(n.outs) > 0 and n.outs[0] is not None: |
|
|
|
if len(n.outs) > 0 and n.outs[0] is not None: # first output of a PI/PPI |
|
|
|
ops.append((0b1010, n.outs[0].index, inp_idx, self.zero_idx)) |
|
|
|
ops.append((0b1010, n.outs[0].index, inp_idx, self.zero_idx)) |
|
|
|
if 'dff' in n.kind.lower(): |
|
|
|
if 'dff' in n.kind.lower(): # second output of DFF is inverted |
|
|
|
if len(n.outs) > 1 and n.outs[1] is not None: |
|
|
|
if len(n.outs) > 1 and n.outs[1] is not None: |
|
|
|
ops.append((0b0101, n.outs[1].index, inp_idx, self.zero_idx)) |
|
|
|
ops.append((0b0101, n.outs[1].index, inp_idx, self.zero_idx)) |
|
|
|
else: |
|
|
|
else: # if not DFF, no output is inverted. |
|
|
|
for o_line in n.outs[1:]: |
|
|
|
for o_line in n.outs[1:]: |
|
|
|
if o_line is not None: |
|
|
|
if o_line is not None: |
|
|
|
ops.append((0b1010, o_line.index, inp_idx, self.zero_idx)) |
|
|
|
ops.append((0b1010, o_line.index, inp_idx, self.zero_idx)) |
|
|
|
else: |
|
|
|
else: # regular node, not PI/PPI or PO/PPO |
|
|
|
o0_idx = self.tmp_idx |
|
|
|
o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx |
|
|
|
i0_idx = self.zero_idx |
|
|
|
i0_idx = n.ins[0].index if len(n.ins) > 0 and n.ins[0] is not None else self.zero_idx |
|
|
|
i1_idx = self.zero_idx |
|
|
|
i1_idx = n.ins[1].index if len(n.ins) > 1 and n.ins[1] is not None else self.zero_idx |
|
|
|
if len(n.outs) > 0 and n.outs[0] is not None: |
|
|
|
|
|
|
|
o0_idx = n.outs[0].index |
|
|
|
|
|
|
|
else: |
|
|
|
|
|
|
|
print(f'no outputs for {n}') |
|
|
|
|
|
|
|
if len(n.ins) > 0 and n.ins[0] is not None: i0_idx = n.ins[0].index |
|
|
|
|
|
|
|
if len(n.ins) > 1 and n.ins[1] is not None: i1_idx = n.ins[1].index |
|
|
|
|
|
|
|
kind = n.kind.lower() |
|
|
|
kind = n.kind.lower() |
|
|
|
if kind == '__fork__': |
|
|
|
if kind == '__fork__': |
|
|
|
for o_line in n.outs: |
|
|
|
if not strip_forks: |
|
|
|
ops.append((0b1010, o_line.index, i0_idx, i1_idx)) |
|
|
|
for o_line in n.outs: |
|
|
|
|
|
|
|
ops.append((0b1010, o_line.index, i0_idx, i1_idx)) |
|
|
|
elif kind.startswith('nand'): |
|
|
|
elif kind.startswith('nand'): |
|
|
|
ops.append((0b0111, o0_idx, i0_idx, i1_idx)) |
|
|
|
ops.append((0b0111, o0_idx, i0_idx, i1_idx)) |
|
|
|
elif kind.startswith('nor'): |
|
|
|
elif kind.startswith('nor'): |
|
|
@ -109,18 +147,91 @@ class WaveSim: |
|
|
|
print('unknown gate type', kind) |
|
|
|
print('unknown gate type', kind) |
|
|
|
self.ops = np.asarray(ops, dtype='int32') |
|
|
|
self.ops = np.asarray(ops, dtype='int32') |
|
|
|
|
|
|
|
|
|
|
|
# generate level data |
|
|
|
# create a map from fanout lines to stem lines for fork stripping |
|
|
|
levels = np.zeros(len(self.sat), dtype='int32') |
|
|
|
stems = np.zeros(self.sat_length, dtype='int32') - 1 # default to -1: 'no fanout line' |
|
|
|
|
|
|
|
if strip_forks: |
|
|
|
|
|
|
|
for f in circuit.forks.values(): |
|
|
|
|
|
|
|
prev_line = f.ins[0] |
|
|
|
|
|
|
|
while prev_line.driver.kind == '__fork__': |
|
|
|
|
|
|
|
prev_line = prev_line.driver.ins[0] |
|
|
|
|
|
|
|
stem_idx = prev_line.index |
|
|
|
|
|
|
|
for ol in f.outs: |
|
|
|
|
|
|
|
stems[ol.index] = stem_idx |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# calculate level (distance from PI/PPI) and reference count for each line |
|
|
|
|
|
|
|
levels = np.zeros(self.sat_length, dtype='int32') |
|
|
|
|
|
|
|
ref_count = np.zeros(self.sat_length, dtype='int32') |
|
|
|
level_starts = [0] |
|
|
|
level_starts = [0] |
|
|
|
current_level = 1 |
|
|
|
current_level = 1 |
|
|
|
for i, op in enumerate(self.ops): |
|
|
|
for i, op in enumerate(self.ops): |
|
|
|
if levels[op[2]] >= current_level or levels[op[3]] >= current_level: |
|
|
|
# if we fork-strip, always take the stems for determining fan-in level |
|
|
|
|
|
|
|
i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2] |
|
|
|
|
|
|
|
i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3] |
|
|
|
|
|
|
|
if levels[i0_idx] >= current_level or levels[i1_idx] >= current_level: |
|
|
|
current_level += 1 |
|
|
|
current_level += 1 |
|
|
|
level_starts.append(i) |
|
|
|
level_starts.append(i) |
|
|
|
levels[op[1]] = current_level |
|
|
|
levels[op[1]] = current_level # set level of the output line |
|
|
|
|
|
|
|
ref_count[i0_idx] += 1 |
|
|
|
|
|
|
|
ref_count[i1_idx] += 1 |
|
|
|
self.level_starts = np.asarray(level_starts, dtype='int32') |
|
|
|
self.level_starts = np.asarray(level_starts, dtype='int32') |
|
|
|
self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32') |
|
|
|
self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32') |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# state allocation table. maps line and interface indices to self.state memory locations |
|
|
|
|
|
|
|
self.sat = np.zeros((self.sat_length, 3), dtype='int') |
|
|
|
|
|
|
|
self.sat[:, 0] = -1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
h = Heap() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# allocate and keep memory for special fields |
|
|
|
|
|
|
|
self.sat[self.zero_idx] = h.alloc(intf_wavecap), intf_wavecap, 0 |
|
|
|
|
|
|
|
self.sat[self.tmp_idx] = h.alloc(intf_wavecap), intf_wavecap, 0 |
|
|
|
|
|
|
|
ref_count[self.zero_idx] += 1 |
|
|
|
|
|
|
|
ref_count[self.tmp_idx] += 1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# allocate and keep memory for PI/PPI, keep memory for PO/PPO (allocated later) |
|
|
|
|
|
|
|
for i, n in enumerate(self.interface): |
|
|
|
|
|
|
|
if len(n.outs) > 0: |
|
|
|
|
|
|
|
self.sat[self.ppi_offset + i] = h.alloc(intf_wavecap), intf_wavecap, 0 |
|
|
|
|
|
|
|
ref_count[self.ppi_offset + i] += 1 |
|
|
|
|
|
|
|
if len(n.ins) > 0: |
|
|
|
|
|
|
|
i0_idx = stems[n.ins[0].index] if stems[n.ins[0].index] >= 0 else n.ins[0].index |
|
|
|
|
|
|
|
ref_count[i0_idx] += 1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# allocate memory for the rest of the circuit |
|
|
|
|
|
|
|
for op_start, op_stop in zip(self.level_starts, self.level_stops): |
|
|
|
|
|
|
|
free_list = [] |
|
|
|
|
|
|
|
for op in self.ops[op_start:op_stop]: |
|
|
|
|
|
|
|
# if we fork-strip, always take the stems |
|
|
|
|
|
|
|
i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2] |
|
|
|
|
|
|
|
i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3] |
|
|
|
|
|
|
|
ref_count[i0_idx] -= 1 |
|
|
|
|
|
|
|
ref_count[i1_idx] -= 1 |
|
|
|
|
|
|
|
if ref_count[i0_idx] <= 0: free_list.append(self.sat[i0_idx, 0]) |
|
|
|
|
|
|
|
if ref_count[i1_idx] <= 0: free_list.append(self.sat[i1_idx, 0]) |
|
|
|
|
|
|
|
o_idx = op[1] |
|
|
|
|
|
|
|
cap = wavecaps[o_idx] |
|
|
|
|
|
|
|
self.sat[o_idx] = h.alloc(cap), cap, 0 |
|
|
|
|
|
|
|
if not keep_waveforms: |
|
|
|
|
|
|
|
for loc in free_list: |
|
|
|
|
|
|
|
h.free(loc) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# copy memory location and capacity from stems to fanout lines |
|
|
|
|
|
|
|
for lidx, stem in enumerate(stems): |
|
|
|
|
|
|
|
if stem >= 0: # if at a fanout line |
|
|
|
|
|
|
|
self.sat[lidx] = self.sat[stem] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# copy memory location to PO/PPO area |
|
|
|
|
|
|
|
for i, n in enumerate(self.interface): |
|
|
|
|
|
|
|
if len(n.ins) > 0: |
|
|
|
|
|
|
|
self.sat[self.ppo_offset + i] = self.sat[n.ins[0].index] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# pad timing |
|
|
|
|
|
|
|
self.timing = np.zeros((self.sat_length, 2, 2)) |
|
|
|
|
|
|
|
self.timing[:len(timing)] = timing |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# allocate self.state |
|
|
|
|
|
|
|
self.state = np.zeros((h.max_size, sims), dtype='float32') + TMAX |
|
|
|
|
|
|
|
|
|
|
|
m1 = np.array([2 ** x for x in range(7, -1, -1)], dtype='uint8') |
|
|
|
m1 = np.array([2 ** x for x in range(7, -1, -1)], dtype='uint8') |
|
|
|
m0 = ~m1 |
|
|
|
m0 = ~m1 |
|
|
|
self.mask = np.rollaxis(np.vstack((m0, m1)), 1) |
|
|
|
self.mask = np.rollaxis(np.vstack((m0, m1)), 1) |
|
|
|