Stefan Holst
2 years ago
1 changed files with 305 additions and 0 deletions
@ -0,0 +1,305 @@ |
|||||||
|
|
||||||
|
import math |
||||||
|
from bisect import bisect, insort_left |
||||||
|
|
||||||
|
import numpy as np |
||||||
|
|
||||||
|
class SimPrim:
    """Codes of the basic logic primitives used in scheduled simulation ops.

    Every code is a 16-bit integer that reads as a truth table over up to
    four inputs: bit ``k`` of the code is the output for input combination
    ``k``, where input 0 is the least significant bit of ``k``. For example,
    ``AND4`` has only bit 15 set (all inputs 1) and ``NOR4`` has only bit 0
    set (all inputs 0). The 3-, 2- and 1-input variants repeat their pattern
    so that the value of the unused upper inputs does not matter.
    """

    BUF1 = 0b1010_1010_1010_1010
    INV1 = 0b0101_0101_0101_0101

    NAND4 = 0b0111_1111_1111_1111
    NAND3 = 0b0111_1111_0111_1111
    NAND2 = 0b0111_0111_0111_0111

    NOR4 = 0b0000_0000_0000_0001
    NOR3 = 0b0000_0001_0000_0001
    NOR2 = 0b0001_0001_0001_0001

    AND4 = 0b1000_0000_0000_0000
    AND3 = 0b1000_0000_1000_0000
    AND2 = 0b1000_1000_1000_1000

    OR4 = 0b1111_1111_1111_1110
    OR3 = 0b1111_1110_1111_1110
    OR2 = 0b1110_1110_1110_1110

    XOR4 = 0b0110_1001_1001_0110
    XOR3 = 0b1001_0110_1001_0110
    XOR2 = 0b0110_0110_0110_0110

    XNOR4 = 0b1001_0110_0110_1001
    XNOR3 = 0b0110_1001_0110_1001
    XNOR2 = 0b1001_1001_1001_1001

    AO22 = 0b1111_1000_1000_1000
    AOI22 = 0b0000_0111_0111_0111
    AO21 = 0b1110_1010_1110_1010
    AOI21 = 0b0001_0101_0001_0101
    OA22 = 0b1110_1110_1110_0000
    OAI22 = 0b0001_0001_0001_1111
    OA21 = 0b1010_1000_1010_1000
    OAI21 = 0b0101_0111_0101_0111
    MUX21 = 0b1110_0100_1110_0100

    # Maps a lower-case cell-kind prefix to its primitives, ordered as
    # (4-input, 3-input, 2-input) variant. Kinds with a fixed input count
    # repeat the same primitive three times. Consumers match with
    # ``kind.startswith(prefix)`` in insertion order.
    kind_prefixes = {
        'nand': (NAND4, NAND3, NAND2),
        'nor': (NOR4, NOR3, NOR2),
        'and': (AND4, AND3, AND2),
        'or': (OR4, OR3, OR2),
        'xor': (XOR4, XOR3, XOR2),
        'xnor': (XNOR4, XNOR3, XNOR2),

        'not': (INV1, INV1, INV1),
        'inv': (INV1, INV1, INV1),
        'ibuf': (INV1, INV1, INV1),
        '__const1__': (INV1, INV1, INV1),
        'tieh': (INV1, INV1, INV1),

        'buf': (BUF1, BUF1, BUF1),
        'nbuf': (BUF1, BUF1, BUF1),
        'delln': (BUF1, BUF1, BUF1),
        '__const0__': (BUF1, BUF1, BUF1),
        'tiel': (BUF1, BUF1, BUF1),

        'ao22': (AO22, AO22, AO22),
        'aoi22': (AOI22, AOI22, AOI22),
        'ao21': (AO21, AO21, AO21),
        'aoi21': (AOI21, AOI21, AOI21),

        'oa22': (OA22, OA22, OA22),
        'oai22': (OAI22, OAI22, OAI22),
        'oa21': (OA21, OA21, OA21),
        'oai21': (OAI21, OAI21, OAI21),

        'mux21': (MUX21, MUX21, MUX21),
    }

    @classmethod
    def names(cls):
        """Return a dict that maps each primitive code to its constant name.

        Only integer attributes defined directly on this class are reported.
        Underscore-prefixed entries are skipped because Python 3.13+ adds an
        integer ``__firstlineno__`` to every class namespace, which would
        otherwise show up as a bogus primitive.
        """
        return {
            code: name
            for name, code in vars(cls).items()
            if isinstance(code, int) and not name.startswith('_')
        }
||||||
|
|
||||||
|
class Heap:
    """A first-fit allocator that hands out integer locations in a linear address range.

    The heap only does the book-keeping; it does not own any memory itself.
    Adjacent free chunks are always merged, and free chunks at the end of the
    managed area are dropped so that ``current_size`` shrinks again.

    Attributes:
        chunks: Maps the start location of every chunk (used or free) to its size.
        released: Sorted list of the start locations of all free chunks.
        current_size: Size of the currently managed area.
        max_size: Largest value ``current_size`` ever reached.
    """

    def __init__(self):
        self.chunks = {}  # map start location to chunk size
        self.released = []  # start locations of released chunks, kept sorted
        self.current_size = 0
        self.max_size = 0

    def alloc(self, size):
        """Reserve a chunk of the given size and return its start location.

        The first released chunk that is large enough is re-used (and split
        if it is larger than requested). If none fits, the managed area grows.
        """
        for idx, loc in enumerate(self.released):
            chunk_size = self.chunks[loc]
            if chunk_size == size:  # exact fit, chunk is no longer free
                del self.released[idx]
                return loc
            if chunk_size > size:  # split chunk, the remainder stays free
                remainder = loc + size
                self.chunks[loc] = size
                self.chunks[remainder] = chunk_size - size
                self.released[idx] = remainder  # move released pointer: loc -> loc+size
                return loc
        # no suitable released chunk; make a new one at the end
        loc = self.current_size
        self.chunks[loc] = size
        self.current_size += size
        self.max_size = max(self.max_size, self.current_size)
        return loc

    def free(self, loc):
        """Release the chunk that starts at ``loc``.

        Raises:
            KeyError: If no chunk starts at ``loc``.
            ValueError: If the chunk at ``loc`` has already been released.
                Accepting it would put ``loc`` into the free list twice and
                let later allocations hand out the same location twice.
        """
        size = self.chunks[loc]
        released_idx = bisect(self.released, loc)
        if released_idx > 0 and self.released[released_idx - 1] == loc:
            raise ValueError(f'chunk at {loc} is already free')

        if loc + size == self.current_size:  # end of managed area, remove chunk
            del self.chunks[loc]
            self.current_size -= size
            # check and remove prev chunk if free
            if self.released:
                prev = self.released[-1]
                if prev + self.chunks[prev] == self.current_size:
                    self.current_size -= self.chunks.pop(prev)
                    del self.released[-1]
            return

        if released_idx < len(self.released) and loc + size == self.released[released_idx]:
            # next chunk is free, merge it into this one
            size += self.chunks.pop(loc + size)
            self.chunks[loc] = size
            self.released[released_idx] = loc
        else:
            insort_left(self.released, loc)  # put in a new release

        if released_idx > 0:  # check if previous chunk is free
            prev = self.released[released_idx - 1]
            if prev + self.chunks[prev] == loc:  # previous chunk is adjacent to freed one, merge
                del self.chunks[loc]
                self.chunks[prev] += size
                del self.released[released_idx]

    def __repr__(self):
        """Return one line per chunk: start location, used/free state and size."""
        rows = []
        for loc in sorted(self.chunks):
            size = self.chunks[loc]
            released_idx = bisect(self.released, loc)
            is_released = released_idx > 0 and self.released[released_idx - 1] == loc
            rows.append(f'{loc:5d}: {"free" if is_released else "used"} {size}')
        return "\n".join(rows)
||||||
|
|
||||||
|
|
||||||
|
class Schedule:
    """A static scheduler that translates a Circuit into a topologically sorted list of basic logic operations
    (self.ops) and a value allocation table (self.vat) for use in simulators.

    :param circuit: The circuit to create a schedule for.
    :param strip_forks: If enabled, the scheduler will not include fork nodes to save simulation time.
        Stripping forks will cause interconnect delay annotations of lines read by fork nodes to be ignored.
    :param keep_signals: If disabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
        memory footprint, but intermediate signal waveforms become inaccessible after a propagation.
    :param signal_caps: Capacity to allocate for each line. Either a single integer that is used for all lines
        or a sequence that is indexed by line index.

    After construction, the following attributes are available:

    * ``interface``: the circuit interface followed by all nodes whose kind contains ``dff``.
    * ``ops``: int32 array with one row ``(primitive, out_idx, in0_idx, in1_idx, in2_idx, in3_idx)`` per operation.
    * ``level_starts``, ``level_stops``: start and stop positions in ``ops`` of each level.
    * ``vat``: one row ``(location, capacity, 0)`` per line, special slot and interface position;
      the location is -1 for entries that got no memory.
    * ``state_length``: the total size of the state memory needed.
    """

    def __init__(self, circuit, strip_forks=False, keep_signals=True, signal_caps=1):
        self.circuit = circuit
        self.interface = list(circuit.interface) + [n for n in circuit.nodes if 'dff' in n.kind.lower()]

        if isinstance(signal_caps, (int, np.integer)):
            signal_caps = [signal_caps] * len(circuit.lines)

        # indices for value allocation table (vat):
        # [lines..., zero, tmp, PI/PPI per interface node..., PO/PPO per interface node...]
        self.zero_idx = len(circuit.lines)
        self.tmp_idx = self.zero_idx + 1
        self.ppi_offset = self.tmp_idx + 1
        self.ppo_offset = self.ppi_offset + len(self.interface)
        self.vat_length = self.ppo_offset + len(self.interface)
        zero = self.zero_idx

        def line_index(lines, pos, default):
            """Return the index of lines[pos] or default if that position is absent or unconnected."""
            if len(lines) > pos and lines[pos] is not None:
                return lines[pos].index
            return default

        # translate circuit structure into self.ops
        ops = []
        interface_dict = {n: i for i, n in enumerate(self.interface)}
        for n in circuit.topological_order():
            if n in interface_dict:
                inp_idx = self.ppi_offset + interface_dict[n]
                if len(n.outs) > 0 and n.outs[0] is not None:  # first output of a PI/PPI
                    ops.append((SimPrim.BUF1, n.outs[0].index, inp_idx, zero, zero, zero))
                if 'dff' in n.kind.lower():  # second output of DFF is inverted
                    if len(n.outs) > 1 and n.outs[1] is not None:
                        ops.append((SimPrim.INV1, n.outs[1].index, inp_idx, zero, zero, zero))
                else:  # if not DFF, no output is inverted.
                    for o_line in n.outs[1:]:
                        if o_line is not None:
                            ops.append((SimPrim.BUF1, o_line.index, inp_idx, zero, zero, zero))
                continue
            # regular node, not PI/PPI or PO/PPO
            o0_idx = line_index(n.outs, 0, self.tmp_idx)
            i0_idx = line_index(n.ins, 0, zero)
            i1_idx = line_index(n.ins, 1, zero)
            i2_idx = line_index(n.ins, 2, zero)
            i3_idx = line_index(n.ins, 3, zero)
            kind = n.kind.lower()
            if kind == '__fork__':
                if not strip_forks:
                    for o_line in n.outs:
                        if o_line is not None:
                            ops.append((SimPrim.BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx))
                continue
            sp = None
            for prefix, prims in SimPrim.kind_prefixes.items():
                if kind.startswith(prefix):
                    # pick the narrowest variant whose ignored inputs are all unconnected
                    sp = prims[0]
                    if i3_idx == zero:
                        sp = prims[1]
                        if i2_idx == zero:
                            sp = prims[2]
                    break
            if sp is None:
                print('unknown gate type', kind)
            else:
                ops.append((sp, o0_idx, i0_idx, i1_idx, i2_idx, i3_idx))

        # reshape keeps the array two-dimensional even if there are no ops at all
        self.ops = np.asarray(ops, dtype='int32').reshape(-1, 6)

        # create a map from fanout lines to stem lines for fork stripping
        stems = np.zeros(self.vat_length, dtype='int32') - 1  # default to -1: 'no fanout line'
        if strip_forks:
            for f in circuit.forks.values():
                prev_line = f.ins[0]
                while prev_line.driver.kind == '__fork__':
                    prev_line = prev_line.driver.ins[0]
                stem_idx = prev_line.index
                for ol in f.outs:
                    # indexing a numpy array with None would address the whole array
                    if ol is not None:
                        stems[ol.index] = stem_idx

        def stem_of(idx):
            """Return the stem of a fanout line, or idx itself if it is not a stripped fanout line."""
            stem = stems[idx]
            return stem if stem >= 0 else idx

        # calculate level (distance from PI/PPI) and reference count for each line
        levels = np.zeros(self.vat_length, dtype='int32')
        ref_count = np.zeros(self.vat_length, dtype='int32')
        level_starts = [0]
        current_level = 1
        for i, op in enumerate(self.ops):
            # if we fork-strip, always take the stems for determining fan-in level
            in_idxs = [stem_of(idx) for idx in op[2:6]]
            if any(levels[idx] >= current_level for idx in in_idxs):
                current_level += 1
                level_starts.append(i)
            levels[op[1]] = current_level  # set level of the output line
            for idx in in_idxs:  # one by one, so that repeated inputs are counted each time
                ref_count[idx] += 1
        self.level_starts = np.asarray(level_starts, dtype='int32')
        self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32')

        # value allocation table. maps line and interface indices to self.state memory locations
        self.vat = np.zeros((self.vat_length, 3), dtype='int')
        self.vat[:, 0] = -1

        h = Heap()

        # allocate and keep memory for special fields
        self.vat[self.zero_idx] = h.alloc(1), 1, 0
        self.vat[self.tmp_idx] = h.alloc(1), 1, 0
        ref_count[self.zero_idx] += 1  # the extra reference prevents these from ever being freed
        ref_count[self.tmp_idx] += 1

        # allocate and keep memory for PI/PPI, keep memory for PO/PPO (allocated later)
        for i, n in enumerate(self.interface):
            if len(n.outs) > 0:
                self.vat[self.ppi_offset + i] = h.alloc(1), 1, 0
                ref_count[self.ppi_offset + i] += 1
            if len(n.ins) > 0 and n.ins[0] is not None:
                ref_count[stem_of(n.ins[0].index)] += 1

        # allocate memory for the rest of the circuit
        for op_start, op_stop in zip(self.level_starts, self.level_stops):
            free_list = []
            for op in self.ops[op_start:op_stop]:
                # if we fork-strip, always take the stems
                in_idxs = [stem_of(idx) for idx in op[2:6]]
                for idx in in_idxs:
                    ref_count[idx] -= 1
                for idx in in_idxs:
                    if ref_count[idx] <= 0:
                        free_list.append(int(self.vat[idx, 0]))
                o_idx = op[1]
                if o_idx == self.tmp_idx:
                    # nodes without an output line write to the scratch slot, which is
                    # already allocated and has no entry in signal_caps
                    continue
                cap = signal_caps[o_idx]
                self.vat[o_idx] = h.alloc(cap), cap, 0
            if not keep_signals:
                # A line that feeds several inputs of its last reader is listed more than
                # once; release each location only once, in the original order.
                for loc in dict.fromkeys(free_list):
                    h.free(loc)

        # copy memory location and capacity from stems to fanout lines
        for lidx, stem in enumerate(stems):
            if stem >= 0:  # if at a fanout line
                self.vat[lidx] = self.vat[stem]

        # copy memory location to PO/PPO area
        for i, n in enumerate(self.interface):
            if len(n.ins) > 0 and n.ins[0] is not None:
                self.vat[self.ppo_offset + i] = self.vat[n.ins[0].index]

        self.state_length = h.max_size
Loading…
Reference in new issue