4 changed files with 552 additions and 1125 deletions
			
			
		@ -0,0 +1,365 @@ | 
				
			|||||||
 | 
					"""High-throughput combinational logic timing simulators. | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					These simulators work similarly to :py:class:`~kyupy.logic_sim.LogicSim`. | 
				
			||||||
 | 
					They propagate values through the combinational circuit from (pseudo) primary inputs to (pseudo) primary outputs. | 
				
			||||||
 | 
					Instead of propagating logic values, these simulators propagate signal histories (waveforms). | 
				
			||||||
 | 
					They are designed to run many simulations in parallel and while their latencies are quite high, they can achieve | 
				
			||||||
 | 
					high throughput. | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The simulators are not event-based and are not capable of simulating sequential circuits directly. | 
				
			||||||
 | 
					""" | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import math | 
				
			||||||
 | 
					from bisect import bisect, insort_left | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import numpy as np | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from . import numba, cuda, hr_bytes | 
				
			||||||
 | 
					from .sim import SimOps | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TMAX = np.float32(2.0 ** 127)
					"""Large positive 32-bit float that terminates every waveform."""

					TMAX_OVL = np.float32(2.0 ** 127 * 1.1)
					"""Large positive 32-bit float (greater than ``TMAX``) that terminates a waveform which
					may be incomplete because of an overflow."""

					TMIN = np.float32(-(2.0 ** 127))
					"""Large negative 32-bit float placed at the start of waveforms that begin with logic-1."""
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class WaveSim(SimOps):
					    """A waveform-based combinational logic timing simulator running on CPU.

					    :param circuit: The circuit to simulate.
					    :param timing: The timing annotation of the circuit (see :py:func:`kyupy.sdf.DelayFile.annotation` for details)
					    :param sims: The number of parallel simulations.
					    :param c_caps: The number of floats available in each waveform. Values must be positive and a multiple of 4.
					        Waveforms encode the signal switching history by storing transition times.
					        The waveform capacity roughly corresponds to the number of transitions
					        that can be stored. A capacity of ``n`` can store at least ``n-2`` transitions. If more transitions are
					        generated during simulation, the latest glitch is removed (freeing up two transition times) and an overflow
					        flag is set. If an integer is given, all waveforms are set to that same capacity. With an array of length
					        ``len(circuit.lines)`` the capacity is set for each intermediate waveform individually.
					    :param c_reuse: Forwarded unchanged to :py:class:`SimOps`. It is expected to enable re-use of the memory of
					        intermediate signal waveforms, which greatly reduces the memory footprint but makes intermediate signal
					        waveforms inaccessible after a propagation (NOTE(review): confirm against ``SimOps``).
					    :param strip_forks: If enabled, the simulator will not evaluate fork nodes explicitly. This saves simulation time
					        by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes
					        are ignored.
					    """
					    def __init__(self, circuit, timing, sims=8, c_caps=16, c_reuse=False, strip_forks=False):
					        # Validate element-wise so that both a single integer and an array of
					        # per-line capacities are accepted (a plain ``and`` on an array raises).
					        caps = np.asarray(c_caps)
					        assert np.all(caps > 0) and np.all(caps % 4 == 0), \
					            'c_caps must be positive and a multiple of 4'
					        super().__init__(circuit, c_caps=c_caps//4, c_reuse=c_reuse, strip_forks=strip_forks)
					        self.sims = sims

					        # The base class was set up with capacities divided by 4; scale the
					        # total length and the first two columns of ``vat`` back up.
					        self.c_len *= 4
					        self.vat[...,0:2] *= 4

					        self.timing = np.zeros((self.c_len, 2, 2))
					        self.timing[:len(timing)] = timing

					        # Waveform memory: one column per simulation, initialized to TMAX.
					        self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX
					        self.s = np.zeros((len(self.s_nodes), sims, 11), dtype=np.float32)
					        """Information about the logic values and transitions around the sequential elements (flip-flops) and ports.

					        The first 3 values are read by ``s_to_c()``.
					        The remaining values are written by ``c_to_s()``.

					        The elements are as follows:
					        * ``s[..., 0]`` (P)PI initial value
					        * ``s[..., 1]`` (P)PI transition time
					        * ``s[..., 2]`` (P)PI final value
					        * ``s[..., 3]`` (P)PO initial value
					        * ``s[..., 4]`` (P)PO earliest arrival time (EAT): The time at which the output transitioned from its initial value.
					        * ``s[..., 5]`` (P)PO latest stabilization time (LST): The time at which the output settled to its final value.
					        * ``s[..., 6]`` (P)PO final value
					        * ``s[..., 7]`` (P)PO capture value: probability of capturing a 1 at a given capture time
					        * ``s[..., 8]`` (P)PO sampled capture value: decided by random sampling according to a given seed.
					        * ``s[..., 9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed.
					        * ``s[..., 10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more
					          transitions than specified in ``c_caps``. Some transitions have been discarded, the
					          final values in the waveforms are still valid.
					        """

					        # Per-simulation parameters handed to ``wave_eval``; column 0 is a
					        # multiplier applied to all delays and defaults to 1.0.
					        self.params = np.zeros((sims, 4), dtype=np.float32)
					        self.params[...,0] = 1.0

					        # m1 holds the single-bit masks 128, 64, ..., 1; m0 holds their complements.
					        m1 = np.array([2 ** x for x in range(7, -1, -1)], dtype=np.uint8)
					        m0 = ~m1
					        self.mask = np.rollaxis(np.vstack((m0, m1)), 1)

					        self.overflows = 0
					        self.lst_eat_valid = False

					        # Indices into ``self.s`` of io_nodes that have a valid (>= 0) entry in
					        # ``vat`` at the PPI / PPO offsets, followed by all remaining s_nodes.
					        self.pi_s_locs = np.flatnonzero(self.vat[self.ppi_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0)
					        self.po_s_locs = np.flatnonzero(self.vat[self.ppo_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0)
					        self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes))

					        self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs])
					        self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs])

					        # Matching start locations of the corresponding waveforms in ``self.c``.
					        self.pi_c_locs = self.vat[self.ppi_offset+self.pi_s_locs, 0]
					        self.po_c_locs = self.vat[self.ppo_offset+self.po_s_locs, 0]
					        self.ppi_c_locs = self.vat[self.ppi_offset+self.ppio_s_locs, 0]
					        self.ppo_c_locs = self.vat[self.ppo_offset+self.ppio_s_locs, 0]

					        self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs])
					        self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs])

					        # Shadow the static method with a jit-compiled version on this instance.
					        self.wave_capture = numba.njit(WaveSim.wave_capture)

					    def __repr__(self):
					        total_mem = self.c.nbytes + self.vat.nbytes + self.ops.nbytes + self.s.nbytes
					        return f'<WaveSim {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \
					               f'levels={len(self.level_starts)} mem={hr_bytes(total_mem)}>'

					    def get_line_delay(self, line, polarity):
					        """Returns the current delay of the given ``line`` and ``polarity`` in the simulation model."""
					        return self.timing[line, 0, polarity]

					    def set_line_delay(self, line, polarity, delay):
					        """Sets a new ``delay`` for the given ``line`` and ``polarity`` in the simulation model."""
					        self.timing[line, 0, polarity] = delay

					    def s_to_c(self):
					        """Transfers values of sequential elements and primary inputs to the combinational portion.

					        Based on the data in ``self.s``, waveforms are generated on the input lines of the circuit.
					        It modifies ``self.c``.
					        """
					        sins = np.moveaxis(self.s[self.pippi_s_locs], -1, 0)
					        cond = (sins[2] != 0) + 2*(sins[0] != 0)  # choices order: 0 R F 1
					        self.c[self.pippi_c_locs] = np.choose(cond, [TMAX, sins[1], TMIN, TMIN])
					        self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX])
					        self.c[self.pippi_c_locs+2] = TMAX

					    def c_prop(self, sims=None, sd=0.0, seed=1):
					        """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs.

					        :param sims: Number of parallel simulations to execute. If None, all available simulations are performed.
					        :param sd: Standard deviation for injection of random delay variation. Active, if value is positive.
					        :param seed: Random seed for delay variations.
					        """
					        sims = min(sims or self.sims, self.sims)
					        for op_start, op_stop in zip(self.level_starts, self.level_stops):
					            self.overflows += level_eval(self.ops, op_start, op_stop, self.c, self.vat, 0, sims,
					                                         self.timing, self.params, sd, seed)
					        self.lst_eat_valid = False

					    def c_to_s(self, time=TMAX, sd=0.0, seed=1):
					        """Simulates a capture operation at all sequential elements and primary outputs.

					        Propagated waveforms in ``self.c`` at and around the given capture time are analyzed and
					        the results are stored in ``self.s``.

					        :param time: The desired capture time. By default, a capture of the settled value is performed.
					        :param sd: A standard deviation for uncertainty in the actual capture time.
					        :param seed: The random seed for a capture with uncertainty.
					        """
					        for s_loc, (c_loc, c_len, _) in zip(self.poppo_s_locs, self.vat[self.ppo_offset+self.poppo_s_locs]):
					            for vector in range(self.sims):
					                self.s[s_loc, vector, 3:] = self.wave_capture(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed)

					    def s_ppo_to_ppi(self, time=0.0):
					        """Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI).
					        Each PPI transition is constructed from its previous final value, the
					        given time, and the sampled captured value of its PPO. Reads and modifies ``self.s``.

					        :param time: The transition time at the inputs (usually 0.0).
					        """
					        self.s[self.ppio_s_locs, :, 0] = self.s[self.ppio_s_locs, :, 2]
					        self.s[self.ppio_s_locs, :, 1] = time
					        self.s[self.ppio_s_locs, :, 2] = self.s[self.ppio_s_locs, :, 8]

					    @staticmethod
					    def wave_capture(c, c_loc, c_len, vector, time=TMAX, sd=0.0, seed=1):
					        """Analyzes one waveform and returns the values ``c_to_s()`` stores in ``s[..., 3:]``.

					        :param c: The waveform memory (indexed as ``c[location, vector]``).
					        :param c_loc: Start location of the waveform in ``c``.
					        :param c_len: Number of entries of the waveform in ``c``.
					        :param vector: Index of the simulation (column of ``c``) to analyze.
					        :param time: The capture time.
					        :param sd: Standard deviation of the capture time. If positive, a capture probability is
					            accumulated and the captured value may be decided pseudo-randomly.
					        :param seed: Seed for the pseudo-random decision.
					        :return: An 8-tuple ``(initial value, EAT, LST, final value, capture value, sampled capture
					            value, 0, overflow flag)``.
					        """
					        s_sqrt2 = sd * math.sqrt(2)
					        m = 0.5  # sign alternates with every transition
					        acc = 0.0
					        eat = TMAX
					        lst = TMIN
					        ovl = 0
					        val = int(0)
					        final = int(0)
					        w = c[c_loc:c_loc+c_len, vector]
					        for t in w:
					            if t >= TMAX:
					                # End of waveform; TMAX_OVL marks a waveform that overflowed.
					                if t == TMAX_OVL:
					                    ovl = 1
					                break
					            m = -m
					            final ^= 1
					            if t < time:
					                val ^= 1
					            # A leading TMIN only encodes an initial logic-1; it is not a real transition.
					            if t <= TMIN: continue
					            if s_sqrt2 > 0:
					                acc += m * (1 + math.erf((t - time) / s_sqrt2))
					            eat = min(eat, t)
					            lst = max(lst, t)
					        if s_sqrt2 > 0:
					            if m < 0:
					                acc += 1
					            if acc >= 0.99:
					                val = 1
					            elif acc > 0.01:
					                # Uncertain capture: draw a pseudo-random number from a seed that
					                # depends on the vector and the waveform location.
					                seed = (seed << 4) + (vector << 20) + c_loc
					                seed = int(0xDEECE66D) * seed + 0xB
					                seed = int(0xDEECE66D) * seed + 0xB
					                rnd = float((seed >> 8) & 0xffffff) / float(1 << 24)
					                val = rnd < acc
					            else:
					                val = 0
					        else:
					            acc = val

					        return (w[0] <= TMIN), eat, lst, final, acc, val, 0, ovl
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@numba.njit
					def level_eval(ops, op_start, op_stop, c, vat, st_start, st_stop, line_times, params, sd, seed):
					    """Evaluates the operations ``ops[op_start:op_stop]`` for the simulations ``st_start`` to ``st_stop - 1``.

					    Every operation is passed to ``wave_eval`` once per simulation index together with
					    that simulation's row of ``params``.

					    :return: The sum of the overflow counts reported by all ``wave_eval`` calls.
					    """
					    total = 0
					    for op_idx in range(op_start, op_stop):
					        current_op = ops[op_idx]
					        for sim_idx in range(st_start, st_stop):
					            total += wave_eval(current_op, c, vat, sim_idx, line_times, params[sim_idx], sd, seed)
					    return total
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@numba.njit
					def rand_gauss(seed, sd):
					    """Returns a pseudo-random scaling factor around 1.0 derived from ``seed``.

					    For ``sd <= 0.0`` the factor is exactly 1.0. Otherwise, twelve pseudo-random
					    numbers in [0, 1) are summed and shifted by -6.0, the result is scaled by ``sd``,
					    and the draw is repeated until its magnitude does not exceed 0.5. The returned
					    factor therefore lies in [0.5, 1.5].
					    """
					    if sd <= 0.0:
					        return 1.0
					    max_dev = 0.5
					    while True:
					        deviation = -6.0
					        for _ in range(12):
					            # linear congruential step; bits 8..31 of the state give the next uniform value
					            seed = int(0xDEECE66D) * seed + 0xB
					            deviation += float((seed >> 8) & 0xffffff) / float(1 << 24)
					        deviation *= sd
					        if abs(deviation) <= max_dev:
					            return deviation + 1.0
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@numba.njit
					def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
					    """Evaluates a single operation for one simulation and writes the output waveform.

					    :param op: Tuple ``(lut, z_idx, a_idx, b_idx, c_idx, d_idx)``: the look-up table of the
					        operation, the index of its output line and the indices of its four input lines.
					    :param cbuf: Waveform memory, indexed as ``cbuf[location, st_idx]``.
					    :param vat: Table indexed by line; ``vat[line, 0]`` is the start location of the line's
					        waveform in ``cbuf`` and ``vat[line, 1]`` its capacity.
					    :param st_idx: Index of the simulation (column of ``cbuf``) to evaluate.
					    :param line_times: ``line_times[line, 0, polarity]`` is added to input transition times as
					        delay, ``line_times[line, 1, polarity]`` is used as pulse-width threshold.
					    :param param: ``param[0]`` scales all delays and thresholds. If ``int(param[1])`` equals an
					        input line index, ``param[2 + polarity]`` is added on top for that line.
					    :param sd: Standard deviation passed to ``rand_gauss`` for delay variation.
					    :param seed: Seed for the delay variation.
					    :return: The number of overflows that occurred while writing the output waveform.
					    """
					    lut, z_idx, a_idx, b_idx, c_idx, d_idx = op
					    overflows = int(0)

					    # Base seed that depends on the output line and the simulation index.
					    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)

					    # Start locations of the input waveforms; location and capacity of the output waveform.
					    a_mem = vat[a_idx, 0]
					    b_mem = vat[b_idx, 0]
					    c_mem = vat[c_idx, 0]
					    d_mem = vat[d_idx, 0]
					    z_mem, z_cap, _ = vat[z_idx]

					    # Read cursors into the input waveforms.
					    a_cur = int(0)
					    b_cur = int(0)
					    c_cur = int(0)
					    d_cur = int(0)
					    # z_cur is the write cursor into the output waveform; its lowest bit is the
					    # current output value. It starts at the LUT output for all inputs being 0.
					    z_cur = lut & 1
					    if z_cur == 1:
					        # An output that starts at logic-1 begins with a TMIN entry.
					        cbuf[z_mem, st_idx] = TMIN

					    # First transition time of every input, shifted by its (optionally varied) delay.
					    a = cbuf[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd) * param[0]
					    if int(param[1]) == a_idx: a += param[2+z_cur]
					    b = cbuf[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd) * param[0]
					    if int(param[1]) == b_idx: b += param[2+z_cur]
					    c = cbuf[c_mem, st_idx] + line_times[c_idx, 0, z_cur] * rand_gauss(_seed ^ c_mem ^ z_cur, sd) * param[0]
					    if int(param[1]) == c_idx: c += param[2+z_cur]
					    d = cbuf[d_mem, st_idx] + line_times[d_idx, 0, z_cur] * rand_gauss(_seed ^ d_mem ^ z_cur, sd) * param[0]
					    if int(param[1]) == d_idx: d += param[2+z_cur]

					    # Time of the most recent toggle stored in the output waveform.
					    previous_t = TMIN

					    current_t = min(a, b, c, d)
					    # Bit mask of the current input values (bit 0: a, bit 1: b, bit 2: c, bit 3: d).
					    inputs = int(0)

					    # Process input transitions in order of their delayed times until all inputs are exhausted.
					    while current_t < TMAX:
					        z_val = z_cur & 1
					        # Consume the transition on the input that switches at current_t: advance its
					        # cursor, fetch its next delayed transition time, determine the pulse threshold
					        # of this input and toggle its bit in ``inputs``.
					        if a == current_t:
					            a_cur += 1
					            a = cbuf[a_mem + a_cur, st_idx]
					            a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ a_mem ^ z_val ^ 1, sd) * param[0]
					            thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd) * param[0]
					            if int(param[1]) == a_idx:
					                a += param[2+(z_val^1)]
					                thresh += param[2+z_val]
					            inputs ^= 1
					            next_t = a

					        elif b == current_t:
					            b_cur += 1
					            b = cbuf[b_mem + b_cur, st_idx]
					            b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ b_mem ^ z_val ^ 1, sd) * param[0]
					            thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd) * param[0]
					            if int(param[1]) == b_idx:
					                b += param[2+(z_val^1)]
					                thresh += param[2+z_val]
					            inputs ^= 2
					            next_t = b

					        elif c == current_t:
					            c_cur += 1
					            c = cbuf[c_mem + c_cur, st_idx]
					            c += line_times[c_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ c_mem ^ z_val ^ 1, sd) * param[0]
					            thresh = line_times[c_idx, 1, z_val] * rand_gauss(_seed ^ c_mem ^ z_val, sd) * param[0]
					            if int(param[1]) == c_idx:
					                c += param[2+(z_val^1)]
					                thresh += param[2+z_val]
					            inputs ^= 4
					            next_t = c

					        else:
					            d_cur += 1
					            d = cbuf[d_mem + d_cur, st_idx]
					            d += line_times[d_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ d_mem ^ z_val ^ 1, sd) * param[0]
					            thresh = line_times[d_idx, 1, z_val] * rand_gauss(_seed ^ d_mem ^ z_val, sd) * param[0]
					            if int(param[1]) == d_idx:
					                d += param[2+(z_val^1)]
					                thresh += param[2+z_val]
					            inputs ^= 8
					            next_t = d

					        # The output only changes if the LUT value for the new inputs differs from the current output value.
					        if (z_cur & 1) != ((lut >> inputs) & 1):
					            # we generate a toggle in z_mem, if:
					            #   ( it is the first toggle in z_mem OR
					            #   following toggle is earlier OR
					            #   pulse is wide enough ) AND enough space in z_mem.
					            if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh:
					                if z_cur < (z_cap - 1):
					                    cbuf[z_mem + z_cur, st_idx] = current_t
					                    previous_t = current_t
					                    z_cur += 1
					                else:
					                    # No space left: count an overflow and drop the most recently stored toggle.
					                    overflows += 1
					                    previous_t = cbuf[z_mem + z_cur - 1, st_idx]
					                    z_cur -= 1
					            else:
					                # Pulse too short: remove the previously stored toggle instead of adding a new one.
					                z_cur -= 1
					                if z_cur > 0:
					                    previous_t = cbuf[z_mem + z_cur - 1, st_idx]
					                else:
					                    previous_t = TMIN

					        current_t = min(a, b, c, d)

					    # Terminate the output waveform.
					    if overflows > 0:
					        cbuf[z_mem + z_cur, st_idx] = TMAX_OVL
					    else:
					        cbuf[z_mem + z_cur, st_idx] = a if a == max(a, b, c, d) else b if b == max(a, b, c, d) else c if c == max(a, b, c, d) else d   # propagate overflow flags by storing biggest TMAX from input

					    return overflows
				
			||||||
@ -0,0 +1,174 @@ | 
				
			|||||||
 | 
					import numpy as np | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from kyupy.wave_sim4 import WaveSim, wave_eval, TMIN, TMAX | 
				
			||||||
 | 
					from kyupy.logic_sim import LogicSim | 
				
			||||||
 | 
					from kyupy import verilog, sdf, logic, bench | 
				
			||||||
 | 
					from kyupy.logic import MVArray, BPArray | 
				
			||||||
 | 
					from kyupy.sim import SimPrim | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_nand_delays():
					    """Exercise ``wave_eval`` on a single NAND4 with per-input delays.

					    The buffer ``c`` holds five waveforms of capacity 16. Waveforms 0-3 are
					    written as the gate inputs A-D, waveform 4 is read back as the output Z.
					    """
					    capacity = 16
					    n_waves = 5
					    op = (SimPrim.NAND4, 4, 0, 1, 2, 3)
					    c = np.full((n_waves * capacity, 1), TMAX)  # 5 waveforms of capacity 16

					    # 1:1 mapping: waveform k starts at offset k*16 and has 16 entries.
					    vat = np.zeros((n_waves, 3), dtype='int')
					    for k in range(n_waves):
					        vat[k] = k * capacity, capacity, 0

					    # SDF specifies IOPATH delays with respect to output polarity.
					    # SDF pulse rejection value is determined by the IOPATH causing the last
					    # transition and the polarity of that last transition.
					    # line_times[pin, 0, 0/1]: pin -> Z rise / fall delay
					    # line_times[pin, 1, 0/1]: pin -> Z negative / positive pulse limit
					    #                          (negative pulse terminates in rising Z)
					    line_times = np.zeros((n_waves, 2, 2))
					    io_paths = (
					        (0.1, 0.2),  # A -> Z
					        (0.3, 0.4),  # B -> Z
					        (0.5, 0.6),  # C -> Z
					        (0.7, 0.8),  # D -> Z
					    )
					    for pin, (rise, fall) in enumerate(io_paths):
					        line_times[pin, :, 0] = rise
					        line_times[pin, :, 1] = fall

					    sdata = np.asarray([1, -1, 0, 0], dtype='float32')

					    def wave_assert(inputs, output):
					        # Overwrite the leading entries of the first len(inputs) waveforms;
					        # waveforms not listed keep whatever an earlier call stored there.
					        for row, samples in zip(c.reshape(-1, capacity), inputs):
					            row[:len(samples)] = samples
					        wave_eval(op, c, vat, 0, line_times, sdata)
					        # Compare the leading entries of waveform 4 with the expectation.
					        for pos, expected in enumerate(output):
					            np.testing.assert_allclose(c.reshape(-1, capacity)[4, pos], expected)

					    wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1
					    wave_assert([[TMIN,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(1,0,1,1) => 1
					    wave_assert([[TMIN,TMAX],[TMIN,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMAX])      # NAND(1,1,1,1) => 0

					    # Keep inputs C=1 and D=1.
					    wave_assert([[1,TMAX],[2,TMAX]], [TMIN,2.4,TMAX])              # _/⎺⎺⎺ NAND __/⎺⎺ => ⎺⎺⎺\___ (B->Z fall delay)
					    wave_assert([[TMIN,TMAX],[TMIN,2,TMAX]],  [2.3,TMAX])          # ⎺⎺⎺⎺⎺ NAND ⎺⎺\__ => ___/⎺⎺⎺ (B->Z rise delay)
					    wave_assert([[TMIN,TMAX],[TMIN,2,2.35,TMAX]], [2.3,2.75,TMAX]) # ⎺⎺⎺⎺⎺ NAND ⎺\_/⎺ => __/⎺⎺\_ (pos pulse, .35@B -> .45@Z)
					    wave_assert([[TMIN,TMAX],[TMIN,2,2.25,TMAX]], [TMAX])          # ⎺⎺⎺⎺⎺ NAND ⎺\_/⎺ => _______ (pos pulse, .25@B -> .35@Z, filtered)
					    wave_assert([[TMIN,TMAX],[2,2.45,TMAX]], [TMIN,2.4,2.75,TMAX]) # ⎺⎺⎺⎺⎺ NAND _/⎺\_ => ⎺⎺\_/⎺⎺ (neg pulse, .45@B -> .35@Z)
					    wave_assert([[TMIN,TMAX],[2,2.35,TMAX]], [TMIN,TMAX])          # ⎺⎺⎺⎺⎺ NAND _/⎺\_ => ⎺⎺⎺⎺⎺⎺⎺ (neg pulse, .35@B -> .25@Z, filtered)
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_tiny_circuit():
					    """Simulate a small AND/OR/NOT circuit and check waveforms and captures.

					    Four simulations are set up through ``wsim.s``. The test then checks the
					    first three entries of each input waveform after ``s_to_c()``, of each
					    output waveform after ``c_prop()``, and finally fields 3..6 of the output
					    entries in ``wsim.s`` after ``c_to_s()``.
					    """
					    c = bench.parse('input(x, y) output(a, o, n) a=and(x,y) o=or(x,y) n=not(x)')
					    lt = np.zeros((len(c.lines), 2, 2))
					    lt[:, 0, :] = 1.0  # unit delay for all lines
					    wsim = WaveSim(c, lt)
					    print(wsim.prim_counts)
					    assert len(wsim.s) == 5

					    def expect_waves(vat_row, per_sim):
					        # Look up where the waveform starts in wsim.c, then compare its
					        # first three entries for every listed simulation.
					        start = wsim.vat[vat_row, 0]
					        for sim, wave in enumerate(per_sim):
					            np.testing.assert_allclose(wsim.c[start:start + 3, sim], wave)

					    def expect_captures(node, per_sim):
					        # Compare fields 3..6 of wsim.s for every listed simulation.
					        for sim, fields in enumerate(per_sim):
					            np.testing.assert_allclose(wsim.s[node, sim, 3:7], fields)

					    # First three fields of wsim.s for inputs x (node 0) and y (node 1),
					    # one triple per simulation.
					    stimuli = (
					        ((0, 0.1, 0), (0, 0.2, 1), (1, 0.3, 0), (1, 0.4, 1)),  # values for x
					        ((1, 0.5, 0), (1, 0.6, 0), (1, 0.7, 0), (0, 0.8, 1)),  # values for y
					    )
					    for node, per_sim in enumerate(stimuli):
					        for sim, triple in enumerate(per_sim):
					            wsim.s[node, sim, :3] = triple

					    wsim.s_to_c()

					    # check x waveforms
					    expect_waves(wsim.ppi_offset + 0, [
					        [TMAX, TMAX, TMAX],
					        [0.2, TMAX, TMAX],
					        [TMIN, 0.3, TMAX],
					        [TMIN, TMAX, TMAX],
					    ])
					    # check y waveforms
					    expect_waves(wsim.ppi_offset + 1, [
					        [TMIN, 0.5, TMAX],
					        [TMIN, 0.6, TMAX],
					        [TMIN, 0.7, TMAX],
					        [0.8, TMAX, TMAX],
					    ])

					    wsim.c_prop()

					    # check a waveforms
					    expect_waves(wsim.ppo_offset + 2, [
					        [TMAX, TMAX, TMAX],
					        [1.2, 1.6, TMAX],
					        [TMIN, 1.3, TMAX],
					        [1.8, TMAX, TMAX],
					    ])
					    # check o waveforms
					    expect_waves(wsim.ppo_offset + 3, [
					        [TMIN, 1.5, TMAX],
					        [TMIN, TMAX, TMAX],
					        [TMIN, 1.7, TMAX],
					        [TMIN, TMAX, TMAX],
					    ])
					    # check n waveforms
					    expect_waves(wsim.ppo_offset + 4, [
					        [TMIN, TMAX, TMAX],
					        [TMIN, 1.2, TMAX],
					        [1.3, TMAX, TMAX],
					        [TMAX, TMAX, TMAX],
					    ])

					    wsim.c_to_s()

					    # check a captures
					    expect_captures(2, [
					        [0, TMAX, TMIN, 0],
					        [0, 1.2, 1.6, 0],
					        [1, 1.3, 1.3, 0],
					        [0, 1.8, 1.8, 1],
					    ])
					    # check o captures
					    expect_captures(3, [
					        [1, 1.5, 1.5, 0],
					        [1, TMAX, TMIN, 1],
					        [1, 1.7, 1.7, 0],
					        [1, TMAX, TMIN, 1],
					    ])
					    # check n captures
					    expect_captures(4, [
					        [1, TMAX, TMIN, 1],
					        [1, 1.2, 1.2, 0],
					        [0, 1.3, 1.3, 1],
					        [0, TMAX, TMIN, 0],
					    ])
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def compare_to_logic_sim(wsim: WaveSim):
					    """Cross-check the responses of ``wsim`` against a ``LogicSim`` run.

					    Random stimuli drawn from ZERO, ONE, RISE and FALL (fixed seed 10) are
					    written into ``wsim.s``, propagated through the waveform simulator and
					    read back. The same stimuli are simulated with ``LogicSim`` on
					    ``wsim.circuit``. The two results are compared as strings after mapping
					    'P' to '0' and 'N' to '1'.

					    :param wsim: The waveform simulator under test. Its ``s`` array is
					        overwritten and ``s_to_c()``, ``c_prop()`` and ``c_to_s()`` are called.
					    :raises AssertionError: If a response string differs from the expected one.
					    """
					    tests = MVArray((len(wsim.s_nodes), wsim.sims))
					    choices = np.asarray([logic.ZERO, logic.ONE, logic.RISE, logic.FALL], dtype=np.uint8)
					    rng = np.random.default_rng(10)  # fixed seed keeps the test reproducible
					    tests.data[...] = rng.choice(choices, tests.data.shape)

					    # Bit 1 of each test value goes to fields 0 and 3, bit 0 to fields 2 and 6;
					    # field 1 is set to 0.0 everywhere.
					    wsim.s[..., 0] = (tests.data & 2) >> 1
					    wsim.s[..., 3] = (tests.data & 2) >> 1
					    wsim.s[..., 1] = 0.0
					    wsim.s[..., 2] = tests.data & 1
					    wsim.s[..., 6] = tests.data & 1

					    wsim.s_to_c()
					    wsim.c_prop()
					    wsim.c_to_s()

					    # Rebuild multi-valued responses from fields 6 (bit 0) and 3 (bit 1).
					    resp = MVArray(tests)
					    resp.data[...] = wsim.s[..., 6].astype(np.uint8) | (wsim.s[..., 3].astype(np.uint8)<<1)
					    resp.data |= ((resp.data ^ (resp.data >> 1)) & 1) << 2  # transitions

					    # Reference run with the logic simulator on the same stimuli.
					    tests_bp = BPArray(tests)
					    lsim = LogicSim(wsim.circuit, len(tests_bp))
					    lsim.assign(tests_bp)
					    lsim.propagate()
					    exp_bp = BPArray(tests_bp)
					    lsim.capture(exp_bp)
					    exp = MVArray(exp_bp)

					    # Iterate over wsim.sims entries instead of a hard-coded 8 so the check
					    # follows the size the stimulus array was created with.
					    # NOTE(review): assumes indexing an MVArray selects one simulation's
					    # vector - confirm against kyupy.logic.MVArray.
					    for i in range(wsim.sims):
					        exp_str = exp[i].replace('P', '0').replace('N', '1')
					        res_str = resp[i].replace('P', '0').replace('N', '1')
					        assert res_str == exp_str, f'response mismatch at index {i}'
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_b14(mydir):
					    """Compare WaveSim against LogicSim on the b14 netlist with SDF delays.

					    :param mydir: Directory containing ``b14.v.gz`` and ``b14.sdf.gz``;
					        must support the ``/`` path operator.
					    """
					    circuit = verilog.load(mydir / 'b14.v.gz', branchforks=True)
					    delays = sdf.load(mydir / 'b14.sdf.gz').annotation(circuit)
					    # NOTE(review): the third argument 8 is presumably the number of
					    # parallel simulations - confirm against WaveSim's signature.
					    compare_to_logic_sim(WaveSim(circuit, delays, 8))
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_b14_strip_forks(mydir):
					    """Compare WaveSim built with ``strip_forks=True`` against LogicSim on b14.

					    :param mydir: Directory containing ``b14.v.gz`` and ``b14.sdf.gz``;
					        must support the ``/`` path operator.
					    """
					    circuit = verilog.load(mydir / 'b14.v.gz', branchforks=True)
					    delays = sdf.load(mydir / 'b14.sdf.gz').annotation(circuit)
					    # Same setup as the plain b14 test, but the simulator is constructed
					    # with strip_forks=True.
					    simulator = WaveSim(circuit, delays, 8, strip_forks=True)
					    compare_to_logic_sim(simulator)
				
			||||||
 | 
					
 | 
				
			||||||
					Loading…
					
					
				
		Reference in new issue