@numba.njit
def wave_eval4(op, state, sat, st_idx, line_times, sdata, sd=0.0, seed=0):
    """Evaluate one look-up-table gate on two input waveforms and write the output waveform.

    The two input waveforms are merged event by event in time order. After
    each input event the look-up table ``lut`` is consulted with the current
    input bit pattern; if the table output differs from the current output
    value, a toggle is either appended to the output waveform or, when the
    resulting pulse is too short, the previously written toggle is removed.

    NOTE(review): despite the ``4`` in the name, the body as written unpacks
    and merges exactly two inputs (``a`` and ``b``) -- confirm whether more
    inputs are intended.

    Args:
        op: Sequence unpacked as ``(lut, z_idx, a_idx, b_idx)``: the look-up
            table bits, the output line index and the two input line indices.
        state: 2-D array indexed as ``state[mem_offset, st_idx]``; read for
            input transition times and written for output transition times.
        sat: Table indexed by line. ``sat[line, 0]`` is used as the line's
            start offset into ``state``; ``sat[z_idx]`` unpacks into
            ``(offset, capacity, _)``.
        st_idx: Column of ``state`` that is read and written.
        line_times: Array indexed as ``line_times[line, k, v]``. ``k == 0`` is
            added to input event times, ``k == 1`` is used as the minimum
            pulse width threshold. Presumably ``v`` selects a polarity
            dependent value -- TODO confirm.
        sdata: ``sdata[0]`` scales every value taken from ``line_times``;
            ``int(sdata[1])`` names one line whose values additionally get
            ``sdata[2]`` / ``sdata[3]`` added (selected by ``v`` as above).
        sd: Passed through to ``rand_gauss`` as its second argument.
        seed: Mixed with ``z_idx`` and ``st_idx`` into the base seed handed to
            ``rand_gauss``.

    Returns:
        Number of output toggles that could not be stored because the output
        waveform capacity was exhausted.
    """
    lut, z_idx, a_idx, b_idx = op
    overflows = int(0)

    # Base seed for this (seed, output line, state column) combination; it is
    # further XOR-ed with the input offset and a 0/1 value per rand_gauss call.
    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)

    # Start offsets of the input waveforms and offset/capacity of the output.
    a_mem = sat[a_idx, 0]
    b_mem = sat[b_idx, 0]
    z_mem, z_cap, _ = sat[z_idx]

    # Read cursors into the inputs and write cursor into the output.
    # The parity of z_cur (z_cur & 1) is used below as the current output value.
    a_cur = int(0)
    b_cur = int(0)
    z_cur = lut & 1
    if z_cur == 1:
        # LUT output for input pattern 0 is 1: store TMIN as first output entry.
        state[z_mem, st_idx] = TMIN

    # Time of the first event on each input, shifted by the scaled line_times
    # entry and, for the line selected by sdata[1], by an extra sdata offset.
    a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd) * sdata[0]
    if int(sdata[1]) == a_idx: a += sdata[2+z_cur]
    b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd) * sdata[0]
    if int(sdata[1]) == b_idx: b += sdata[2+z_cur]

    # Time of the most recently stored output toggle (TMIN if there is none).
    previous_t = TMIN

    # Time of the event processed in the next loop iteration.
    current_t = min(a, b)
    # Bit pattern of current input values: bit 0 is input a, bit 1 is input b.
    inputs = int(0)

    while current_t < TMAX:
        z_val = z_cur & 1
        # Consume the earlier of the two pending events (input a wins ties),
        # then fetch the following event of that same input.
        if b < a:
            b_cur += 1
            b = state[b_mem + b_cur, st_idx]
            b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ b_mem ^ z_val ^ 1, sd) * sdata[0]
            thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd) * sdata[0]
            if int(sdata[1]) == b_idx:
                b += sdata[2+(z_val^1)]
                thresh += sdata[2+z_val]
            inputs ^= 2
            next_t = b
        else:
            a_cur += 1
            a = state[a_mem + a_cur, st_idx]
            a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ a_mem ^ z_val ^ 1, sd) * sdata[0]
            thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd) * sdata[0]
            if int(sdata[1]) == a_idx:
                a += sdata[2+(z_val^1)]
                thresh += sdata[2+z_val]
            inputs ^= 1
            next_t = a

        # Only act when the LUT output for the new input pattern differs from
        # the current output value.
        if (z_cur & 1) != ((lut >> inputs) & 1):
            # we generate a toggle in z_mem, if:
            #   ( it is the first toggle in z_mem OR
            #   following toggle is earlier OR
            #   pulse is wide enough ) AND enough space in z_mem.
            if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh:
                if z_cur < (z_cap - 1):
                    state[z_mem + z_cur, st_idx] = current_t
                    previous_t = current_t
                    z_cur += 1
                else:
                    # No room left: count the overflow and step the write
                    # cursor back by one, which also flips the output parity.
                    overflows += 1
                    previous_t = state[z_mem + z_cur - 1, st_idx]
                    z_cur -= 1
            else:
                # Pulse narrower than thresh: drop the previously stored toggle
                # instead of storing a new one.
                z_cur -= 1
                if z_cur > 0:
                    previous_t = state[z_mem + z_cur - 1, st_idx]
                else:
                    previous_t = TMIN
        current_t = min(a, b)

    # Write the entry that terminates the output waveform.
    if overflows > 0:
        state[z_mem + z_cur, st_idx] = TMAX_OVL
    else:
        state[z_mem + z_cur, st_idx] = a if a > b else b  # propagate overflow flags by storing biggest TMAX from input

    return overflows