From ea45a326ec44189008cb9d519e3bb9e9f4bc6407 Mon Sep 17 00:00:00 2001
From: Stefan Holst <holst@ci.kyutech.ac.jp>
Date: Sun, 9 Jul 2023 13:42:07 +0900
Subject: [PATCH] add latch, fix xor delays, improve test

---
 src/kyupy/sdf.py      |   3 +
 src/kyupy/techlib.py  |   4 +
 src/kyupy/wave_sim.py |  37 +++----
 tests/test_sdf.py     | 217 +++++++++++++++++++++++++++++++++---------
 4 files changed, 200 insertions(+), 61 deletions(-)

diff --git a/src/kyupy/sdf.py b/src/kyupy/sdf.py
index a726771..9e7e1b1 100644
--- a/src/kyupy/sdf.py
+++ b/src/kyupy/sdf.py
@@ -40,6 +40,9 @@ class DelayFile:
 
         All IOPATH delays for a node ``n`` are annotated to the line connected to the input pin specified in the IOPATH.
 
+        Only two delvals per delval_list are supported. The first delval applies to a rising/posedge transition, the
+        second delval to a falling/negedge transition at the output of the IOPATH (SDF spec, pp. 3-17).
+
         * Axis 0: dataset (usually 3 datasets per SDF-file)
         * Axis 1: line index (e.g. ``n.ins[0]``, ``n.ins[1]``)
         * Axis 2: polarity of the transition at the IOPATH-input (e.g. at ``n.ins[0]`` or ``n.ins[1]``), 0='rising/posedge', 1='falling/negedge'
diff --git a/src/kyupy/techlib.py b/src/kyupy/techlib.py
index 472e263..c686083 100644
--- a/src/kyupy/techlib.py
+++ b/src/kyupy/techlib.py
@@ -289,6 +289,8 @@ SDFFASRX{1,2}$  input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN) DR=AND2(D,RSTB) SET=IN
 SDFFASX{1,2}$   input(D,CLK,SETB,SE,SI)      output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
 SDFFSSRX{1,2}$  input(CLK,D,RSTB,SETB,SI,SE) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
 SDFFX{1,2}$     input(D,CLK,SE,SI)           output(Q,QN) DI=MUX21(D,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+
+LATCHX{1,2}$ input(D,CLK) output(Q,QN) Q=LATCH(D,CLK) QN=INV1(Q) ;
 """.replace('$','{,_LVT,_HVT}'))
 
 
@@ -378,4 +380,6 @@ SDFFASRX{1,2}$  input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN) DR=AND2(D,RSTB) SET=IN
 SDFFASX{1,2}$   input(D,CLK,SETB,SE,SI)      output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
 SDFFSSRX{1,2}$  input(CLK,D,RSTB,SETB,SI,SE) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
 SDFFX{1,2}$     input(D,CLK,SE,SI)           output(Q,QN) DI=MUX21(D,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+
+LATCHX{1,2}$ input(D,CLK) output(Q,QN) Q=LATCH(D,CLK) QN=INV1(Q) ;
 """.replace('$','_RVT'))
\ No newline at end of file
diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py
index c9f25e6..4c0656c 100644
--- a/src/kyupy/wave_sim.py
+++ b/src/kyupy/wave_sim.py
@@ -93,8 +93,9 @@ class WaveSim(sim.SimOps):
         self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.simctl_int)])
 
     def __repr__(self):
-        return f'<{type(self).__name__} {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \
-               f'levels={len(self.level_starts)} mem={hr_bytes(self.nbytes)}>'
+        dev = 'GPU' if hasattr(self.c, 'copy_to_host') else 'CPU'
+        return f'{{name: "{self.circuit.name}", device: "{dev}", sims: {self.sims}, ops: {len(self.ops)}, ' + \
+               f'levels: {len(self.level_starts)}, nbytes: {self.nbytes}}}'
 
     def s_to_c(self):
         """Transfers values of sequential elements and primary inputs to the combinational portion.
@@ -198,27 +199,27 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
         if a == current_t:
             a_cur += 1
             inputs ^= 1
-            thresh = delays[a_idx, 0, z_val]
-            a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val]
-            next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val ^ 1]
+            thresh = delays[a_idx, a_cur & 1, z_val]
+            a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val]
+            next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, (a_cur & 1) ^ 1, z_val ^ 1]
         elif b == current_t:
             b_cur += 1
             inputs ^= 2
-            thresh = delays[b_idx, 0, z_val]
-            b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val]
-            next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val ^ 1]
+            thresh = delays[b_idx, b_cur & 1, z_val]
+            b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val]
+            next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, (b_cur & 1) ^ 1, z_val ^ 1]
         elif c == current_t:
             c_cur += 1
             inputs ^= 4
-            thresh = delays[c_idx, 0, z_val]
-            c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val]
-            next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val ^ 1]
+            thresh = delays[c_idx, c_cur & 1, z_val]
+            c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val]
+            next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, (c_cur & 1) ^ 1, z_val ^ 1]
         else:
             d_cur += 1
             inputs ^= 8
-            thresh = delays[d_idx, 0, z_val]
-            d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val]
-            next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val ^ 1]
+            thresh = delays[d_idx, d_cur & 1, z_val]
+            d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val]
+            next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1]
 
         if (z_cur & 1) != ((lut >> inputs) & 1):
             # we generate an edge in z_mem, if ...
@@ -240,10 +241,10 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
 
             # output value of cell changed. update all delayed inputs.
             z_val = z_val ^ 1
-            a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val]
-            b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val]
-            c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val]
-            d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val]
+            a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val]
+            b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val]
+            c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val]
+            d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val]
 
         current_t = min(a, b, c, d)
 
diff --git a/tests/test_sdf.py b/tests/test_sdf.py
index 79e586d..6a47b86 100644
--- a/tests/test_sdf.py
+++ b/tests/test_sdf.py
@@ -1,5 +1,7 @@
-from kyupy import sdf, verilog
+import numpy as np
 
+from kyupy import sdf, verilog, bench
+from kyupy.wave_sim import WaveSim, TMAX, TMIN
 
 def test_parse():
     test = '''
@@ -16,55 +18,55 @@ def test_parse():
     (TEMPERATURE 25.00:25.00:25.00)
     (TIMESCALE 1ns)
     (CELL
-      (CELLTYPE "b14")
-      (INSTANCE)
-      (DELAY
-        (ABSOLUTE
-        (INTERCONNECT U621/ZN U19246/IN1 (0.000:0.000:0.000))
-        (INTERCONNECT U13292/QN U19246/IN2 (0.001:0.001:0.001))
-        (INTERCONNECT U15050/QN U19247/IN1 (0.000:0.000:0.000))
-        (INTERCONNECT U13293/QN U19247/IN2 (0.000:0.000:0.000) (0.000:0.000:0.000))
+        (CELLTYPE "b14")
+        (INSTANCE)
+        (DELAY
+            (ABSOLUTE
+                (INTERCONNECT U621/ZN U19246/IN1 (0.000:0.000:0.000))
+                (INTERCONNECT U13292/QN U19246/IN2 (0.001:0.001:0.001))
+                (INTERCONNECT U15050/QN U19247/IN1 (0.000:0.000:0.000))
+                (INTERCONNECT U13293/QN U19247/IN2 (0.000:0.000:0.000) (0.000:0.000:0.000))
+            )
         )
-      )
     )
     (CELL
-      (CELLTYPE "INVX2")
-      (INSTANCE U78)
-      (DELAY
-        (ABSOLUTE
-        (IOPATH INP ZN (0.201:0.227:0.227) (0.250:0.271:0.271))
+        (CELLTYPE "INVX2")
+        (INSTANCE U78)
+        (DELAY
+            (ABSOLUTE
+                (IOPATH INP ZN (0.201:0.227:0.227) (0.250:0.271:0.271))
+            )
         )
-      )
     )
     (CELL
-      (CELLTYPE "SDFFARX1")
-      (INSTANCE reg3_reg_1_0)
-      (DELAY
-        (ABSOLUTE
-        (IOPATH (posedge CLK) Q (0.707:0.710:0.710) (0.737:0.740:0.740))
-        (IOPATH (negedge RSTB) Q () (0.909:0.948:0.948))
-        (IOPATH (posedge CLK) QN (0.585:0.589:0.589) (0.545:0.550:0.550))
-        (IOPATH (negedge RSTB) QN (1.546:1.593:1.593) ())
+        (CELLTYPE "SDFFARX1")
+        (INSTANCE reg3_reg_1_0)
+        (DELAY
+            (ABSOLUTE
+                (IOPATH (posedge CLK) Q (0.707:0.710:0.710) (0.737:0.740:0.740))
+                (IOPATH (negedge RSTB) Q () (0.909:0.948:0.948))
+                (IOPATH (posedge CLK) QN (0.585:0.589:0.589) (0.545:0.550:0.550))
+                (IOPATH (negedge RSTB) QN (1.546:1.593:1.593) ())
+            )
         )
-      )
-      (TIMINGCHECK
-        (WIDTH (posedge CLK) (0.284:0.284:0.284))
-        (WIDTH (negedge CLK) (0.642:0.642:0.642))
-        (SETUP (posedge D) (posedge CLK) (0.544:0.553:0.553))
-        (SETUP (negedge D) (posedge CLK) (0.620:0.643:0.643))
-        (HOLD (posedge D) (posedge CLK) (-0.321:-0.331:-0.331))
-        (HOLD (negedge D) (posedge CLK) (-0.196:-0.219:-0.219))
-        (RECOVERY (posedge RSTB) (posedge CLK) (-1.390:-1.455:-1.455))
-        (HOLD (posedge RSTB) (posedge CLK) (1.448:1.509:1.509))
-        (SETUP (posedge SE) (posedge CLK) (0.662:0.670:0.670))
-        (SETUP (negedge SE) (posedge CLK) (0.698:0.702:0.702))
-        (HOLD (posedge SE) (posedge CLK) (-0.435:-0.444:-0.444))
-        (HOLD (negedge SE) (posedge CLK) (-0.291:-0.295:-0.295))
-        (SETUP (posedge SI) (posedge CLK) (0.544:0.544:0.544))
-        (SETUP (negedge SI) (posedge CLK) (0.634:0.688:0.688))
-        (HOLD (posedge SI) (posedge CLK) (-0.317:-0.318:-0.318))
-        (HOLD (negedge SI) (posedge CLK) (-0.198:-0.247:-0.247))
-        (WIDTH (negedge RSTB) (0.345:0.345:0.345))
+        (TIMINGCHECK
+            (WIDTH (posedge CLK) (0.284:0.284:0.284))
+            (WIDTH (negedge CLK) (0.642:0.642:0.642))
+            (SETUP (posedge D) (posedge CLK) (0.544:0.553:0.553))
+            (SETUP (negedge D) (posedge CLK) (0.620:0.643:0.643))
+            (HOLD (posedge D) (posedge CLK) (-0.321:-0.331:-0.331))
+            (HOLD (negedge D) (posedge CLK) (-0.196:-0.219:-0.219))
+            (RECOVERY (posedge RSTB) (posedge CLK) (-1.390:-1.455:-1.455))
+            (HOLD (posedge RSTB) (posedge CLK) (1.448:1.509:1.509))
+            (SETUP (posedge SE) (posedge CLK) (0.662:0.670:0.670))
+            (SETUP (negedge SE) (posedge CLK) (0.698:0.702:0.702))
+            (HOLD (posedge SE) (posedge CLK) (-0.435:-0.444:-0.444))
+            (HOLD (negedge SE) (posedge CLK) (-0.291:-0.295:-0.295))
+            (SETUP (posedge SI) (posedge CLK) (0.544:0.544:0.544))
+            (SETUP (negedge SI) (posedge CLK) (0.634:0.688:0.688))
+            (HOLD (posedge SI) (posedge CLK) (-0.317:-0.318:-0.318))
+            (HOLD (negedge SI) (posedge CLK) (-0.198:-0.247:-0.247))
+            (WIDTH (negedge RSTB) (0.345:0.345:0.345))
     )))
     '''
     df = sdf.parse(test)
@@ -97,3 +99,132 @@ def test_gates(mydir):
 
     assert lt[and_b, 0, 0] == 0.375
     assert lt[and_b, 0, 1] == 0.370
+
+
+def test_nand_xor():
+    c = bench.parse("""
+        input(A1,A2)
+        output(lt_1237_U91,lt_1237_U92)
+        lt_1237_U91 = NAND2X0_RVT(A1,A2)
+        lt_1237_U92 = XOR2X1_RVT(A1,A2)
+        """)
+    df = sdf.parse("""
+        (DELAYFILE
+            (CELL
+                (CELLTYPE "NAND2X0_RVT")
+                (INSTANCE lt_1237_U91)
+                (DELAY
+                    (ABSOLUTE
+                        (IOPATH A1 Y (0.018:0.022:0.021) (0.017:0.019:0.019))
+                        (IOPATH A2 Y (0.021:0.024:0.024) (0.018:0.021:0.021))
+                    )
+                )
+            )
+            (CELL
+                (CELLTYPE "XOR2X1_RVT")
+                (INSTANCE lt_1237_U92)
+                (DELAY
+                    (ABSOLUTE
+                        (IOPATH (posedge A1) Y (0.035:0.038:0.038) (0.037:0.062:0.062))
+                        (IOPATH (negedge A1) Y (0.035:0.061:0.061) (0.036:0.040:0.040))
+                        (IOPATH (posedge A2) Y (0.042:0.043:0.043) (0.051:0.064:0.064))
+                        (IOPATH (negedge A2) Y (0.041:0.066:0.066) (0.051:0.053:0.053))
+                    )
+                )
+            )
+        )
+        """)
+    d = df.iopaths(c)[1]
+    sim = WaveSim(c, delays=d, sims=16)
+
+    # input A1
+    sim.s[0,0] = [0,1,0,1] * 4  # initial values  0101010101010101
+    sim.s[1,0] = 0.0            # transition time
+    sim.s[2,0] = [0,0,1,1] * 4  # final values    0011001100110011
+
+    # input A2
+    sim.s[0,1] = ([0]*4 + [1]*4)*2  # initial values  0000111100001111
+    sim.s[1,1] = 0.0                # transition time
+    sim.s[2,1] = [0]*8 + [1]*8      # final values    0000000011111111
+
+    # A1:   0FR10FR10FR10FR1
+    # A2:   0000FFFFRRRR1111
+    # nand: 11111RNR1NFF1RF0
+    # xor:  0FR1FPNRRNPF1RF0
+
+    sim.s_to_c()
+    sim.c_prop()
+    sim.c_to_s()
+
+    eat = sim.s[4,2:]
+    lst = sim.s[5,2:]
+
+    # NAND-gate output
+    assert np.allclose(eat[0], [
+        TMAX, TMAX, TMAX, TMAX, TMAX,
+        0.022,  # FF -> rising Y: min(0.022, 0.024)
+        TMAX,   # RF: pulse filtered
+        0.024,  # falling A2 -> rising Y
+        TMAX,
+        TMAX,   # FR: pulse filtered
+        0.021,  # RR -> falling Y: max(0.019, 0.021)
+        0.021,  # rising A2 -> falling Y
+        TMAX,
+        0.022,  # falling A1 -> rising Y
+        0.019,  # rising A1 -> falling Y
+        TMAX
+    ])
+
+    assert np.allclose(lst[0], [
+        TMIN, TMIN, TMIN, TMIN, TMIN,
+        0.022,  # FF -> rising Y: min(0.022, 0.024)
+        TMIN,   # RF: pulse filtered
+        0.024,  # falling A2 -> rising Y
+        TMIN,
+        TMIN,   # FR: pulse filtered
+        0.021,  # RR -> falling Y: max(0.019, 0.021)
+        0.021,  # rising A2 -> falling Y
+        TMIN,
+        0.022,  # falling A1 -> rising Y
+        0.019,  # rising A1 -> falling Y
+        TMIN
+    ])
+
+    # XOR-gate output
+    assert np.allclose(eat[1], [
+        TMAX,
+        0.040,  # A1:F -> Y:F
+        0.038,  # A1:R -> Y:R
+        TMAX,
+        0.053,  # A2:F -> Y:F
+        TMAX,   # P filtered
+        TMAX,   # N filtered
+        0.066,  # A2:F -> Y:R
+        0.043,  # A2:R -> Y:R
+        TMAX,   # N filtered
+        TMAX,   # P filtered
+        0.064,  # A2:R -> Y:F
+        TMAX,
+        0.061,  # A1:F -> Y:R
+        0.062,  # A1:R -> Y:F
+        TMAX,
+    ])
+
+    assert np.allclose(lst[1], [
+        TMIN,
+        0.040,  # A1:F -> Y:F
+        0.038,  # A1:R -> Y:R
+        TMIN,
+        0.053,  # A2:F -> Y:F
+        TMIN,   # P filtered
+        TMIN,   # N filtered
+        0.066,  # A2:F -> Y:R
+        0.043,  # A2:R -> Y:R
+        TMIN,   # N filtered
+        TMIN,   # P filtered
+        0.064,  # A2:R -> Y:F
+        TMIN,
+        0.061,  # A1:F -> Y:R
+        0.062,  # A1:R -> Y:F
+        TMIN,
+    ])
\ No newline at end of file