Improved NCO: 200MHz

2025-10-06 16:25:40 +02:00
parent 1e9d7b7680
commit 06ef70e1ee
5 changed files with 154 additions and 192 deletions
--- a/project.cfg
+++ b/project.cfg
@@ -30,7 +30,7 @@ xst_opts            = -vlgincdir rtl
 files_verilog       = rtl/toplevel/top_generic.v
                      rtl/core/nco_q15.v
 files_con           = boards/mimas_v1/constraints.ucf
-files_other         = rtl/core/nco_q15_funcs.vh
+files_other         = 

 [target.sim]
 toolchain           = iverilog
@@ -42,4 +42,4 @@ ivl_opts            = -Irtl
 #files_sysverilog    = 
 files_verilog       = sim/tb/tb_nco_q15.v
                      rtl/core/nco_q15.v
-files_other         = rtl/core/nco_q15_funcs.vh
+files_other         = 
--- a/rtl/core/nco_q15.v
+++ b/rtl/core/nco_q15.v
@@ -1,174 +1,163 @@
 `timescale 1ns/1ps
-// ------------------------------------------------------------
-// nco_q15.v
-// Tiny DDS/NCO @ FS_HZ sample rate with Q1.15 sine/cos outputs
-// - Phase accumulator width: PHASE_BITS (default 32)
-// - Quarter-wave LUT: 2^QTR_ADDR_BITS entries (default 64)
-// - Clock domain: CLK_HZ (default 120 MHz), creates 1-cycle strobe at FS_HZ
-// - Frequency control: write 'ftw' (32-bit tuning word)
-//   FTW = round(f_out * 2^PHASE_BITS / FS_HZ)
-// ------------------------------------------------------------

-module nco_q15 #
-(
-  // -------- Synth parameters --------
-  parameter integer PHASE_BITS    = 32,           // accumulator width
-  parameter integer QTR_ADDR_BITS = 6,            // 64-entry quarter-wave LUT
-  parameter integer CLK_HZ        = 120_000_000,  // input clock (Hz)
-  parameter integer FS_HZ         = 40_000        // output sample rate (Hz)
-)
-(
-  input  wire        clk,         // CLK_HZ domain
-  input  wire        rst_n,       // async active-low reset
+// =============================================================================
+// Small numerically controlled oscillator (NCO)
+// Generates a sine and a cosine without any multipliers, using only some logic
+// and a 64-entry quarter-wave LUT. Outputs are Q1.15 (the LUT is 8 bits wide).
+// params:
+//      -- CLK_HZ : input clock frequency in Hz
+//      -- FS_HZ : output sample frequency in Hz
+// ports:
+//      -- clk : input clock
+//      -- rst_n : asynchronous active-low reset
+//      -- freq_hz : desired output frequency in Hz (integer), 0..FS_HZ/2
+//      -- sin_q15/cos_q15 : I and Q outputs, signed Q1.15
+//      -- clk_en : one-clock output-valid strobe at FS_HZ
+// =============================================================================
+module nco_q15 #(
+	parameter integer CLK_HZ = 120_000_000,         // input clock
+	parameter integer FS_HZ = 40_000                // sample rate
+)(
+	input wire clk,                                 // CLK_HZ domain
+	input wire rst_n,                               // async active-low reset
+	input wire [31:0] freq_hz,                      // desired output frequency (Hz), 0..FS_HZ/2

-  // Frequency control
-  input  wire [31:0] ftw_in,      // Frequency Tuning Word (FTW)
-
-  // Outputs (valid on clk_en rising pulse, i.e., at FS_HZ)
-  output reg  signed [15:0] sin_q15, // signed Q1.15 sine
-  output reg  signed [15:0] cos_q15, // signed Q1.15 cosine
-  output reg                 clk_en  // 1-cycle strobe @ FS_HZ
+	output reg signed [15:0] sin_q15,               // Q1.15 sine
+	output reg signed [15:0] cos_q15,               // Q1.15 cosine
+	output reg clk_en                               // 1-cycle strobe @ FS_HZ
 );
+	localparam integer PHASE_FRAC_BITS = 6;  // phase bits below the LUT address (finer frequency steps)
+	localparam integer QTR_ADDR_BITS = 6;    // quarter-wave LUT address width (2^6 = 64 entries)
+    localparam integer PHASE_BITS = 2 + QTR_ADDR_BITS + PHASE_FRAC_BITS; // 2 quadrant bits + LUT address + fraction
+    localparam integer DIV        = CLK_HZ / FS_HZ;  // clk cycles per output sample (integer division, remainder dropped)
+    localparam integer SHIFT      = 32;              // extra fractional bits carried in RECIP, shifted out when the FTW is formed

-  `include "core/nco_q15_funcs.vh"
+    // Fixed-point reciprocal (constant): RECIP = round( (2^PHASE_BITS * 2^SHIFT) / FS_HZ )
+    localparam [63:0] RECIP = ( ((64'd1 << PHASE_BITS) << SHIFT) + (FS_HZ/2) ) / FS_HZ;

-  // ==========================================================
-  // Sample-rate enable: divide CLK_HZ down to FS_HZ
-  // - DIV must be an integer (CLK_HZ / FS_HZ).
-  // - clk_en goes high for exactly 1 clk cycle every DIV cycles.
-  // ==========================================================
-  localparam integer DIV = CLK_HZ / FS_HZ;
+    // Sample-rate tick
+    function integer clog2; // ceil(log2(v)) for v >= 1 (clog2(1) = 0); used to size tick_cnt
+        input integer v; integer r; begin r=0; v=v-1; while (v>0) begin v=v>>1; r=r+1; end clog2=r; end
+    endfunction

-  // Optional safety for misconfiguration (ignored by synthesis tools):
-  initial if (CLK_HZ % FS_HZ != 0)
-    $display("WARNING nco_q15: CLK_HZ (%0d) not divisible by FS_HZ (%0d).", CLK_HZ, FS_HZ);
-
-  // Counter width: enough bits to count to DIV-1 (use a generous fixed width to keep 2001-compatible)
-  // If you prefer, replace 16 with $clog2(DIV) on a tool that supports it well.
-  reg [15:0] tick_cnt;
-
-  always @(posedge clk) begin
-    if (!rst_n) begin
-      tick_cnt <= 16'd0;
-      clk_en   <= 1'b0;
-    end else begin
-      if (tick_cnt == DIV-1) begin
-        tick_cnt <= 16'd0;
-        clk_en   <= 1'b1;   // 1-cycle pulse
-      end else begin
-        tick_cnt <= tick_cnt + 16'd1;
-        clk_en   <= 1'b0;
-      end
+    reg [clog2(DIV)-1:0] tick_cnt; // clk-cycle counter, runs 0..DIV-1
+    always @(posedge clk or negedge rst_n) begin
+        if (!rst_n) begin tick_cnt <= 0; clk_en <= 1'b0; end
+        else begin
+        clk_en <= 1'b0; // default: strobe low; overridden below on the wrap cycle
+        if (tick_cnt == DIV-1) begin tick_cnt <= 0; clk_en <= 1'b1; end // wrap: clk_en is high for one clk every DIV cycles
+        else tick_cnt <= tick_cnt + 1'b1;
+        end
    end
-  end

-  // ==========================================================
-  // Frequency control register
-  // - You present ftw_in
-  // ==========================================================
-  reg [31:0] ftw;
-  always @(posedge clk) begin
-    ftw <= ftw_in;
-  end
+    // 32-cycle shift-and-add multiply: prod = freq_hz * RECIP (no hardware multiplier needed).
+    // Starts on clk_en and takes 32 cycles, far fewer than the DIV cycles available per sample.
+    reg        mul_busy;
+    reg  [5:0] mul_i;           // 0..31
+    reg [31:0] f_reg;
+    reg [95:0] acc;             // accumulator for product (32x64 -> 96b)

-  // ==========================================================
-  // Phase accumulators
-  // - phase_sin advances by FTW once per sample (on clk_en).
-  // - cosine is generated by a +90° phase lead (π/2), i.e., add 2^(PHASE_BITS-2).
-  //   Here we realize it by deriving phase_cos from phase_sin each sample.
-  // ==========================================================
-  reg  [PHASE_BITS-1:0] phase_sin, phase_cos;
-  wire [PHASE_BITS-1:0] phase_cos_plus90 = phase_sin + ({{(PHASE_BITS-2){1'b0}}, 2'b01} << (PHASE_BITS-2)); // +90°
+    wire [95:0] recip_shift = {{32{1'b0}}, RECIP} << mul_i; // shift constant by i

-  always @(posedge clk) begin
-    if (!rst_n) begin
-      phase_sin <= {PHASE_BITS{1'b0}};
-      phase_cos <= {PHASE_BITS{1'b0}};
-    end else if (clk_en) begin
-      phase_sin <= phase_sin + ftw;
-      // Keep cosine aligned to the same sample using a +90° offset from updated phase_sin
-      phase_cos <= phase_cos_plus90 + ftw;
+    always @(posedge clk or negedge rst_n) begin // bit-serial multiplier: acc = f_reg * RECIP, one bit of f_reg per clk
+        if (!rst_n) begin
+        mul_busy <= 1'b0; mul_i <= 6'd0; f_reg <= 32'd0; acc <= 96'd0;
+        end else begin
+        if (clk_en && !mul_busy) begin // a clk_en that arrives while a multiply is running is ignored
+            // kick off a new multiply this sample
+            mul_busy <= 1'b1;
+            mul_i    <= 6'd0;
+            f_reg    <= (freq_hz > (FS_HZ>>1)) ? (FS_HZ>>1) : freq_hz; // clamp to Nyquist (FS_HZ/2); freq_hz is sampled here
+            acc      <= 96'd0;
+        end else if (mul_busy) begin
+            // add RECIP << mul_i when bit mul_i of the clamped frequency is set
+            if (f_reg[mul_i]) acc <= acc + recip_shift;
+            // bit 31 is the last one: drop busy on the same edge as the final add
+            if (mul_i == 6'd31) begin
+            mul_busy <= 1'b0;  // done in 32 cycles
+            end
+            mul_i <= mul_i + 6'd1; // ends at 32 after the last bit; cleared again at the next start
+        end
+        end
    end
-  end

-  // ==========================================================
-  // Phase -> quadrant/index
-  // - q_*: top 2 bits select quadrant (0..3).
-  // - idx_*_raw: next QTR_ADDR_BITS select a point in 0..π/2.
-  // - For odd quadrants, mirror the LUT index.
-  // ==========================================================
-  wire [1:0] q_sin = phase_sin[PHASE_BITS-1 -: 2];
-  wire [1:0] q_cos = phase_cos[PHASE_BITS-1 -: 2];
+    // Rounding shift to get FTW; latch when multiply finishes.
+    reg [PHASE_BITS-1:0] ftw_q;
+    wire [95:0] acc_round = acc + (96'd1 << (SHIFT-1));
+    wire [PHASE_BITS-1:0] ftw_next = acc_round[SHIFT +: PHASE_BITS]; // >> SHIFT

-  wire [QTR_ADDR_BITS-1:0] idx_sin_raw = phase_sin[PHASE_BITS-3 -: QTR_ADDR_BITS];
-  wire [QTR_ADDR_BITS-1:0] idx_cos_raw = phase_cos[PHASE_BITS-3 -: QTR_ADDR_BITS];
-
-  wire [QTR_ADDR_BITS-1:0] idx_sin = (q_sin[0]) ? ({QTR_ADDR_BITS{1'b1}} - idx_sin_raw) : idx_sin_raw;
-  wire [QTR_ADDR_BITS-1:0] idx_cos = (q_cos[0]) ? ({QTR_ADDR_BITS{1'b1}} - idx_cos_raw) : idx_cos_raw;
-
-  // ==========================================================
-  // Quarter-wave 8-bit LUT (0..255). 64 entries map 0..π/2.
-  // ==========================================================
-  wire [7:0] lut_sin_mag, lut_cos_mag;
-  sine_qtr_lut64 u_lut_s (.addr(idx_sin), .dout(lut_sin_mag));
-  sine_qtr_lut64 u_lut_c (.addr(idx_cos), .dout(lut_cos_mag));
-
-  // ==========================================================
-  // Sign & scale to Q1.15
-  // - Scale: <<7 so 255 becomes 32640 (slightly below 32767).
-  // - Apply sign by quadrant: quadrants 2 & 3 are negative.
-  // ==========================================================
-  wire signed [15:0] sin_mag_q15 = {1'b0, lut_sin_mag, 7'd0};
-  wire signed [15:0] cos_mag_q15 = {1'b0, lut_cos_mag, 7'd0};
-
-  wire sin_neg = (q_sin >= 2); // quadrants 2,3
-  wire cos_neg = (q_cos >= 2);
-
-  wire signed [15:0] sin_q15_next = sin_neg ? -sin_mag_q15 : sin_mag_q15;
-  wire signed [15:0] cos_q15_next = cos_neg ? -cos_mag_q15 : cos_mag_q15;
-
-  // ==========================================================
-  // Output registers (update on clk_en)
-  // ==========================================================
-  always @(posedge clk) begin
-    if (!rst_n) begin
-      sin_q15 <= 16'sd0;
-      cos_q15 <= 16'sd0;
-    end else begin
-      sin_q15 <= sin_q15_next;
-      cos_q15 <= cos_q15_next;
+    always @(posedge clk or negedge rst_n) begin // holds the tuning word used by the phase accumulator
+        if (!rst_n) ftw_q <= {PHASE_BITS{1'b0}};
+        else if (!mul_busy) ftw_q <= ftw_next; // latch while idle: acc then holds a finished product (or 0 after reset)
    end
-  end
+
+    // Phase accumulator (advance at FS_HZ)
+    reg [PHASE_BITS-1:0] phase;
+    always @(posedge clk or negedge rst_n) begin
+        if (!rst_n)      phase <= {PHASE_BITS{1'b0}};
+        else if (clk_en) phase <= phase + ftw_q; // wraps modulo 2^PHASE_BITS, i.e. one full cycle
+    end
+
+    // Cosine phase = sine phase + 90°
+    wire [PHASE_BITS-1:0] phase_cos = phase + ({{(PHASE_BITS-2){1'b0}}, 2'b01} << (PHASE_BITS-2)); // adds 2^(PHASE_BITS-2), a quarter of the phase range
+
+    // Quadrant & LUT index
+    wire [1:0] q_sin = phase    [PHASE_BITS-1 -: 2]; // top two phase bits select the quadrant
+    wire [1:0] q_cos = phase_cos[PHASE_BITS-1 -: 2];
+
+    wire [QTR_ADDR_BITS-1:0] idx_sin_raw = phase    [PHASE_BITS-3 -: QTR_ADDR_BITS]; // next QTR_ADDR_BITS bits: position inside the quadrant
+    wire [QTR_ADDR_BITS-1:0] idx_cos_raw = phase_cos[PHASE_BITS-3 -: QTR_ADDR_BITS];
+
+    wire [QTR_ADDR_BITS-1:0] idx_sin = q_sin[0] ? ({QTR_ADDR_BITS{1'b1}} - idx_sin_raw) : idx_sin_raw; // odd quadrants read the table backwards
+    wire [QTR_ADDR_BITS-1:0] idx_cos = q_cos[0] ? ({QTR_ADDR_BITS{1'b1}} - idx_cos_raw) : idx_cos_raw;
+
+    // 64-entry quarter-wave LUT
+    wire [7:0] mag_sin_u8, mag_cos_u8;
+    sine_qtr_lut64 u_lut_s (.addr(idx_sin), .dout(mag_sin_u8)); // same table for both outputs; cosine uses the shifted phase
+    sine_qtr_lut64 u_lut_c (.addr(idx_cos), .dout(mag_cos_u8));
+
+    // Scale to Q1.15 and apply sign
+    wire signed [15:0] mag_sin_q15 = {1'b0, mag_sin_u8, 7'd0}; // magnitude << 7 with a zero sign bit: 255 becomes 32640
+    wire signed [15:0] mag_cos_q15 = {1'b0, mag_cos_u8, 7'd0};
+    wire sin_neg = (q_sin >= 2); // quadrants 2 and 3 are the negative half of the cycle
+    wire cos_neg = (q_cos >= 2);
+
+    wire signed [15:0] sin_next = sin_neg ? -mag_sin_q15 : mag_sin_q15;
+    wire signed [15:0] cos_next = cos_neg ? -mag_cos_q15 : mag_cos_q15;
+
+    always @(posedge clk or negedge rst_n) begin
+        if (!rst_n) begin
+        sin_q15 <= 16'sd0; cos_q15 <= 16'sd0;
+        end else if (clk_en) begin // outputs change only on the sample strobe
+        sin_q15 <= sin_next; cos_q15 <= cos_next; // values for the phase held before this strobe's increment
+        end
+    end
+
 endmodule

-// ------------------------------------------------------------
-// 64-entry quarter-wave sine ROM (8-bit), indices 0..63 map 0..π/2.
-// Plain Verilog case ROM. You can regenerate these with a script
-// or replace with a vendor-specific ROM for BRAM inference.
-// ------------------------------------------------------------
 module sine_qtr_lut64(
-  input  wire [5:0] addr,
-  output reg  [7:0] dout
+	input wire [5:0] addr, // table index; 0..63 covers the first quarter period (0..pi/2)
+	output reg [7:0] dout // unsigned sine magnitude, 0..255
 );
-  always @* begin
-    case (addr)
-      6'd0: dout = 8'd0; 6'd1: dout = 8'd6; 6'd2: dout = 8'd13; 6'd3: dout = 8'd19;
-      6'd4: dout = 8'd25; 6'd5: dout = 8'd31; 6'd6: dout = 8'd37; 6'd7: dout = 8'd44;
-      6'd8: dout = 8'd50; 6'd9: dout = 8'd56; 6'd10: dout = 8'd62; 6'd11: dout = 8'd68;
-      6'd12: dout = 8'd74; 6'd13: dout = 8'd80; 6'd14: dout = 8'd86; 6'd15: dout = 8'd92;
-      6'd16: dout = 8'd98; 6'd17: dout = 8'd103; 6'd18: dout = 8'd109; 6'd19: dout = 8'd115;
-      6'd20: dout = 8'd120; 6'd21: dout = 8'd126; 6'd22: dout = 8'd131; 6'd23: dout = 8'd136;
-      6'd24: dout = 8'd142; 6'd25: dout = 8'd147; 6'd26: dout = 8'd152; 6'd27: dout = 8'd157;
-      6'd28: dout = 8'd162; 6'd29: dout = 8'd167; 6'd30: dout = 8'd171; 6'd31: dout = 8'd176;
-      6'd32: dout = 8'd180; 6'd33: dout = 8'd185; 6'd34: dout = 8'd189; 6'd35: dout = 8'd193;
-      6'd36: dout = 8'd197; 6'd37: dout = 8'd201; 6'd38: dout = 8'd205; 6'd39: dout = 8'd208;
-      6'd40: dout = 8'd212; 6'd41: dout = 8'd215; 6'd42: dout = 8'd219; 6'd43: dout = 8'd222;
-      6'd44: dout = 8'd225; 6'd45: dout = 8'd228; 6'd46: dout = 8'd231; 6'd47: dout = 8'd233;
-      6'd48: dout = 8'd236; 6'd49: dout = 8'd238; 6'd50: dout = 8'd240; 6'd51: dout = 8'd242;
-      6'd52: dout = 8'd244; 6'd53: dout = 8'd246; 6'd54: dout = 8'd247; 6'd55: dout = 8'd249;
-      6'd56: dout = 8'd250; 6'd57: dout = 8'd251; 6'd58: dout = 8'd252; 6'd59: dout = 8'd253;
-      6'd60: dout = 8'd254; 6'd61: dout = 8'd254; 6'd62: dout = 8'd255; 6'd63: dout = 8'd255;
-      default: dout=8'd0;
-    endcase
-  end
+	always @* begin // combinational ROM; entries appear to follow round(255*sin(addr*pi/128)) (spot-checked, not exhaustively)
+		case (addr)
+			6'd0: dout = 8'd0;    6'd1: dout = 8'd6;    6'd2: dout = 8'd13;   6'd3: dout = 8'd19;
+			6'd4: dout = 8'd25;   6'd5: dout = 8'd31;   6'd6: dout = 8'd37;   6'd7: dout = 8'd44;
+			6'd8: dout = 8'd50;   6'd9: dout = 8'd56;   6'd10: dout = 8'd62;  6'd11: dout = 8'd68;
+			6'd12: dout = 8'd74;  6'd13: dout = 8'd80;  6'd14: dout = 8'd86;  6'd15: dout = 8'd92;
+			6'd16: dout = 8'd98;  6'd17: dout = 8'd103; 6'd18: dout = 8'd109; 6'd19: dout = 8'd115;
+			6'd20: dout = 8'd120; 6'd21: dout = 8'd126; 6'd22: dout = 8'd131; 6'd23: dout = 8'd136;
+			6'd24: dout = 8'd142; 6'd25: dout = 8'd147; 6'd26: dout = 8'd152; 6'd27: dout = 8'd157;
+			6'd28: dout = 8'd162; 6'd29: dout = 8'd167; 6'd30: dout = 8'd171; 6'd31: dout = 8'd176;
+			6'd32: dout = 8'd180; 6'd33: dout = 8'd185; 6'd34: dout = 8'd189; 6'd35: dout = 8'd193;
+			6'd36: dout = 8'd197; 6'd37: dout = 8'd201; 6'd38: dout = 8'd205; 6'd39: dout = 8'd208;
+			6'd40: dout = 8'd212; 6'd41: dout = 8'd215; 6'd42: dout = 8'd219; 6'd43: dout = 8'd222;
+			6'd44: dout = 8'd225; 6'd45: dout = 8'd228; 6'd46: dout = 8'd231; 6'd47: dout = 8'd233;
+			6'd48: dout = 8'd236; 6'd49: dout = 8'd238; 6'd50: dout = 8'd240; 6'd51: dout = 8'd242;
+			6'd52: dout = 8'd244; 6'd53: dout = 8'd246; 6'd54: dout = 8'd247; 6'd55: dout = 8'd249;
+			6'd56: dout = 8'd250; 6'd57: dout = 8'd251; 6'd58: dout = 8'd252; 6'd59: dout = 8'd253;
+			6'd60: dout = 8'd254; 6'd61: dout = 8'd254; 6'd62: dout = 8'd255; 6'd63: dout = 8'd255;
+			default: dout=8'd0; // only reachable for x/z addresses: all 64 binary values are listed above
+		endcase
+	end
 endmodule
--- a/rtl/core/nco_q15_funcs.vh
+++ b/rtl/core/nco_q15_funcs.vh
@@ -1,24 +0,0 @@
-// ==========================================================
-// Helper: FTW function (compile-time or runtime call)
-//   FTW = round( f_hz * 2^PHASE_BITS / FS_HZ )
-// Notes:
-// - Accepts integer Hz.
-// - Uses 64-bit math to avoid overflow for typical params.
-// - Can be called from a testbench or combinational logic that
-//   prepares 'ftw_in' before asserting 'ftw_we'.
-// Example:
-//   initial begin
-//     #1;
-//     $display("FTW 1kHz = 0x%08x", ftw_from_hz(1000));
-//   end
-// ==========================================================
-function [31:0] ftw_from_hz;
-  input integer f_hz;
-  input integer phase_bits;
-  input integer fs_hz;
-  reg   [63:0] numer;
-  begin
-    numer       = ((64'd1 << phase_bits) * f_hz) + (fs_hz/2);
-    ftw_from_hz = numer / fs_hz;
-  end
-endfunction
--- a/rtl/toplevel/top_generic.v
+++ b/rtl/toplevel/top_generic.v
@@ -10,8 +10,6 @@ module top_generic(
    output wire[5:0] r2r
 );

-    `include "core/nco_q15_funcs.vh"
-
    assign led_green = 1'b0;
    assign led_red = 1'b0;

@@ -19,6 +17,7 @@ module top_generic(
    reg [11:0] count;
    localparam integer DIV_MAX = 100_000 - 1; // 1 ms tick at 100 MHz
    reg [16:0] div_counter = 0;               // enough bits for 100k (2^17=131072)
+    reg [31:0] freq;
    always @(posedge aclk) begin
        if (!aresetn) begin
            div_counter <= 0;
@@ -34,6 +33,7 @@ module top_generic(
                div_counter <= div_counter + 1'b1;
            end
        end
+        freq <= count;
    end


@@ -41,12 +41,11 @@ module top_generic(
    wire clk_en;
    nco_q15 #(
        .CLK_HZ(100_000_000),
-        .PHASE_BITS(16),
        .FS_HZ(40_000)
    ) nco (
        .clk    (aclk),
        .rst_n  (aresetn),
-        .ftw_in (ftw_from_hz(count, 16, 40_000)),
+        .freq_hz(freq),
        .sin_q15(sin_q15),
        .cos_q15(),
        .clk_en (clk_en)
--- a/sim/tb/tb_nco_q15.v
+++ b/sim/tb/tb_nco_q15.v
@@ -1,8 +1,6 @@
 `timescale 1ns/1ps

 module tb_nco_q15();
-    `include "core/nco_q15_funcs.vh"
-
    // Clock and reset generation
    reg clk;
    reg resetn;
@@ -20,26 +18,26 @@ module tb_nco_q15();
    end;


-    reg [31:0] ftw_in;
+    reg [31:0] freq;
    wire [15:0] sin_q15;
    wire [15:0] cos_q15;
    wire out_en;

-    nco_q15 #(.PHASE_BITS(16)) nco (
+    nco_q15 #(.CLK_HZ(120_000_000), .FS_HZ(40_000)) nco (
        .clk    (clk),
        .rst_n  (resetn),
-        .ftw_in (ftw_in),
+        .freq_hz(freq),
        .sin_q15(sin_q15),
        .cos_q15(cos_q15),
        .clk_en (out_en)
    );

    initial begin
-        ftw_in = 32'h0;
+        freq = 32'h0;
        #100
-        ftw_in = ftw_from_hz(1000, 16, 40000);
+        freq = 32'd1000;
        #2_500_000
-        ftw_in = ftw_from_hz(2000, 16, 40000);
+        freq = 32'd2000;
    end;

 endmodule