From 06ef70e1ee7943acf979c1c31968228a8244c588 Mon Sep 17 00:00:00 2001 From: Jojojoppe Date: Mon, 6 Oct 2025 16:25:40 +0200 Subject: [PATCH] Improved NCO: 200MHz --- project.cfg | 4 +- rtl/core/nco_q15.v | 297 ++++++++++++++++++------------------- rtl/core/nco_q15_funcs.vh | 24 --- rtl/toplevel/top_generic.v | 7 +- sim/tb/tb_nco_q15.v | 14 +- 5 files changed, 154 insertions(+), 192 deletions(-) delete mode 100644 rtl/core/nco_q15_funcs.vh diff --git a/project.cfg b/project.cfg index d83fdc7..806296a 100644 --- a/project.cfg +++ b/project.cfg @@ -30,7 +30,7 @@ xst_opts = -vlgincdir rtl files_verilog = rtl/toplevel/top_generic.v rtl/core/nco_q15.v files_con = boards/mimas_v1/constraints.ucf -files_other = rtl/core/nco_q15_funcs.vh +files_other = [target.sim] toolchain = iverilog @@ -42,4 +42,4 @@ ivl_opts = -Irtl #files_sysverilog = files_verilog = sim/tb/tb_nco_q15.v rtl/core/nco_q15.v -files_other = rtl/core/nco_q15_funcs.vh \ No newline at end of file +files_other = \ No newline at end of file diff --git a/rtl/core/nco_q15.v b/rtl/core/nco_q15.v index 9c06e69..df60955 100644 --- a/rtl/core/nco_q15.v +++ b/rtl/core/nco_q15.v @@ -1,174 +1,163 @@ `timescale 1ns/1ps -// ------------------------------------------------------------ -// nco_q15.v -// Tiny DDS/NCO @ FS_HZ sample rate with Q1.15 sine/cos outputs -// - Phase accumulator width: PHASE_BITS (default 32) -// - Quarter-wave LUT: 2^QTR_ADDR_BITS entries (default 64) -// - Clock domain: CLK_HZ (default 120 MHz), creates 1-cycle strobe at FS_HZ -// - Frequency control: write 'ftw' (32-bit tuning word) -// FTW = round(f_out * 2^PHASE_BITS / FS_HZ) -// ------------------------------------------------------------ -module nco_q15 # -( - // -------- Synth parameters -------- - parameter integer PHASE_BITS = 32, // accumulator width - parameter integer QTR_ADDR_BITS = 6, // 64-entry quarter-wave LUT - parameter integer CLK_HZ = 120_000_000, // input clock (Hz) - parameter integer FS_HZ = 40_000 // output sample rate (Hz) 
-) -( - input wire clk, // CLK_HZ domain - input wire rst_n, // async active-low reset +// ============================================================================= +// Small numerically controlled oscillator (NCO) +// Generates a sine and cosine without using multipliers, just some logic and +// a 64-entry LUT. It outputs Q15 data (but the LUT is 8 bits wide) +// params: +// -- CLK_HZ : input clock frequency in Hz +// -- FS_HZ : output sample frequency in Hz +// ports: +// -- clk : input clock +// -- rst_n : asynchronous active-low reset +// -- freq_hz : desired output frequency in Hz (unsigned integer), 0..FS_HZ/2 +// -- sin_q15/cos_q15 : signed Q1.15 sine and cosine outputs +// -- clk_en : output valid strobe +// ============================================================================= +module nco_q15 #( + parameter integer CLK_HZ = 120_000_000, // input clock + parameter integer FS_HZ = 40_000 // sample rate +)( + input wire clk, // CLK_HZ domain + input wire rst_n, // async active-low reset + input wire [31:0] freq_hz, // desired output frequency (Hz), 0..FS_HZ/2 - // Frequency control - input wire [31:0] ftw_in, // Frequency Tuning Word (FTW) - - // Outputs (valid on clk_en rising pulse, i.e., at FS_HZ) - output reg signed [15:0] sin_q15, // signed Q1.15 sine - output reg signed [15:0] cos_q15, // signed Q1.15 cosine - output reg clk_en // 1-cycle strobe @ FS_HZ + output reg signed [15:0] sin_q15, // Q1.15 sine + output reg signed [15:0] cos_q15, // Q1.15 cosine + output reg clk_en // 1-cycle strobe @ FS_HZ ); + localparam integer PHASE_FRAC_BITS = 6; + localparam integer QTR_ADDR_BITS = 6; + localparam integer PHASE_BITS = 2 + QTR_ADDR_BITS + PHASE_FRAC_BITS; + localparam integer DIV = CLK_HZ / FS_HZ; + localparam integer SHIFT = 32; - `include "core/nco_q15_funcs.vh" + // Fixed-point reciprocal (constant): RECIP = round( (2^PHASE_BITS * 2^SHIFT) / FS_HZ ) + localparam [63:0] RECIP = ( ((64'd1 << PHASE_BITS) << SHIFT) + (FS_HZ/2) ) / FS_HZ; - // ========================================================== - 
// Sample-rate enable: divide CLK_HZ down to FS_HZ - // - DIV must be an integer (CLK_HZ / FS_HZ). - // - clk_en goes high for exactly 1 clk cycle every DIV cycles. - // ========================================================== - localparam integer DIV = CLK_HZ / FS_HZ; + // Sample-rate tick + function integer clog2; + input integer v; integer r; begin r=0; v=v-1; while (v>0) begin v=v>>1; r=r+1; end clog2=r; end + endfunction - // Optional safety for misconfiguration (ignored by synthesis tools): - initial if (CLK_HZ % FS_HZ != 0) - $display("WARNING nco_q15: CLK_HZ (%0d) not divisible by FS_HZ (%0d).", CLK_HZ, FS_HZ); - - // Counter width: enough bits to count to DIV-1 (use a generous fixed width to keep 2001-compatible) - // If you prefer, replace 16 with $clog2(DIV) on a tool that supports it well. - reg [15:0] tick_cnt; - - always @(posedge clk) begin - if (!rst_n) begin - tick_cnt <= 16'd0; - clk_en <= 1'b0; - end else begin - if (tick_cnt == DIV-1) begin - tick_cnt <= 16'd0; - clk_en <= 1'b1; // 1-cycle pulse - end else begin - tick_cnt <= tick_cnt + 16'd1; - clk_en <= 1'b0; - end + reg [clog2(DIV)-1:0] tick_cnt; + always @(posedge clk or negedge rst_n) begin + if (!rst_n) begin tick_cnt <= 0; clk_en <= 1'b0; end + else begin + clk_en <= 1'b0; + if (tick_cnt == DIV-1) begin tick_cnt <= 0; clk_en <= 1'b1; end + else tick_cnt <= tick_cnt + 1'b1; + end end - end - // ========================================================== - // Frequency control register - // - You present ftw_in - // ========================================================== - reg [31:0] ftw; - always @(posedge clk) begin - ftw <= ftw_in; - end + // 32-cycle shift–add multiply: prod = freq_hz * RECIP (no hardware multiplier is used) + // Starts at clk_en, finishes in 32 cycles (far fewer than the clock cycles available per sample). 
+ reg mul_busy; + reg [5:0] mul_i; // 0..31 + reg [31:0] f_reg; + reg [95:0] acc; // accumulator for product (32x64 -> 96b) - // ========================================================== - // Phase accumulators - // - phase_sin advances by FTW once per sample (on clk_en). - // - cosine is generated by a +90° phase lead (π/2), i.e., add 2^(PHASE_BITS-2). - // Here we realize it by deriving phase_cos from phase_sin each sample. - // ========================================================== - reg [PHASE_BITS-1:0] phase_sin, phase_cos; - wire [PHASE_BITS-1:0] phase_cos_plus90 = phase_sin + ({{(PHASE_BITS-2){1'b0}}, 2'b01} << (PHASE_BITS-2)); // +90° + wire [95:0] recip_shift = {{32{1'b0}}, RECIP} << mul_i; // shift constant by i - always @(posedge clk) begin - if (!rst_n) begin - phase_sin <= {PHASE_BITS{1'b0}}; - phase_cos <= {PHASE_BITS{1'b0}}; - end else if (clk_en) begin - phase_sin <= phase_sin + ftw; - // Keep cosine aligned to the same sample using a +90° offset from updated phase_sin - phase_cos <= phase_cos_plus90 + ftw; + always @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + mul_busy <= 1'b0; mul_i <= 6'd0; f_reg <= 32'd0; acc <= 96'd0; + end else begin + if (clk_en && !mul_busy) begin + // kick off a new multiply this sample + mul_busy <= 1'b1; + mul_i <= 6'd0; + f_reg <= (freq_hz > (FS_HZ>>1)) ? (FS_HZ>>1) : freq_hz; // clamp to Nyquist + acc <= 96'd0; + end else if (mul_busy) begin + // add shifted RECIP if bit is set + if (f_reg[mul_i]) acc <= acc + recip_shift; + // next bit + if (mul_i == 6'd31) begin + mul_busy <= 1'b0; // done in 32 cycles + end + mul_i <= mul_i + 6'd1; + end + end end - end - // ========================================================== - // Phase -> quadrant/index - // - q_*: top 2 bits select quadrant (0..3). - // - idx_*_raw: next QTR_ADDR_BITS select a point in 0..π/2. - // - For odd quadrants, mirror the LUT index. 
- // ========================================================== - wire [1:0] q_sin = phase_sin[PHASE_BITS-1 -: 2]; - wire [1:0] q_cos = phase_cos[PHASE_BITS-1 -: 2]; + // Rounding shift to get FTW; latch when multiply finishes. + reg [PHASE_BITS-1:0] ftw_q; + wire [95:0] acc_round = acc + (96'd1 << (SHIFT-1)); + wire [PHASE_BITS-1:0] ftw_next = acc_round[SHIFT +: PHASE_BITS]; // >> SHIFT - wire [QTR_ADDR_BITS-1:0] idx_sin_raw = phase_sin[PHASE_BITS-3 -: QTR_ADDR_BITS]; - wire [QTR_ADDR_BITS-1:0] idx_cos_raw = phase_cos[PHASE_BITS-3 -: QTR_ADDR_BITS]; - - wire [QTR_ADDR_BITS-1:0] idx_sin = (q_sin[0]) ? ({QTR_ADDR_BITS{1'b1}} - idx_sin_raw) : idx_sin_raw; - wire [QTR_ADDR_BITS-1:0] idx_cos = (q_cos[0]) ? ({QTR_ADDR_BITS{1'b1}} - idx_cos_raw) : idx_cos_raw; - - // ========================================================== - // Quarter-wave 8-bit LUT (0..255). 64 entries map 0..π/2. - // ========================================================== - wire [7:0] lut_sin_mag, lut_cos_mag; - sine_qtr_lut64 u_lut_s (.addr(idx_sin), .dout(lut_sin_mag)); - sine_qtr_lut64 u_lut_c (.addr(idx_cos), .dout(lut_cos_mag)); - - // ========================================================== - // Sign & scale to Q1.15 - // - Scale: <<7 so 255 becomes 32640 (slightly below 32767). - // - Apply sign by quadrant: quadrants 2 & 3 are negative. - // ========================================================== - wire signed [15:0] sin_mag_q15 = {1'b0, lut_sin_mag, 7'd0}; - wire signed [15:0] cos_mag_q15 = {1'b0, lut_cos_mag, 7'd0}; - - wire sin_neg = (q_sin >= 2); // quadrants 2,3 - wire cos_neg = (q_cos >= 2); - - wire signed [15:0] sin_q15_next = sin_neg ? -sin_mag_q15 : sin_mag_q15; - wire signed [15:0] cos_q15_next = cos_neg ? 
-cos_mag_q15 : cos_mag_q15; - - // ========================================================== - // Output registers (update on clk_en) - // ========================================================== - always @(posedge clk) begin - if (!rst_n) begin - sin_q15 <= 16'sd0; - cos_q15 <= 16'sd0; - end else begin - sin_q15 <= sin_q15_next; - cos_q15 <= cos_q15_next; + always @(posedge clk or negedge rst_n) begin + if (!rst_n) ftw_q <= {PHASE_BITS{1'b0}}; + else if (!mul_busy) ftw_q <= ftw_next; // update once product ready end - end + + // Phase accumulator (advance at FS_HZ) + reg [PHASE_BITS-1:0] phase; + always @(posedge clk or negedge rst_n) begin + if (!rst_n) phase <= {PHASE_BITS{1'b0}}; + else if (clk_en) phase <= phase + ftw_q; + end + + // Cosine phase = sine phase + 90° + wire [PHASE_BITS-1:0] phase_cos = phase + ({{(PHASE_BITS-2){1'b0}}, 2'b01} << (PHASE_BITS-2)); + + // Quadrant & LUT index + wire [1:0] q_sin = phase [PHASE_BITS-1 -: 2]; + wire [1:0] q_cos = phase_cos[PHASE_BITS-1 -: 2]; + + wire [QTR_ADDR_BITS-1:0] idx_sin_raw = phase [PHASE_BITS-3 -: QTR_ADDR_BITS]; + wire [QTR_ADDR_BITS-1:0] idx_cos_raw = phase_cos[PHASE_BITS-3 -: QTR_ADDR_BITS]; + + wire [QTR_ADDR_BITS-1:0] idx_sin = q_sin[0] ? ({QTR_ADDR_BITS{1'b1}} - idx_sin_raw) : idx_sin_raw; + wire [QTR_ADDR_BITS-1:0] idx_cos = q_cos[0] ? ({QTR_ADDR_BITS{1'b1}} - idx_cos_raw) : idx_cos_raw; + + // 64-entry quarter-wave LUT + wire [7:0] mag_sin_u8, mag_cos_u8; + sine_qtr_lut64 u_lut_s (.addr(idx_sin), .dout(mag_sin_u8)); + sine_qtr_lut64 u_lut_c (.addr(idx_cos), .dout(mag_cos_u8)); + + // Scale to Q1.15 and apply sign + wire signed [15:0] mag_sin_q15 = {1'b0, mag_sin_u8, 7'd0}; + wire signed [15:0] mag_cos_q15 = {1'b0, mag_cos_u8, 7'd0}; + wire sin_neg = (q_sin >= 2); + wire cos_neg = (q_cos >= 2); + + wire signed [15:0] sin_next = sin_neg ? -mag_sin_q15 : mag_sin_q15; + wire signed [15:0] cos_next = cos_neg ? 
-mag_cos_q15 : mag_cos_q15; + + always @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + sin_q15 <= 16'sd0; cos_q15 <= 16'sd0; + end else if (clk_en) begin + sin_q15 <= sin_next; cos_q15 <= cos_next; + end + end + endmodule -// ------------------------------------------------------------ -// 64-entry quarter-wave sine ROM (8-bit), indices 0..63 map 0..π/2. -// Plain Verilog case ROM. You can regenerate these with a script -// or replace with a vendor-specific ROM for BRAM inference. -// ------------------------------------------------------------ module sine_qtr_lut64( - input wire [5:0] addr, - output reg [7:0] dout + input wire [5:0] addr, + output reg [7:0] dout ); - always @* begin - case (addr) - 6'd0: dout = 8'd0; 6'd1: dout = 8'd6; 6'd2: dout = 8'd13; 6'd3: dout = 8'd19; - 6'd4: dout = 8'd25; 6'd5: dout = 8'd31; 6'd6: dout = 8'd37; 6'd7: dout = 8'd44; - 6'd8: dout = 8'd50; 6'd9: dout = 8'd56; 6'd10: dout = 8'd62; 6'd11: dout = 8'd68; - 6'd12: dout = 8'd74; 6'd13: dout = 8'd80; 6'd14: dout = 8'd86; 6'd15: dout = 8'd92; - 6'd16: dout = 8'd98; 6'd17: dout = 8'd103; 6'd18: dout = 8'd109; 6'd19: dout = 8'd115; - 6'd20: dout = 8'd120; 6'd21: dout = 8'd126; 6'd22: dout = 8'd131; 6'd23: dout = 8'd136; - 6'd24: dout = 8'd142; 6'd25: dout = 8'd147; 6'd26: dout = 8'd152; 6'd27: dout = 8'd157; - 6'd28: dout = 8'd162; 6'd29: dout = 8'd167; 6'd30: dout = 8'd171; 6'd31: dout = 8'd176; - 6'd32: dout = 8'd180; 6'd33: dout = 8'd185; 6'd34: dout = 8'd189; 6'd35: dout = 8'd193; - 6'd36: dout = 8'd197; 6'd37: dout = 8'd201; 6'd38: dout = 8'd205; 6'd39: dout = 8'd208; - 6'd40: dout = 8'd212; 6'd41: dout = 8'd215; 6'd42: dout = 8'd219; 6'd43: dout = 8'd222; - 6'd44: dout = 8'd225; 6'd45: dout = 8'd228; 6'd46: dout = 8'd231; 6'd47: dout = 8'd233; - 6'd48: dout = 8'd236; 6'd49: dout = 8'd238; 6'd50: dout = 8'd240; 6'd51: dout = 8'd242; - 6'd52: dout = 8'd244; 6'd53: dout = 8'd246; 6'd54: dout = 8'd247; 6'd55: dout = 8'd249; - 6'd56: dout = 8'd250; 6'd57: dout = 8'd251; 
6'd58: dout = 8'd252; 6'd59: dout = 8'd253; - 6'd60: dout = 8'd254; 6'd61: dout = 8'd254; 6'd62: dout = 8'd255; 6'd63: dout = 8'd255; - default: dout=8'd0; - endcase - end + always @* begin + case (addr) + 6'd0: dout = 8'd0; 6'd1: dout = 8'd6; 6'd2: dout = 8'd13; 6'd3: dout = 8'd19; + 6'd4: dout = 8'd25; 6'd5: dout = 8'd31; 6'd6: dout = 8'd37; 6'd7: dout = 8'd44; + 6'd8: dout = 8'd50; 6'd9: dout = 8'd56; 6'd10: dout = 8'd62; 6'd11: dout = 8'd68; + 6'd12: dout = 8'd74; 6'd13: dout = 8'd80; 6'd14: dout = 8'd86; 6'd15: dout = 8'd92; + 6'd16: dout = 8'd98; 6'd17: dout = 8'd103; 6'd18: dout = 8'd109; 6'd19: dout = 8'd115; + 6'd20: dout = 8'd120; 6'd21: dout = 8'd126; 6'd22: dout = 8'd131; 6'd23: dout = 8'd136; + 6'd24: dout = 8'd142; 6'd25: dout = 8'd147; 6'd26: dout = 8'd152; 6'd27: dout = 8'd157; + 6'd28: dout = 8'd162; 6'd29: dout = 8'd167; 6'd30: dout = 8'd171; 6'd31: dout = 8'd176; + 6'd32: dout = 8'd180; 6'd33: dout = 8'd185; 6'd34: dout = 8'd189; 6'd35: dout = 8'd193; + 6'd36: dout = 8'd197; 6'd37: dout = 8'd201; 6'd38: dout = 8'd205; 6'd39: dout = 8'd208; + 6'd40: dout = 8'd212; 6'd41: dout = 8'd215; 6'd42: dout = 8'd219; 6'd43: dout = 8'd222; + 6'd44: dout = 8'd225; 6'd45: dout = 8'd228; 6'd46: dout = 8'd231; 6'd47: dout = 8'd233; + 6'd48: dout = 8'd236; 6'd49: dout = 8'd238; 6'd50: dout = 8'd240; 6'd51: dout = 8'd242; + 6'd52: dout = 8'd244; 6'd53: dout = 8'd246; 6'd54: dout = 8'd247; 6'd55: dout = 8'd249; + 6'd56: dout = 8'd250; 6'd57: dout = 8'd251; 6'd58: dout = 8'd252; 6'd59: dout = 8'd253; + 6'd60: dout = 8'd254; 6'd61: dout = 8'd254; 6'd62: dout = 8'd255; 6'd63: dout = 8'd255; + default: dout=8'd0; + endcase + end endmodule \ No newline at end of file diff --git a/rtl/core/nco_q15_funcs.vh b/rtl/core/nco_q15_funcs.vh deleted file mode 100644 index def8837..0000000 --- a/rtl/core/nco_q15_funcs.vh +++ /dev/null @@ -1,24 +0,0 @@ -// ========================================================== -// Helper: FTW function (compile-time or runtime call) -// FTW = 
round( f_hz * 2^PHASE_BITS / FS_HZ ) -// Notes: -// - Accepts integer Hz. -// - Uses 64-bit math to avoid overflow for typical params. -// - Can be called from a testbench or combinational logic that -// prepares 'ftw_in' before asserting 'ftw_we'. -// Example: -// initial begin -// #1; -// $display("FTW 1kHz = 0x%08x", ftw_from_hz(1000)); -// end -// ========================================================== -function [31:0] ftw_from_hz; - input integer f_hz; - input integer phase_bits; - input integer fs_hz; - reg [63:0] numer; - begin - numer = ((64'd1 << phase_bits) * f_hz) + (fs_hz/2); - ftw_from_hz = numer / fs_hz; - end -endfunction \ No newline at end of file diff --git a/rtl/toplevel/top_generic.v b/rtl/toplevel/top_generic.v index 822cffc..1d85f10 100644 --- a/rtl/toplevel/top_generic.v +++ b/rtl/toplevel/top_generic.v @@ -10,8 +10,6 @@ module top_generic( output wire[5:0] r2r ); - `include "core/nco_q15_funcs.vh" - assign led_green = 1'b0; assign led_red = 1'b0; @@ -19,6 +17,7 @@ module top_generic( reg [11:0] count; localparam integer DIV_MAX = 100_000 - 1; // 1 ms tick at 100 MHz reg [16:0] div_counter = 0; // enough bits for 100k (2^17=131072) + reg [31:0] freq; always @(posedge aclk) begin if (!aresetn) begin div_counter <= 0; @@ -34,6 +33,7 @@ module top_generic( div_counter <= div_counter + 1'b1; end end + freq <= count; end @@ -41,12 +41,11 @@ module top_generic( wire clk_en; nco_q15 #( .CLK_HZ(100_000_000), - .PHASE_BITS(16), .FS_HZ(40_000) ) nco ( .clk (aclk), .rst_n (aresetn), - .ftw_in (ftw_from_hz(count, 16, 40_000)), + .freq_hz(freq), .sin_q15(sin_q15), .cos_q15(), .clk_en (clk_en) diff --git a/sim/tb/tb_nco_q15.v b/sim/tb/tb_nco_q15.v index d5ae656..0ca2e0a 100644 --- a/sim/tb/tb_nco_q15.v +++ b/sim/tb/tb_nco_q15.v @@ -1,8 +1,6 @@ `timescale 1ns/1ps module tb_nco_q15(); - `include "core/nco_q15_funcs.vh" - // Clock and reset generation reg clk; reg resetn; @@ -20,26 +18,26 @@ module tb_nco_q15(); end; - reg [31:0] ftw_in; + reg [31:0] freq; 
wire [15:0] sin_q15; wire [15:0] cos_q15; wire out_en; - nco_q15 #(.PHASE_BITS(16)) nco ( + nco_q15 #(.CLK_HZ(120_000_000), .FS_HZ(40_000)) nco ( .clk (clk), .rst_n (resetn), - .ftw_in (ftw_in), + .freq_hz(freq), .sin_q15(sin_q15), .cos_q15(cos_q15), .clk_en (out_en) ); initial begin - ftw_in = 32'h0; + freq = 32'h0; #100 - ftw_in = ftw_from_hz(1000, 16, 40000); + freq = 32'd1000; #2_500_000 - ftw_in = ftw_from_hz(2000, 16, 40000); + freq = 32'd2000; end; endmodule \ No newline at end of file