Improved NCO: 200MHz

This commit is contained in:
Jojojoppe
2025-10-06 16:25:40 +02:00
parent 1e9d7b7680
commit 06ef70e1ee
5 changed files with 154 additions and 192 deletions

View File

@@ -30,7 +30,7 @@ xst_opts = -vlgincdir rtl
files_verilog = rtl/toplevel/top_generic.v
rtl/core/nco_q15.v
files_con = boards/mimas_v1/constraints.ucf
files_other = rtl/core/nco_q15_funcs.vh
files_other =
[target.sim]
toolchain = iverilog
@@ -42,4 +42,4 @@ ivl_opts = -Irtl
#files_sysverilog =
files_verilog = sim/tb/tb_nco_q15.v
rtl/core/nco_q15.v
files_other = rtl/core/nco_q15_funcs.vh
files_other =

View File

@@ -1,174 +1,163 @@
`timescale 1ns/1ps
// ------------------------------------------------------------
// nco_q15.v
// Tiny DDS/NCO @ FS_HZ sample rate with Q1.15 sine/cos outputs
// - Phase accumulator width: PHASE_BITS (default 32)
// - Quarter-wave LUT: 2^QTR_ADDR_BITS entries (default 64)
// - Clock domain: CLK_HZ (default 120 MHz), creates 1-cycle strobe at FS_HZ
// - Frequency control: write 'ftw' (32-bit tuning word)
// FTW = round(f_out * 2^PHASE_BITS / FS_HZ)
// ------------------------------------------------------------
module nco_q15 #
(
// -------- Synth parameters --------
parameter integer PHASE_BITS = 32, // accumulator width
parameter integer QTR_ADDR_BITS = 6, // 64-entry quarter-wave LUT
parameter integer CLK_HZ = 120_000_000, // input clock (Hz)
parameter integer FS_HZ = 40_000 // output sample rate (Hz)
)
(
input wire clk, // CLK_HZ domain
input wire rst_n, // async active-low reset
// =============================================================================
// Small numerically controlled oscillator (NCO)
// Generates a sine and a cosine and uses no multiplications, just some logic and
// a 64-entry LUT. It outputs Q1.15 data (but the LUT is only 8 bits wide).
// params:
// -- CLK_HZ : input clock frequency in Hz
// -- FS_HZ : output sample frequency in Hz
// ports:
// -- clk : input clock
// -- rst_n : asynchronous active-low reset
// -- freq_hz : desired output frequency in Hz (unsigned integer), 0..FS_HZ/2;
//              larger values are clamped to FS_HZ/2
// -- sin_q15/cos_q15 : I and Q outputs (signed Q1.15)
// -- clk_en : output-valid strobe, high for one clk cycle per output sample
// =============================================================================
module nco_q15 #(
parameter integer CLK_HZ = 120_000_000, // input clock
parameter integer FS_HZ = 40_000 // sample rate
)(
input wire clk, // CLK_HZ domain
input wire rst_n, // async active-low reset
input wire [31:0] freq_hz, // desired output frequency (Hz), 0..FS_HZ/2
// Frequency control
input wire [31:0] ftw_in, // Frequency Tuning Word (FTW)
// Outputs (valid on clk_en rising pulse, i.e., at FS_HZ)
output reg signed [15:0] sin_q15, // signed Q1.15 sine
output reg signed [15:0] cos_q15, // signed Q1.15 cosine
output reg clk_en // 1-cycle strobe @ FS_HZ
output reg signed [15:0] sin_q15, // Q1.15 sine
output reg signed [15:0] cos_q15, // Q1.15 cosine
output reg clk_en // 1-cycle strobe @ FS_HZ
);
localparam integer PHASE_FRAC_BITS = 6;
localparam integer QTR_ADDR_BITS = 6;
localparam integer PHASE_BITS = 2 + QTR_ADDR_BITS + PHASE_FRAC_BITS;
localparam integer DIV = CLK_HZ / FS_HZ;
localparam integer SHIFT = 32;
`include "core/nco_q15_funcs.vh"
// Fixed-point reciprocal (constant): RECIP = round( (2^PHASE_BITS * 2^SHIFT) / FS_HZ )
localparam [63:0] RECIP = ( ((64'd1 << PHASE_BITS) << SHIFT) + (FS_HZ/2) ) / FS_HZ;
// ==========================================================
// Sample-rate enable: divide CLK_HZ down to FS_HZ
// - DIV must be an integer (CLK_HZ / FS_HZ).
// - clk_en goes high for exactly 1 clk cycle every DIV cycles.
// ==========================================================
localparam integer DIV = CLK_HZ / FS_HZ;
// Sample-rate tick
function integer clog2;
input integer v; integer r; begin r=0; v=v-1; while (v>0) begin v=v>>1; r=r+1; end clog2=r; end
endfunction
// Optional safety for misconfiguration (ignored by synthesis tools):
initial if (CLK_HZ % FS_HZ != 0)
$display("WARNING nco_q15: CLK_HZ (%0d) not divisible by FS_HZ (%0d).", CLK_HZ, FS_HZ);
// Counter width: enough bits to count to DIV-1 (use a generous fixed width to keep 2001-compatible)
// If you prefer, replace 16 with $clog2(DIV) on a tool that supports it well.
reg [15:0] tick_cnt;
always @(posedge clk) begin
if (!rst_n) begin
tick_cnt <= 16'd0;
clk_en <= 1'b0;
end else begin
if (tick_cnt == DIV-1) begin
tick_cnt <= 16'd0;
clk_en <= 1'b1; // 1-cycle pulse
end else begin
tick_cnt <= tick_cnt + 16'd1;
clk_en <= 1'b0;
end
reg [clog2(DIV)-1:0] tick_cnt;
always @(posedge clk or negedge rst_n) begin
if (!rst_n) begin tick_cnt <= 0; clk_en <= 1'b0; end
else begin
clk_en <= 1'b0;
if (tick_cnt == DIV-1) begin tick_cnt <= 0; clk_en <= 1'b1; end
else tick_cnt <= tick_cnt + 1'b1;
end
end
end
// ==========================================================
// Frequency control register
// - You present ftw_in
// ==========================================================
reg [31:0] ftw;
always @(posedge clk) begin
ftw <= ftw_in;
end
// 32-cycle shift-add multiply: prod = freq_hz * RECIP (no hardware multiplier is used).
// Starts on clk_en and takes 32 add cycles, far fewer than the DIV clock cycles available per sample.
reg mul_busy;
reg [5:0] mul_i; // 0..31
reg [31:0] f_reg;
reg [95:0] acc; // accumulator for product (32x64 -> 96b)
// ==========================================================
// Phase accumulators
// - phase_sin advances by FTW once per sample (on clk_en).
// - cosine is generated by a +90° phase lead (π/2), i.e., add 2^(PHASE_BITS-2).
// Here we realize it by deriving phase_cos from phase_sin each sample.
// ==========================================================
reg [PHASE_BITS-1:0] phase_sin, phase_cos;
wire [PHASE_BITS-1:0] phase_cos_plus90 = phase_sin + ({{(PHASE_BITS-2){1'b0}}, 2'b01} << (PHASE_BITS-2)); // +90°
wire [95:0] recip_shift = {{32{1'b0}}, RECIP} << mul_i; // shift constant by i
always @(posedge clk) begin
if (!rst_n) begin
phase_sin <= {PHASE_BITS{1'b0}};
phase_cos <= {PHASE_BITS{1'b0}};
end else if (clk_en) begin
phase_sin <= phase_sin + ftw;
// Keep cosine aligned to the same sample using a +90° offset from updated phase_sin
phase_cos <= phase_cos_plus90 + ftw;
always @(posedge clk or negedge rst_n) begin
if (!rst_n) begin
mul_busy <= 1'b0; mul_i <= 6'd0; f_reg <= 32'd0; acc <= 96'd0;
end else begin
if (clk_en && !mul_busy) begin
// kick off a new multiply this sample
mul_busy <= 1'b1;
mul_i <= 6'd0;
f_reg <= (freq_hz > (FS_HZ>>1)) ? (FS_HZ>>1) : freq_hz; // clamp to Nyquist
acc <= 96'd0;
end else if (mul_busy) begin
// add shifted RECIP if bit is set
if (f_reg[mul_i]) acc <= acc + recip_shift;
// next bit
if (mul_i == 6'd31) begin
mul_busy <= 1'b0; // done in 32 cycles
end
mul_i <= mul_i + 6'd1;
end
end
end
end
// ==========================================================
// Phase -> quadrant/index
// - q_*: top 2 bits select quadrant (0..3).
// - idx_*_raw: next QTR_ADDR_BITS select a point in 0..π/2.
// - For odd quadrants, mirror the LUT index.
// ==========================================================
wire [1:0] q_sin = phase_sin[PHASE_BITS-1 -: 2];
wire [1:0] q_cos = phase_cos[PHASE_BITS-1 -: 2];
// Rounding shift to get FTW; latch when multiply finishes.
reg [PHASE_BITS-1:0] ftw_q;
wire [95:0] acc_round = acc + (96'd1 << (SHIFT-1));
wire [PHASE_BITS-1:0] ftw_next = acc_round[SHIFT +: PHASE_BITS]; // >> SHIFT
wire [QTR_ADDR_BITS-1:0] idx_sin_raw = phase_sin[PHASE_BITS-3 -: QTR_ADDR_BITS];
wire [QTR_ADDR_BITS-1:0] idx_cos_raw = phase_cos[PHASE_BITS-3 -: QTR_ADDR_BITS];
wire [QTR_ADDR_BITS-1:0] idx_sin = (q_sin[0]) ? ({QTR_ADDR_BITS{1'b1}} - idx_sin_raw) : idx_sin_raw;
wire [QTR_ADDR_BITS-1:0] idx_cos = (q_cos[0]) ? ({QTR_ADDR_BITS{1'b1}} - idx_cos_raw) : idx_cos_raw;
// ==========================================================
// Quarter-wave 8-bit LUT (0..255). 64 entries map 0..π/2.
// ==========================================================
wire [7:0] lut_sin_mag, lut_cos_mag;
sine_qtr_lut64 u_lut_s (.addr(idx_sin), .dout(lut_sin_mag));
sine_qtr_lut64 u_lut_c (.addr(idx_cos), .dout(lut_cos_mag));
// ==========================================================
// Sign & scale to Q1.15
// - Scale: <<7 so 255 becomes 32640 (slightly below 32767).
// - Apply sign by quadrant: quadrants 2 & 3 are negative.
// ==========================================================
wire signed [15:0] sin_mag_q15 = {1'b0, lut_sin_mag, 7'd0};
wire signed [15:0] cos_mag_q15 = {1'b0, lut_cos_mag, 7'd0};
wire sin_neg = (q_sin >= 2); // quadrants 2,3
wire cos_neg = (q_cos >= 2);
wire signed [15:0] sin_q15_next = sin_neg ? -sin_mag_q15 : sin_mag_q15;
wire signed [15:0] cos_q15_next = cos_neg ? -cos_mag_q15 : cos_mag_q15;
// ==========================================================
// Output registers (update on clk_en)
// ==========================================================
always @(posedge clk) begin
if (!rst_n) begin
sin_q15 <= 16'sd0;
cos_q15 <= 16'sd0;
end else begin
sin_q15 <= sin_q15_next;
cos_q15 <= cos_q15_next;
always @(posedge clk or negedge rst_n) begin
if (!rst_n) ftw_q <= {PHASE_BITS{1'b0}};
else if (!mul_busy) ftw_q <= ftw_next; // update once product ready
end
end
// Phase accumulator (advance at FS_HZ)
reg [PHASE_BITS-1:0] phase;
always @(posedge clk or negedge rst_n) begin
if (!rst_n) phase <= {PHASE_BITS{1'b0}};
else if (clk_en) phase <= phase + ftw_q;
end
// Cosine phase = sine phase + 90°
wire [PHASE_BITS-1:0] phase_cos = phase + ({{(PHASE_BITS-2){1'b0}}, 2'b01} << (PHASE_BITS-2));
// Quadrant & LUT index
wire [1:0] q_sin = phase [PHASE_BITS-1 -: 2];
wire [1:0] q_cos = phase_cos[PHASE_BITS-1 -: 2];
wire [QTR_ADDR_BITS-1:0] idx_sin_raw = phase [PHASE_BITS-3 -: QTR_ADDR_BITS];
wire [QTR_ADDR_BITS-1:0] idx_cos_raw = phase_cos[PHASE_BITS-3 -: QTR_ADDR_BITS];
wire [QTR_ADDR_BITS-1:0] idx_sin = q_sin[0] ? ({QTR_ADDR_BITS{1'b1}} - idx_sin_raw) : idx_sin_raw;
wire [QTR_ADDR_BITS-1:0] idx_cos = q_cos[0] ? ({QTR_ADDR_BITS{1'b1}} - idx_cos_raw) : idx_cos_raw;
// 64-entry quarter-wave LUT
wire [7:0] mag_sin_u8, mag_cos_u8;
sine_qtr_lut64 u_lut_s (.addr(idx_sin), .dout(mag_sin_u8));
sine_qtr_lut64 u_lut_c (.addr(idx_cos), .dout(mag_cos_u8));
// Scale to Q1.15 and apply sign
wire signed [15:0] mag_sin_q15 = {1'b0, mag_sin_u8, 7'd0};
wire signed [15:0] mag_cos_q15 = {1'b0, mag_cos_u8, 7'd0};
wire sin_neg = (q_sin >= 2);
wire cos_neg = (q_cos >= 2);
wire signed [15:0] sin_next = sin_neg ? -mag_sin_q15 : mag_sin_q15;
wire signed [15:0] cos_next = cos_neg ? -mag_cos_q15 : mag_cos_q15;
always @(posedge clk or negedge rst_n) begin
if (!rst_n) begin
sin_q15 <= 16'sd0; cos_q15 <= 16'sd0;
end else if (clk_en) begin
sin_q15 <= sin_next; cos_q15 <= cos_next;
end
end
endmodule
// ------------------------------------------------------------
// 64-entry quarter-wave sine ROM (8-bit), indices 0..63 map 0..π/2.
// Plain Verilog case ROM. You can regenerate these with a script
// or replace with a vendor-specific ROM for BRAM inference.
// ------------------------------------------------------------
module sine_qtr_lut64(
input wire [5:0] addr,
output reg [7:0] dout
input wire [5:0] addr,
output reg [7:0] dout
);
always @* begin
case (addr)
6'd0: dout = 8'd0; 6'd1: dout = 8'd6; 6'd2: dout = 8'd13; 6'd3: dout = 8'd19;
6'd4: dout = 8'd25; 6'd5: dout = 8'd31; 6'd6: dout = 8'd37; 6'd7: dout = 8'd44;
6'd8: dout = 8'd50; 6'd9: dout = 8'd56; 6'd10: dout = 8'd62; 6'd11: dout = 8'd68;
6'd12: dout = 8'd74; 6'd13: dout = 8'd80; 6'd14: dout = 8'd86; 6'd15: dout = 8'd92;
6'd16: dout = 8'd98; 6'd17: dout = 8'd103; 6'd18: dout = 8'd109; 6'd19: dout = 8'd115;
6'd20: dout = 8'd120; 6'd21: dout = 8'd126; 6'd22: dout = 8'd131; 6'd23: dout = 8'd136;
6'd24: dout = 8'd142; 6'd25: dout = 8'd147; 6'd26: dout = 8'd152; 6'd27: dout = 8'd157;
6'd28: dout = 8'd162; 6'd29: dout = 8'd167; 6'd30: dout = 8'd171; 6'd31: dout = 8'd176;
6'd32: dout = 8'd180; 6'd33: dout = 8'd185; 6'd34: dout = 8'd189; 6'd35: dout = 8'd193;
6'd36: dout = 8'd197; 6'd37: dout = 8'd201; 6'd38: dout = 8'd205; 6'd39: dout = 8'd208;
6'd40: dout = 8'd212; 6'd41: dout = 8'd215; 6'd42: dout = 8'd219; 6'd43: dout = 8'd222;
6'd44: dout = 8'd225; 6'd45: dout = 8'd228; 6'd46: dout = 8'd231; 6'd47: dout = 8'd233;
6'd48: dout = 8'd236; 6'd49: dout = 8'd238; 6'd50: dout = 8'd240; 6'd51: dout = 8'd242;
6'd52: dout = 8'd244; 6'd53: dout = 8'd246; 6'd54: dout = 8'd247; 6'd55: dout = 8'd249;
6'd56: dout = 8'd250; 6'd57: dout = 8'd251; 6'd58: dout = 8'd252; 6'd59: dout = 8'd253;
6'd60: dout = 8'd254; 6'd61: dout = 8'd254; 6'd62: dout = 8'd255; 6'd63: dout = 8'd255;
default: dout=8'd0;
endcase
end
always @* begin
case (addr)
6'd0: dout = 8'd0; 6'd1: dout = 8'd6; 6'd2: dout = 8'd13; 6'd3: dout = 8'd19;
6'd4: dout = 8'd25; 6'd5: dout = 8'd31; 6'd6: dout = 8'd37; 6'd7: dout = 8'd44;
6'd8: dout = 8'd50; 6'd9: dout = 8'd56; 6'd10: dout = 8'd62; 6'd11: dout = 8'd68;
6'd12: dout = 8'd74; 6'd13: dout = 8'd80; 6'd14: dout = 8'd86; 6'd15: dout = 8'd92;
6'd16: dout = 8'd98; 6'd17: dout = 8'd103; 6'd18: dout = 8'd109; 6'd19: dout = 8'd115;
6'd20: dout = 8'd120; 6'd21: dout = 8'd126; 6'd22: dout = 8'd131; 6'd23: dout = 8'd136;
6'd24: dout = 8'd142; 6'd25: dout = 8'd147; 6'd26: dout = 8'd152; 6'd27: dout = 8'd157;
6'd28: dout = 8'd162; 6'd29: dout = 8'd167; 6'd30: dout = 8'd171; 6'd31: dout = 8'd176;
6'd32: dout = 8'd180; 6'd33: dout = 8'd185; 6'd34: dout = 8'd189; 6'd35: dout = 8'd193;
6'd36: dout = 8'd197; 6'd37: dout = 8'd201; 6'd38: dout = 8'd205; 6'd39: dout = 8'd208;
6'd40: dout = 8'd212; 6'd41: dout = 8'd215; 6'd42: dout = 8'd219; 6'd43: dout = 8'd222;
6'd44: dout = 8'd225; 6'd45: dout = 8'd228; 6'd46: dout = 8'd231; 6'd47: dout = 8'd233;
6'd48: dout = 8'd236; 6'd49: dout = 8'd238; 6'd50: dout = 8'd240; 6'd51: dout = 8'd242;
6'd52: dout = 8'd244; 6'd53: dout = 8'd246; 6'd54: dout = 8'd247; 6'd55: dout = 8'd249;
6'd56: dout = 8'd250; 6'd57: dout = 8'd251; 6'd58: dout = 8'd252; 6'd59: dout = 8'd253;
6'd60: dout = 8'd254; 6'd61: dout = 8'd254; 6'd62: dout = 8'd255; 6'd63: dout = 8'd255;
default: dout=8'd0;
endcase
end
endmodule

View File

@@ -1,24 +0,0 @@
// ==========================================================
// Helper: FTW function (compile-time or runtime call)
// FTW = round( f_hz * 2^phase_bits / fs_hz )
//
// Inputs:
// - f_hz       : desired output frequency in Hz (integer)
// - phase_bits : width of the phase accumulator in bits
// - fs_hz      : sample rate in Hz; used as the divisor, so it must be non-zero
// Returns:
// - 32-bit frequency tuning word (the 64-bit quotient is truncated to [31:0])
//
// Notes:
// - Accepts integer Hz.
// - Uses a 64-bit intermediate ('numer') to avoid overflow for typical params.
// - Rounds to nearest by adding fs_hz/2 to the numerator before dividing.
// - Can be called from a testbench or from combinational logic that
// drives the 'ftw_in' port of the NCO.
// Example:
// initial begin
// #1;
// $display("FTW 1kHz = 0x%08x", ftw_from_hz(1000, 16, 40000));
// end
// ==========================================================
function [31:0] ftw_from_hz;
input integer f_hz;
input integer phase_bits;
input integer fs_hz;
reg [63:0] numer; // 64-bit numerator: f_hz * 2^phase_bits plus the rounding term
begin
// (64'd1 << phase_bits) is 2^phase_bits as a 64-bit value; + fs_hz/2 gives round-to-nearest
numer = ((64'd1 << phase_bits) * f_hz) + (fs_hz/2);
// Integer division; the assignment keeps only the low 32 bits of the quotient
ftw_from_hz = numer / fs_hz;
end
endfunction

View File

@@ -10,8 +10,6 @@ module top_generic(
output wire[5:0] r2r
);
`include "core/nco_q15_funcs.vh"
assign led_green = 1'b0;
assign led_red = 1'b0;
@@ -19,6 +17,7 @@ module top_generic(
reg [11:0] count;
localparam integer DIV_MAX = 100_000 - 1; // 1 ms tick at 100 MHz
reg [16:0] div_counter = 0; // enough bits for 100k (2^17=131072)
reg [31:0] freq;
always @(posedge aclk) begin
if (!aresetn) begin
div_counter <= 0;
@@ -34,6 +33,7 @@ module top_generic(
div_counter <= div_counter + 1'b1;
end
end
freq <= count;
end
@@ -41,12 +41,11 @@ module top_generic(
wire clk_en;
nco_q15 #(
.CLK_HZ(100_000_000),
.PHASE_BITS(16),
.FS_HZ(40_000)
) nco (
.clk (aclk),
.rst_n (aresetn),
.ftw_in (ftw_from_hz(count, 16, 40_000)),
.freq_hz(freq),
.sin_q15(sin_q15),
.cos_q15(),
.clk_en (clk_en)

View File

@@ -1,8 +1,6 @@
`timescale 1ns/1ps
module tb_nco_q15();
`include "core/nco_q15_funcs.vh"
// Clock and reset generation
reg clk;
reg resetn;
@@ -20,26 +18,26 @@ module tb_nco_q15();
end;
reg [31:0] ftw_in;
reg [31:0] freq;
wire [15:0] sin_q15;
wire [15:0] cos_q15;
wire out_en;
nco_q15 #(.PHASE_BITS(16)) nco (
nco_q15 #(.CLK_HZ(120_000_000), .FS_HZ(40_000)) nco (
.clk (clk),
.rst_n (resetn),
.ftw_in (ftw_in),
.freq_hz(freq),
.sin_q15(sin_q15),
.cos_q15(cos_q15),
.clk_en (out_en)
);
initial begin
ftw_in = 32'h0;
freq = 32'h0;
#100
ftw_in = ftw_from_hz(1000, 16, 40000);
freq = 32'd1000;
#2_500_000
ftw_in = ftw_from_hz(2000, 16, 40000);
freq = 32'd2000;
end;
endmodule