diff --git a/project.cfg b/project.cfg index 19f79c6..6857882 100644 --- a/project.cfg +++ b/project.cfg @@ -133,7 +133,7 @@ device = xc6slx9 package = tqg144 speedgrade = -2 toplevel = top_generic -xst_opts = -vlgincdir rtl/util +xst_opts = -vlgincdir rtl/util -keep_hierarchy yes files_verilog = rtl/toplevel/top_generic.v rtl/util/conv.vh rtl/core/nco_q15.v @@ -147,6 +147,7 @@ files_verilog = rtl/toplevel/top_generic.v rtl/core/cdc_strobe_data.v rtl/core/cdc_req_resp.v rtl/core/mcu.v + rtl/core/arbiter.v rtl/core/mem_jtag_writable.v # Arch rtl/arch/spartan-6/lvds_comparator.v @@ -190,6 +191,7 @@ files_verilog = rtl/toplevel/top_generic.v rtl/wb/wb_gpio.v rtl/wb/wb_gpio_banks.v rtl/wb/wb_mux.v + rtl/wb/wb_arbiter.v rtl/wb/jtag_wb_bridge.v rtl/wb/wb_timer.v diff --git a/rtl/core/arbiter.v b/rtl/core/arbiter.v new file mode 100644 index 0000000..17c2e74 --- /dev/null +++ b/rtl/core/arbiter.v @@ -0,0 +1,138 @@ +/** + * Module: arbiter + * + * Description: + * A look ahead, round-robin parameterized arbiter. + * + * <> request + * each bit is controlled by an actor and each actor can 'request' ownership + * of the shared resource by bringing its request bit high. + * + * <> grant + * when an actor has been given ownership of the shared resource its 'grant' bit + * is driven high + * + * <> select + * binary representation of the grant signal (optional use) + * + * <> active + * is brought high by the arbiter when (any) actor has been given ownership + * of the shared resource. + * + * + * Created: Sat Jun 1 20:26:44 EDT 2013 + * + * Author: Berin Martini // berin.martini@gmail.com + */ + `ifndef _arbiter_ `define _arbiter_ + `include "../util/clog2.vh" + +module arbiter + #(parameter + NUM_PORTS = 6, + SEL_WIDTH = ((NUM_PORTS > 1) ? 
`CLOG2(NUM_PORTS) : 1)) + (input wire clk, + input wire rst, + input wire [NUM_PORTS-1:0] request, + output reg [NUM_PORTS-1:0] grant, + output reg [SEL_WIDTH-1:0] select, + output reg active +); + + /** + * Local parameters + */ + + localparam WRAP_LENGTH = 2*NUM_PORTS; + + + // Find First 1 - Start from LSB and count upwards, returns 0 when no + // bit set (the same value as when bit 0 is set) + function [SEL_WIDTH-1:0] ff1 ( + input [NUM_PORTS-1:0] in + ); + reg set; + integer i; + + begin + set = 1'b0; + ff1 = 'b0; + + for (i = 0; i < NUM_PORTS; i = i + 1) begin + if (in[i] & ~set) begin + set = 1'b1; + ff1 = i[0 +: SEL_WIDTH]; + end + end + end + endfunction + + +`ifdef VERBOSE + initial $display("Bus arbiter with %d units", NUM_PORTS); +`endif + + + /** + * Internal signals + */ + + integer yy; + + wire next; + wire [NUM_PORTS-1:0] order; + + reg [NUM_PORTS-1:0] token; + wire [NUM_PORTS-1:0] token_lookahead [NUM_PORTS-1:0]; + wire [WRAP_LENGTH-1:0] token_wrap; + + + /** + * Implementation + */ + + assign token_wrap = {token, token}; + + assign next = ~|(token & request); + + + always @(posedge clk) + grant <= token & request; + + + always @(posedge clk) + select <= ff1(token & request); + + + always @(posedge clk) + active <= |(token & request); + + + always @(posedge clk) + if (rst) token <= 'b1; + else if (next) begin + + for (yy = 0; yy < NUM_PORTS; yy = yy + 1) begin : TOKEN_ + + if (order[yy]) begin + token <= token_lookahead[yy]; + end + end + end + + + genvar xx; + generate + for (xx = 0; xx < NUM_PORTS; xx = xx + 1) begin : ORDER_ + + assign token_lookahead[xx] = token_wrap[xx +: NUM_PORTS]; + + assign order[xx] = |(token_lookahead[xx] & request); + + end + endgenerate + + +endmodule + +`endif // `ifndef _arbiter_ \ No newline at end of file diff --git a/rtl/core/mcu.v b/rtl/core/mcu.v index b888887..f7860a5 100644 --- a/rtl/core/mcu.v +++ b/rtl/core/mcu.v @@ -17,29 +17,39 @@ module mcu #( output wire [31:0] o_GPO_A, output wire [31:0] o_GPO_B, output wire 
[31:0] o_GPO_C, - output wire 
[31:0] o_GPO_D, - output wire o_test + output wire [31:0] o_GPO_D ); localparam WITH_CSR = 1; - localparam regs = 32+WITH_CSR*4; localparam rf_width = 8; wire rst; - wire rst_mem_reason; + wire rst_wb; + wire rst_mem_peripherals; + wire rst_cmd_jtag; wire timer_irq; - assign rst = i_rst | rst_mem_reason; - - assign o_test = timer_irq; + assign rst = i_rst | rst_mem_peripherals | rst_cmd_jtag; + // Keep the Wishbone path alive during JTAG "core reset" so memory can be programmed. + assign rst_wb = i_rst; // Busses - // CPU->memory + // CPU<->memory interconnect (CPU is a WB master) wire [31:0] wb_mem_adr; wire [31:0] wb_mem_dat; wire [3:0] wb_mem_sel; wire wb_mem_we; wire wb_mem_stb; - wire [31:0] wb_mem_rdt; - wire wb_mem_ack; + wire [31:0] wb_mem_rdt_cpu; + wire wb_mem_ack_cpu; + + // Interconnect->memory (shared WB slave side) + wire [31:0] wb_mem_adr_s; + wire [31:0] wb_mem_dat_s; + wire [3:0] wb_mem_sel_s; + wire wb_mem_we_s; + wire wb_mem_stb_s; + wire [31:0] wb_mem_rdt_s; + wire wb_mem_ack_s; + // CPU->peripherals wire [31:0] wb_ext_adr; wire [31:0] wb_ext_dat; @@ -48,21 +58,7 @@ module mcu #( wire wb_ext_stb; wire [31:0] wb_ext_rdt; wire wb_ext_ack; - // CPU->RF - wire [6+WITH_CSR:0] rf_waddr; - wire [rf_width-1:0] rf_wdata; - wire rf_wen; - wire [6+WITH_CSR:0] rf_raddr; - wire [rf_width-1:0] rf_rdata; - wire rf_ren; - // combined RF and mem bus to actual RAM - wire [`CLOG2(memsize)-1:0] sram_waddr; - wire [rf_width-1:0] sram_wdata; - wire sram_wen; - wire [`CLOG2(memsize)-1:0] sram_raddr; - wire [rf_width-1:0] sram_rdata; - wire sram_ren; - + // GPIO wire [4*32-1:0] GPO; wire [4*32-1:0] GPI; @@ -75,18 +71,11 @@ module mcu #( assign GPI[32*3-1:32*2] = i_GPI_C; assign GPI[32*4-1:32*3] = i_GPI_D; - // SERV core with mux splitting dbus into mem and ext and - // arbiter combining mem and ibus - // separate rst line to let other hardware keep core under reset - servile #( - .reset_pc(32'h0000_0000), - .reset_strategy("MINI"), - .rf_width(rf_width), + cpu #( 
.sim(sim), - .with_csr(WITH_CSR), - .with_c(0), - .with_mdu(0) - ) servile ( + .WITH_CSR(WITH_CSR), + .rf_width(rf_width) + ) cpu ( .i_clk(i_clk), .i_rst(rst), .i_timer_irq(timer_irq), @@ -97,8 +86,8 @@ module mcu #( .o_wb_mem_sel(wb_mem_sel), .o_wb_mem_we(wb_mem_we), .o_wb_mem_stb(wb_mem_stb), - .i_wb_mem_rdt(wb_mem_rdt), - .i_wb_mem_ack(wb_mem_ack), + .i_wb_mem_rdt(wb_mem_rdt_cpu), + .i_wb_mem_ack(wb_mem_ack_cpu), //Extension interface .o_wb_ext_adr(wb_ext_adr), @@ -107,80 +96,124 @@ module mcu #( .o_wb_ext_we(wb_ext_we), .o_wb_ext_stb(wb_ext_stb), .i_wb_ext_rdt(wb_ext_rdt), - .i_wb_ext_ack(wb_ext_ack), - - //RF IF - .o_rf_waddr(rf_waddr), - .o_rf_wdata(rf_wdata), - .o_rf_wen(rf_wen), - .o_rf_raddr(rf_raddr), - .o_rf_ren(rf_ren), - .i_rf_rdata(rf_rdata) + .i_wb_ext_ack(wb_ext_ack) ); - // WB arbiter combining RF and mem interfaces into 1 - // Last 128 bytes are used for registers - servile_rf_mem_if #( - .depth(memsize), - .rf_regs(regs) - ) rf_mem_if ( - .i_clk (i_clk), - .i_rst (rst), + generate + if (jtag) begin : gen_jtag_wb + wire [31:0] wb_jtag_adr; + wire [31:0] wb_jtag_dat; + wire [3:0] wb_jtag_sel; + wire wb_jtag_we; + wire wb_jtag_cyc; + wire wb_jtag_stb; + wire [31:0] wb_jtag_rdt; + wire wb_jtag_ack; - .i_waddr(rf_waddr), - .i_wdata(rf_wdata), - .i_wen(rf_wen), - .i_raddr(rf_raddr), - .o_rdata(rf_rdata), - .i_ren(rf_ren), + wire [2*32-1:0] wbm_adr_i; + wire [2*32-1:0] wbm_dat_i; + wire [2*4-1:0] wbm_sel_i; + wire [1:0] wbm_we_i; + wire [1:0] wbm_cyc_i; + wire [1:0] wbm_stb_i; + wire [2*3-1:0] wbm_cti_i; + wire [2*2-1:0] wbm_bte_i; + wire [2*32-1:0] wbm_dat_o; + wire [1:0] wbm_ack_o; + wire [1:0] wbm_err_o; + wire [1:0] wbm_rty_o; - .o_sram_waddr(sram_waddr), - .o_sram_wdata(sram_wdata), - .o_sram_wen(sram_wen), - .o_sram_raddr(sram_raddr), - .i_sram_rdata(sram_rdata), - // .o_sram_ren(sram_ren), + assign wbm_adr_i = {wb_jtag_adr, wb_mem_adr}; + assign wbm_dat_i = {wb_jtag_dat, wb_mem_dat}; + assign wbm_sel_i = {wb_jtag_sel, wb_mem_sel}; + assign 
wbm_we_i = {wb_jtag_we, wb_mem_we}; + assign wbm_cyc_i = {wb_jtag_cyc, wb_mem_stb}; + assign wbm_stb_i = {wb_jtag_stb, wb_mem_stb}; + assign wbm_cti_i = 6'b0; + assign wbm_bte_i = 4'b0; - .i_wb_adr(wb_mem_adr[`CLOG2(memsize)-1:2]), - .i_wb_stb(wb_mem_stb), - .i_wb_we(wb_mem_we) , - .i_wb_sel(wb_mem_sel), - .i_wb_dat(wb_mem_dat), - .o_wb_rdt(wb_mem_rdt), - .o_wb_ack(wb_mem_ack) + assign wb_mem_rdt_cpu = wbm_dat_o[31:0]; + assign wb_mem_ack_cpu = wbm_ack_o[0]; + assign wb_jtag_rdt = wbm_dat_o[63:32]; + assign wb_jtag_ack = wbm_ack_o[1]; + + wb_arbiter #( + .dw(32), + .aw(32), + .num_masters(2) + ) wb_mem_arbiter ( + .wb_clk_i(i_clk), + .wb_rst_i(rst_wb), + .wbm_adr_i(wbm_adr_i), + .wbm_dat_i(wbm_dat_i), + .wbm_sel_i(wbm_sel_i), + .wbm_we_i(wbm_we_i), + .wbm_cyc_i(wbm_cyc_i), + .wbm_stb_i(wbm_stb_i), + .wbm_cti_i(wbm_cti_i), + .wbm_bte_i(wbm_bte_i), + .wbm_dat_o(wbm_dat_o), + .wbm_ack_o(wbm_ack_o), + .wbm_err_o(wbm_err_o), + .wbm_rty_o(wbm_rty_o), + .wbs_adr_o(wb_mem_adr_s), + .wbs_dat_o(wb_mem_dat_s), + .wbs_sel_o(wb_mem_sel_s), + .wbs_we_o(wb_mem_we_s), + .wbs_cyc_o(), + .wbs_stb_o(wb_mem_stb_s), + .wbs_cti_o(), + .wbs_bte_o(), + .wbs_dat_i(wb_mem_rdt_s), + .wbs_ack_i(wb_mem_ack_s), + .wbs_err_i(1'b0), + .wbs_rty_i(1'b0) + ); + + jtag_wb_bridge #( + .chain(1) + ) jtag_wb ( + .i_clk(i_clk), + .i_rst(i_rst), + .o_wb_adr(wb_jtag_adr), + .o_wb_dat(wb_jtag_dat), + .o_wb_sel(wb_jtag_sel), + .o_wb_we(wb_jtag_we), + .o_wb_cyc(wb_jtag_cyc), + .o_wb_stb(wb_jtag_stb), + .i_wb_rdt(wb_jtag_rdt), + .i_wb_ack(wb_jtag_ack), + .o_cmd_reset(rst_cmd_jtag) + ); + end else begin : gen_no_jtag_wb + assign wb_mem_adr_s = wb_mem_adr; + assign wb_mem_dat_s = wb_mem_dat; + assign wb_mem_sel_s = wb_mem_sel; + assign wb_mem_we_s = wb_mem_we; + assign wb_mem_stb_s = wb_mem_stb; + assign wb_mem_rdt_cpu = wb_mem_rdt_s; + assign wb_mem_ack_cpu = wb_mem_ack_s; + assign rst_cmd_jtag = 1'b0; + end + endgenerate + + memory #( + .memfile(memfile), + .memsize(memsize), + .sim(sim) + ) memory ( + 
.i_clk(i_clk), + .i_rst(i_rst), + .i_wb_rst(rst_wb), + .i_wb_adr(wb_mem_adr_s), + .i_wb_dat(wb_mem_dat_s), + .i_wb_sel(wb_mem_sel_s), + .i_wb_we(wb_mem_we_s), + .i_wb_stb(wb_mem_stb_s), + .o_wb_rdt(wb_mem_rdt_s), + .o_wb_ack(wb_mem_ack_s) ); - if(jtag) begin - memory_jtag #( - .memfile(memfile), - .depth(memsize), - .sim(sim) - ) mem ( - .i_clk(i_clk), - .i_rst(i_rst), - .i_waddr(sram_waddr), - .i_wdata(sram_wdata), - .i_wen(sram_wen), - .i_raddr(sram_raddr), - .o_rdata(sram_rdata), - .o_core_reset(rst_mem_reason) - ); - end else begin - serving_ram #( - .memfile(memfile), - .depth(memsize), - .sim(sim) - ) mem ( - .i_clk(i_clk), - .i_waddr(sram_waddr), - .i_wdata(sram_wdata), - .i_wen(sram_wen), - .i_raddr(sram_raddr), - .o_rdata(sram_rdata) - ); - assign rst_mem_reason = 1'b0; - end - mcu_peripherals peripherals ( .i_clk(i_clk), .i_rst(rst), @@ -194,7 +227,156 @@ module mcu #( // Peripheral IO .i_gpio(GPI), .o_gpio(GPO), - .o_timer_irq(timer_irq) + .o_timer_irq(timer_irq), + .o_core_reset(rst_mem_peripherals) ); -endmodule \ No newline at end of file +endmodule + +module cpu #( + parameter sim = 1'b0, + parameter WITH_CSR = 1, + parameter rf_width = 8 +)( + input wire i_clk, + input wire i_rst, + input wire i_timer_irq, + // CPU->memory + output wire [31:0] o_wb_mem_adr, + output wire [31:0] o_wb_mem_dat, + output wire [3:0] o_wb_mem_sel, + output wire o_wb_mem_we, + output wire o_wb_mem_stb, + input wire [31:0] i_wb_mem_rdt, + input wire i_wb_mem_ack, + // CPU->peripherals + output wire [31:0] o_wb_ext_adr, + output wire [31:0] o_wb_ext_dat, + output wire [3:0] o_wb_ext_sel, + output wire o_wb_ext_we, + output wire o_wb_ext_stb, + input wire [31:0] i_wb_ext_rdt, + input wire i_wb_ext_ack +); + wire [6+WITH_CSR:0] rf_waddr; + wire [rf_width-1:0] rf_wdata; + wire rf_wen; + wire [6+WITH_CSR:0] rf_raddr; + wire [rf_width-1:0] rf_rdata; + wire rf_ren; + + // SERV core with mux splitting dbus into mem and ext and + // arbiter combining mem and ibus. 
+ servile #( + .reset_pc(32'h0000_0000), + .reset_strategy("MINI"), + .rf_width(rf_width), + .sim(sim), + .with_csr(WITH_CSR), + .with_c(0), + .with_mdu(0) + ) servile ( + .i_clk(i_clk), + .i_rst(i_rst), + .i_timer_irq(i_timer_irq), + + .o_wb_mem_adr(o_wb_mem_adr), + .o_wb_mem_dat(o_wb_mem_dat), + .o_wb_mem_sel(o_wb_mem_sel), + .o_wb_mem_we(o_wb_mem_we), + .o_wb_mem_stb(o_wb_mem_stb), + .i_wb_mem_rdt(i_wb_mem_rdt), + .i_wb_mem_ack(i_wb_mem_ack), + + .o_wb_ext_adr(o_wb_ext_adr), + .o_wb_ext_dat(o_wb_ext_dat), + .o_wb_ext_sel(o_wb_ext_sel), + .o_wb_ext_we(o_wb_ext_we), + .o_wb_ext_stb(o_wb_ext_stb), + .i_wb_ext_rdt(i_wb_ext_rdt), + .i_wb_ext_ack(i_wb_ext_ack), + + .o_rf_waddr(rf_waddr), + .o_rf_wdata(rf_wdata), + .o_rf_wen(rf_wen), + .o_rf_raddr(rf_raddr), + .o_rf_ren(rf_ren), + .i_rf_rdata(rf_rdata) + ); + + serv_rf_ram #( + .width(rf_width), + .csr_regs(WITH_CSR*4) + ) rf_ram ( + .i_clk(i_clk), + .i_waddr(rf_waddr), + .i_wdata(rf_wdata), + .i_wen(rf_wen), + .i_raddr(rf_raddr), + .i_ren(rf_ren), + .o_rdata(rf_rdata) + ); +endmodule + +module memory #( + parameter memfile = "", + parameter memsize = 8192, + parameter sim = 1'b0 +)( + input wire i_clk, + input wire i_rst, + input wire i_wb_rst, + input wire [31:0] i_wb_adr, + input wire [31:0] i_wb_dat, + input wire [3:0] i_wb_sel, + input wire i_wb_we, + input wire i_wb_stb, + output wire [31:0] o_wb_rdt, + output wire o_wb_ack +); + localparam mem_depth = memsize/4; + localparam mem_aw = `CLOG2(mem_depth); + + reg [31:0] mem [0:mem_depth-1] /* verilator public */; + reg [31:0] wb_rdt_r; + reg wb_ack_r; + wire [mem_aw-1:0] wb_word_adr = i_wb_adr[mem_aw+1:2]; + + assign o_wb_rdt = wb_rdt_r; + assign o_wb_ack = wb_ack_r; + + always @(posedge i_clk) begin + if (i_rst || i_wb_rst) begin + wb_ack_r <= 1'b0; + wb_rdt_r <= 32'b0; + end else begin + wb_ack_r <= i_wb_stb & ~wb_ack_r; + + if (i_wb_stb & ~wb_ack_r) begin + wb_rdt_r <= mem[wb_word_adr]; + + if (i_wb_we) begin + if (i_wb_sel[0]) mem[wb_word_adr][7:0] <= 
i_wb_dat[7:0]; + if (i_wb_sel[1]) mem[wb_word_adr][15:8] <= i_wb_dat[15:8]; + if (i_wb_sel[2]) mem[wb_word_adr][23:16] <= i_wb_dat[23:16]; + if (i_wb_sel[3]) mem[wb_word_adr][31:24] <= i_wb_dat[31:24]; + end + end + end + end + + integer i; + initial begin + if (sim == 1'b1) begin + for (i = 0; i < mem_depth; i = i + 1) + mem[i] = 32'h00000000; + end + if (|memfile) begin + $display("Preloading %m from %s", memfile); + $readmemh(memfile, mem); + end + wb_rdt_r = 32'b0; + wb_ack_r = 1'b0; + end + +endmodule diff --git a/rtl/core/mcu_peripherals.v b/rtl/core/mcu_peripherals.v index 0877196..771445e 100644 --- a/rtl/core/mcu_peripherals.v +++ b/rtl/core/mcu_peripherals.v @@ -14,13 +14,16 @@ module mcu_peripherals ( input wire [4*32-1:0] i_gpio, output wire [4*32-1:0] o_gpio, - output wire o_timer_irq + output wire o_timer_irq, + output wire o_core_reset ); localparam [31:0] GPIO_BASE_ADDR = 32'h4000_0000; localparam [31:0] GPIO_ADDR_MASK = 32'hFFFF_0000; localparam [31:0] TIMER_BASE_ADDR = 32'h4001_0000; localparam [31:0] TIMER_ADDR_MASK = 32'hFFFF_0000; + assign o_core_reset = 1'b0; + wire [2*32-1:0] wbs_adr; wire [2*32-1:0] wbs_dat_w; wire [2*4-1:0] wbs_sel; diff --git a/rtl/toplevel/top_generic.v b/rtl/toplevel/top_generic.v index 41afefc..cec58d2 100644 --- a/rtl/toplevel/top_generic.v +++ b/rtl/toplevel/top_generic.v @@ -64,8 +64,7 @@ module top_generic #( .o_GPO_A(GPIO_A), .o_GPO_B(GPIO_B), .o_GPO_C(GPIO_C), - .o_GPO_D(GPIO_D), - .o_test(test) + .o_GPO_D(GPIO_D) ); @@ -91,5 +90,5 @@ module top_generic #( assign LED = GPIO_B[7:0]; assign led_green = GPIO_C[0]; - assign led_red = test; + assign led_red = GPIO_C[1]; endmodule diff --git a/rtl/wb/jtag_wb_bridge.v b/rtl/wb/jtag_wb_bridge.v index a1c5b2c..bfb9e9e 100644 --- a/rtl/wb/jtag_wb_bridge.v +++ b/rtl/wb/jtag_wb_bridge.v @@ -34,8 +34,13 @@ module jtag_wb_bridge #( wire jtag_reset; wire jtag_sel; - // 48-bit DR (symmetrical command/response) - reg [47:0] jtag_shreg; + localparam integer JTAG_DR_W = 72; + + // 
72-bit DR (symmetrical command/response) + // Command layout: [71:64] opcode, [63:32] addr, [31:0] data + // Response layout: [71:64] resp_seq, [63:56] status, [55:48] cmd_seq, + // [47:16] data, [15:8] flags, [7:0] last_op + reg [JTAG_DR_W-1:0] jtag_shreg; jtag_if #( .chain(chain) @@ -55,30 +60,30 @@ module jtag_wb_bridge #( wire jtag_async_reset = jtag_reset || i_rst; // =========================================================================== - // CDC request/response channel (48/48 symmetric) + // CDC request/response channel (72/72 symmetric) // Side A: JTAG/TCK domain // Side B: system/i_clk domain // =========================================================================== wire a_req_busy; wire a_req_accepted; wire a_resp_pulse; - wire [47:0] a_resp_data; + wire [JTAG_DR_W-1:0] a_resp_data; wire b_req_pulse; - wire [47:0] b_req_data; + wire [JTAG_DR_W-1:0] b_req_data; reg b_resp_pulse; - reg [47:0] b_resp_data; + reg [JTAG_DR_W-1:0] b_resp_data; wire b_resp_busy; wire b_resp_accepted; // Accept UPDATE as a request strobe (qualified by SEL and !busy) wire a_req_pulse = jtag_sel && jtag_update && !a_req_busy; - wire [47:0] a_req_data = jtag_shreg; + wire [JTAG_DR_W-1:0] a_req_data = jtag_shreg; cdc_req_resp #( - .REQ_W(48), - .RESP_W(48), + .REQ_W(JTAG_DR_W), + .RESP_W(JTAG_DR_W), .STABLE_SAMPLES(2) ) u_cdc ( .a_clk(jtag_tck), @@ -107,12 +112,12 @@ module jtag_wb_bridge #( // =========================================================================== // JTAG/TCK domain shift/capture // =========================================================================== - reg [47:0] resp_hold_tck; + reg [JTAG_DR_W-1:0] resp_hold_tck; always @(posedge jtag_tck or posedge jtag_async_reset) begin if (jtag_async_reset) begin - jtag_shreg <= 48'd0; - resp_hold_tck <= 48'd0; + jtag_shreg <= {JTAG_DR_W{1'b0}}; + resp_hold_tck <= {JTAG_DR_W{1'b0}}; end else begin // Latch new response word from CDC when it arrives (independent of CAPTURE) if (a_resp_pulse) begin @@ -124,7 
+129,7 @@ module jtag_wb_bridge #( jtag_shreg <= resp_hold_tck; end else if (jtag_sel && jtag_shift) begin // Shift: MSB in, LSB out to TDO - jtag_shreg <= {jtag_tdi, jtag_shreg[47:1]}; + jtag_shreg <= {jtag_tdi, jtag_shreg[JTAG_DR_W-1:1]}; end end end @@ -138,6 +143,8 @@ module jtag_wb_bridge #( localparam [7:0] OP_RESET_OFF = 8'h11; localparam [7:0] OP_WRITE8 = 8'h20; localparam [7:0] OP_READ8 = 8'h21; + localparam [7:0] OP_WRITE32 = 8'h22; + localparam [7:0] OP_READ32 = 8'h23; localparam [7:0] OP_PING = 8'h30; localparam [7:0] OP_CLEAR_FLAGS = 8'h40; @@ -180,18 +187,18 @@ module jtag_wb_bridge #( reg act_valid; reg [7:0] act_opcode; reg [31:0] act_addr; - reg [7:0] act_data; + reg [31:0] act_data; reg [7:0] act_seq; reg q_valid; reg [7:0] q_opcode; reg [31:0] q_addr; - reg [7:0] q_data; + reg [31:0] q_data; reg [7:0] q_seq; // Response pending buffer (to avoid dropping if resp mailbox busy) reg resp_pending; - reg [47:0] resp_pending_word; + reg [JTAG_DR_W-1:0] resp_pending_word; // Lane selection wire [1:0] addr_lane = byte_aligned ? 2'b00 : act_addr[1:0]; @@ -225,11 +232,11 @@ module jtag_wb_bridge #( endfunction // Build response word - function [47:0] pack_resp( + function [JTAG_DR_W-1:0] pack_resp( input [7:0] resp_seq, input [7:0] status, input [7:0] cmd_seq, - input [7:0] data, + input [31:0] data, input [7:0] flags, input [7:0] last_op ); @@ -260,7 +267,7 @@ module jtag_wb_bridge #( task automatic enqueue_cmd( input [7:0] op, input [31:0] addr, - input [7:0] dat, + input [31:0] dat, input [7:0] seq ); begin @@ -281,7 +288,7 @@ module jtag_wb_bridge #( task automatic start_active_cmd( input [7:0] cmd_opcode, input [31:0] cmd_addr, - input [7:0] cmd_data, + input [31:0] cmd_data, input [7:0] cmd_seq ); reg [1:0] cmd_addr_lane; @@ -289,7 +296,7 @@ module jtag_wb_bridge #( cmd_addr_lane = byte_aligned ? 
2'b00 : cmd_addr[1:0]; last_opcode_r <= cmd_opcode; - last_we_r <= (cmd_opcode == OP_WRITE8); + last_we_r <= (cmd_opcode == OP_WRITE8) || (cmd_opcode == OP_WRITE32); // If we're already mid-flight or holding a response, note it (diagnostic) if (wb_busy || resp_pending) @@ -333,7 +340,7 @@ module jtag_wb_bridge #( wb_we_r <= 1'b1; wb_adr_r <= cmd_addr; wb_sel_r <= sel_from_lane(cmd_addr_lane); - wb_dat_r <= dat_from_lane_byte(cmd_addr_lane, cmd_data); + wb_dat_r <= dat_from_lane_byte(cmd_addr_lane, cmd_data[7:0]); end OP_READ8: begin @@ -345,6 +352,24 @@ module jtag_wb_bridge #( wb_dat_r <= 32'b0; end + OP_WRITE32: begin + // launch WB write (full word) + wb_busy <= 1'b1; + wb_we_r <= 1'b1; + wb_adr_r <= cmd_addr; + wb_sel_r <= 4'b1111; + wb_dat_r <= cmd_data; + end + + OP_READ32: begin + // launch WB read (full word) + wb_busy <= 1'b1; + wb_we_r <= 1'b0; + wb_adr_r <= cmd_addr; + wb_sel_r <= 4'b1111; + wb_dat_r <= 32'b0; + end + default: begin flag_illegal <= 1'b1; resp_pending_word <= pack_resp(resp_seq_r, status_snapshot, cmd_seq, 8'h00, flags_sticky, cmd_opcode); @@ -381,20 +406,20 @@ module jtag_wb_bridge #( act_valid <= 1'b0; act_opcode <= 8'h00; act_addr <= 32'h0; - act_data <= 8'h00; + act_data <= 32'h0000_0000; act_seq <= 8'h00; q_valid <= 1'b0; q_opcode <= 8'h00; q_addr <= 32'h0; - q_data <= 8'h00; + q_data <= 32'h0000_0000; q_seq <= 8'h00; resp_pending <= 1'b0; - resp_pending_word<= 48'h0; + resp_pending_word<= {JTAG_DR_W{1'b0}}; b_resp_pulse <= 1'b0; - b_resp_data <= 48'h0; + b_resp_data <= {JTAG_DR_W{1'b0}}; end else begin b_resp_pulse <= 1'b0; @@ -412,15 +437,15 @@ module jtag_wb_bridge #( // If we can start immediately (no active, no wb, no pending response), do so. 
if (!act_valid && !wb_busy && !resp_pending) begin act_valid <= 1'b1; - act_opcode <= b_req_data[47:40]; - act_addr <= b_req_data[39:8]; - act_data <= b_req_data[7:0]; + act_opcode <= b_req_data[71:64]; + act_addr <= b_req_data[63:32]; + act_data <= b_req_data[31:0]; act_seq <= cmd_seq_r; // Start it right away - start_active_cmd(b_req_data[47:40], b_req_data[39:8], b_req_data[7:0], cmd_seq_r); + start_active_cmd(b_req_data[71:64], b_req_data[63:32], b_req_data[31:0], cmd_seq_r); end else begin // Otherwise enqueue one-deep - enqueue_cmd(b_req_data[47:40], b_req_data[39:8], b_req_data[7:0], cmd_seq_r); + enqueue_cmd(b_req_data[71:64], b_req_data[63:32], b_req_data[31:0], cmd_seq_r); end end @@ -431,27 +456,40 @@ module jtag_wb_bridge #( wb_busy <= 1'b0; wb_we_r <= 1'b0; - // Determine response byte - if (act_opcode == OP_READ8) begin - resp_pending_word <= pack_resp( - resp_seq_r, - status_snapshot, - act_seq, - byte_from_lane(addr_lane, i_wb_rdt), - flags_sticky, - act_opcode - ); - end else begin - // WRITE8: echo written byte (lightweight) - resp_pending_word <= pack_resp( - resp_seq_r, - status_snapshot, - act_seq, - act_data, - flags_sticky, - act_opcode - ); - end + // Determine response data + case (act_opcode) + OP_READ8: begin + resp_pending_word <= pack_resp( + resp_seq_r, + status_snapshot, + act_seq, + {24'b0, byte_from_lane(addr_lane, i_wb_rdt)}, + flags_sticky, + act_opcode + ); + end + OP_READ32: begin + resp_pending_word <= pack_resp( + resp_seq_r, + status_snapshot, + act_seq, + i_wb_rdt, + flags_sticky, + act_opcode + ); + end + default: begin + // WRITE8/WRITE32: echo written data + resp_pending_word <= pack_resp( + resp_seq_r, + status_snapshot, + act_seq, + act_data, + flags_sticky, + act_opcode + ); + end + endcase resp_pending <= 1'b1; end diff --git a/rtl/wb/wb_arbiter.v b/rtl/wb/wb_arbiter.v new file mode 100644 index 0000000..cf61abe --- /dev/null +++ b/rtl/wb/wb_arbiter.v @@ -0,0 +1,101 @@ +/* wb_arbiter. 
Part of wb_intercon + * + * ISC License + * + * Copyright (C) 2013-2019 Olof Kindgren + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + Wishbone arbiter, burst-compatible + Simple round-robin arbiter for multiple Wishbone masters + */ +`include "../util/clog2.vh" + +module wb_arbiter + #(parameter dw = 32, + parameter aw = 32, + parameter num_hosts = 0, + parameter num_masters = num_hosts) + ( + input wire wb_clk_i, + input wire wb_rst_i, + + // Wishbone Master Interface + input wire [num_masters*aw-1:0] wbm_adr_i, + input wire [num_masters*dw-1:0] wbm_dat_i, + input wire [num_masters*4-1:0] wbm_sel_i, + input wire [num_masters-1:0] wbm_we_i, + input wire [num_masters-1:0] wbm_cyc_i, + input wire [num_masters-1:0] wbm_stb_i, + input wire [num_masters*3-1:0] wbm_cti_i, + input wire [num_masters*2-1:0] wbm_bte_i, + output wire [num_masters*dw-1:0] wbm_dat_o, + output wire [num_masters-1:0] wbm_ack_o, + output wire [num_masters-1:0] wbm_err_o, + output wire [num_masters-1:0] wbm_rty_o, + + // Wishbone Slave interface + output wire [aw-1:0] wbs_adr_o, + output wire [dw-1:0] wbs_dat_o, + output wire [3:0] wbs_sel_o, + output wire wbs_we_o, + output wire wbs_cyc_o, + output wire wbs_stb_o, + output wire [2:0] wbs_cti_o, + output wire [1:0] wbs_bte_o, + input wire [dw-1:0] 
wbs_dat_i, + input wire wbs_ack_i, + input wire wbs_err_i, + input wire wbs_rty_i); + + +/////////////////////////////////////////////////////////////////////////////// +// Parameters +/////////////////////////////////////////////////////////////////////////////// + + //Use parameter instead of localparam to work around a bug in Xilinx ISE + parameter master_sel_bits = num_masters > 1 ? `CLOG2(num_masters) : 1; + + wire [num_masters-1:0] grant; + wire [master_sel_bits-1:0] master_sel; + wire active; + + arbiter + #(.NUM_PORTS (num_masters)) + arbiter0 + (.clk (wb_clk_i), + .rst (wb_rst_i), + .request (wbm_cyc_i), + .grant (grant), + .select (master_sel), + .active (active)); +/* verilator lint_off WIDTH */ + //Mux active master + assign wbs_adr_o = wbm_adr_i[master_sel*aw+:aw]; + assign wbs_dat_o = wbm_dat_i[master_sel*dw+:dw]; + assign wbs_sel_o = wbm_sel_i[master_sel*4+:4]; + assign wbs_we_o = wbm_we_i [master_sel]; + assign wbs_cyc_o = wbm_cyc_i[master_sel] & active; + assign wbs_stb_o = wbm_stb_i[master_sel]; + assign wbs_cti_o = wbm_cti_i[master_sel*3+:3]; + assign wbs_bte_o = wbm_bte_i[master_sel*2+:2]; + + assign wbm_dat_o = {num_masters{wbs_dat_i}}; + assign wbm_ack_o = ((wbs_ack_i & active) << master_sel); + assign wbm_err_o = ((wbs_err_i & active) << master_sel); + assign wbm_rty_o = ((wbs_rty_i & active) << master_sel); +/* verilator lint_on WIDTH */ + +endmodule // wb_arbiter diff --git a/scripts/hex_to_mif.py b/scripts/hex_to_mif.py index bed01bb..6f17037 100755 --- a/scripts/hex_to_mif.py +++ b/scripts/hex_to_mif.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Convert a simple .hex image to a plain .mif-style binary file. 
-Default output format matches build/mem_8kx8b.mif in this repo: +Default output format is 32-bit words: - one binary word per line - no header """ @@ -57,8 +57,8 @@ def main() -> None: parser.add_argument( "--word-bytes", type=int, - default=1, - help="Bytes per output word (default: 1)", + default=4, + help="Bytes per output word (default: 4)", ) parser.add_argument( "--little-endian", diff --git a/sw/blinky/Makefile b/sw/blinky/Makefile deleted file mode 100644 index 80c1417..0000000 --- a/sw/blinky/Makefile +++ /dev/null @@ -1,56 +0,0 @@ -TOOLCHAIN_PREFIX ?= riscv64-elf- - -CC := $(TOOLCHAIN_PREFIX)gcc -OBJCOPY := $(TOOLCHAIN_PREFIX)objcopy -OBJDUMP := $(TOOLCHAIN_PREFIX)objdump -SIZE := $(TOOLCHAIN_PREFIX)size - -TARGET := blinky -SRCS_C := blinky.c -SRCS_S := start.s -OBJS := $(SRCS_C:.c=.o) $(SRCS_S:.s=.o) - -ARCH_FLAGS := -march=rv32i_zicsr -mabi=ilp32 -CFLAGS := $(ARCH_FLAGS) -Os -ffreestanding -fno-builtin -Wall -Wextra -ASFLAGS := $(ARCH_FLAGS) -LDFLAGS := $(ARCH_FLAGS) -nostdlib -nostartfiles -Wl,-Bstatic,-Tlink.ld,--gc-sections,-Map,$(TARGET).map - -HEX_TO_COE := ../../scripts/hex_to_coe.py -HEX_TO_MIF := ../../scripts/hex_to_mif.py - -.PHONY: all clean disasm size - -all: $(TARGET).elf $(TARGET).bin $(TARGET).hex $(TARGET).coe $(TARGET).mif $(TARGET).elf.asm - -$(TARGET).elf: $(OBJS) link.ld - $(CC) $(LDFLAGS) -o $@ $(OBJS) - -%.o: %.c - $(CC) $(CFLAGS) -c -o $@ $< - -%.o: %.s - $(CC) $(ASFLAGS) -c -o $@ $< - -$(TARGET).bin: $(TARGET).elf - $(OBJCOPY) -O binary $< $@ - -$(TARGET).hex: $(TARGET).bin - hexdump -v -e '1/1 "%02x\n"' $< > $@ - -$(TARGET).coe: $(TARGET).hex - $(HEX_TO_COE) $< $@ - -$(TARGET).mif: $(TARGET).hex - $(HEX_TO_MIF) $< $@ - -$(TARGET).elf.asm: $(TARGET).elf - $(OBJDUMP) -d -S $< > $@ - -disasm: $(TARGET).elf.asm - -size: $(TARGET).elf - $(SIZE) $< - -clean: - rm -f $(TARGET).elf $(TARGET).bin $(TARGET).hex $(TARGET).coe $(TARGET).mif \ - $(TARGET).elf.asm $(TARGET).map $(OBJS) diff --git a/sw/blinky/blinky.c b/sw/blinky/blinky.c 
deleted file mode 100644 index 71a3ae5..0000000 --- a/sw/blinky/blinky.c +++ /dev/null @@ -1,27 +0,0 @@ -#include - -#define GPIO_BASE 0x40000000u -#define VOUT_BASE 0x40000004u - -static volatile uint32_t * const gpio = (volatile uint32_t *)GPIO_BASE; -static volatile uint32_t * const vout = (volatile uint32_t *)VOUT_BASE; - -static void delay(volatile uint32_t ticks){ - while (ticks--) { - __asm__ volatile ("nop"); - } -} - -int main(void) -{ - uint32_t v = 0; - - for (;;) { - for(int i=0; i<1000; i++){ - *vout = v; - v++; - delay(5u); - } - *gpio ^= 0xffffffff; - } -} diff --git a/sw/blinky/link.ld b/sw/blinky/link.ld deleted file mode 100644 index 483b1de..0000000 --- a/sw/blinky/link.ld +++ /dev/null @@ -1,33 +0,0 @@ -OUTPUT_ARCH("riscv") -ENTRY(_start) - -MEMORY -{ - RAM (rwx) : ORIGIN = 0x00000000, LENGTH = 8064 -} - -SECTIONS -{ - .text : - { - KEEP(*(.text.init)) - *(.text .text.*) - *(.rodata .rodata.*) - } > RAM - - .data : - { - *(.data .data.*) - } > RAM - - .bss (NOLOAD) : - { - __bss_start = .; - *(.bss .bss.*) - *(COMMON) - __bss_end = .; - } > RAM - - . 
= ALIGN(4); - __stack_top = ORIGIN(RAM) + LENGTH(RAM); -} diff --git a/sw/blinky/start.s b/sw/blinky/start.s deleted file mode 100644 index 837498e..0000000 --- a/sw/blinky/start.s +++ /dev/null @@ -1,23 +0,0 @@ -.section .text.init -.globl _start -.type _start, @function - -_start: - la sp, __stack_top - - # Zero .bss - la t0, __bss_start - la t1, __bss_end -1: - bgeu t0, t1, 2f - sw zero, 0(t0) - addi t0, t0, 4 - j 1b - -2: - call main - -3: - j 3b - -.size _start, .-_start diff --git a/sw/sweep/Makefile b/sw/sweep/Makefile index 71e5b85..a44b794 100644 --- a/sw/sweep/Makefile +++ b/sw/sweep/Makefile @@ -35,7 +35,7 @@ $(TARGET).bin: $(TARGET).elf $(OBJCOPY) -O binary $< $@ $(TARGET).hex: $(TARGET).bin - hexdump -v -e '1/1 "%02x\n"' $< > $@ + hexdump -v -e '1/4 "%08x\n"' $< > $@ $(TARGET).coe: $(TARGET).hex $(HEX_TO_COE) $< $@ diff --git a/sw/sweep/link.ld b/sw/sweep/link.ld index 3d2d455..b634e26 100644 --- a/sw/sweep/link.ld +++ b/sw/sweep/link.ld @@ -31,5 +31,5 @@ SECTIONS } > RAM . 
= ALIGN(4); - __stack_top = ORIGIN(RAM) + LENGTH(RAM) - 256; + __stack_top = ORIGIN(RAM) + LENGTH(RAM); } diff --git a/sw/sweep/sweep.c b/sw/sweep/sweep.c index 13b0546..27de2da 100644 --- a/sw/sweep/sweep.c +++ b/sw/sweep/sweep.c @@ -26,7 +26,7 @@ static inline void irq_init() { void timer_isr(){ static int set = 0; - *TIMER = 1840000*4; + *TIMER = 1840000*2; *LEDGR = ~(*LEDGR); } @@ -34,7 +34,7 @@ void main(){ irq_init(); *LEDGR = 3; - *TIMER = 1840000*4; + *TIMER = 1840000*2; for(;;){ for(int i=1000; i<10000; i++){ diff --git a/tools/test.cpp b/tools/test.cpp index 89bfb09..c83d1dd 100644 --- a/tools/test.cpp +++ b/tools/test.cpp @@ -10,45 +10,64 @@ static constexpr uint8_t OP_RESET_ON = 0x10; static constexpr uint8_t OP_RESET_OFF = 0x11; static constexpr uint8_t OP_WRITE8 = 0x20; static constexpr uint8_t OP_READ8 = 0x21; +static constexpr uint8_t OP_WRITE32 = 0x22; +static constexpr uint8_t OP_READ32 = 0x23; static constexpr uint8_t OP_PING = 0x30; static constexpr uint8_t OP_CLEAR_FLAGS = 0x40; -static void shift48(DigilentJtag &jtag, const uint8_t tx[6], uint8_t rx[6]) { - jtag.shiftData(tx, rx, 48); +static void shift72(DigilentJtag &jtag, const uint8_t tx[9], uint8_t rx[9]) { + jtag.shiftData(tx, rx, 72); } -static void make_cmd(uint8_t out[6], uint8_t opcode, uint32_t addr, uint8_t data) { - out[0] = data; - out[1] = (uint8_t)addr; - out[2] = (uint8_t)(addr >> 8); - out[3] = (uint8_t)(addr >> 16); - out[4] = (uint8_t)(addr >> 24); - out[5] = opcode; +static void make_cmd(uint8_t out[9], uint8_t opcode, uint32_t addr, uint32_t data) { + out[0] = (uint8_t)data; + out[1] = (uint8_t)(data >> 8); + out[2] = (uint8_t)(data >> 16); + out[3] = (uint8_t)(data >> 24); + out[4] = (uint8_t)addr; + out[5] = (uint8_t)(addr >> 8); + out[6] = (uint8_t)(addr >> 16); + out[7] = (uint8_t)(addr >> 24); + out[8] = opcode; } -static uint8_t do_cmd(DigilentJtag& jtag, uint8_t opcode, uint32_t addr, uint8_t data){ - uint8_t tx[6], rx[6]; +static uint32_t get_data32(const uint8_t 
rx[9]) { + return ((uint32_t)rx[2]) | + ((uint32_t)rx[3] << 8) | + ((uint32_t)rx[4] << 16) | + ((uint32_t)rx[5] << 24); +} + +static uint32_t do_cmd32(DigilentJtag& jtag, uint8_t opcode, uint32_t addr, uint32_t data){ + uint8_t tx[9], rx[9]; make_cmd(tx, opcode, addr, data); - shift48(jtag, tx, rx); + shift72(jtag, tx, rx); for(int i=0; i<32; i++){ make_cmd(tx, OP_NOP, 0, 0); - shift48(jtag, tx, rx); + shift72(jtag, tx, rx); if(rx[0] == opcode){ - return rx[2]; + return get_data32(rx); } } printf("Could not do command\r\n"); return 0; } -struct Resp48 { - uint8_t last_op; - uint8_t flags; - uint8_t data; - uint8_t cmd_seq; - uint8_t status; - uint8_t resp_seq; -}; +static inline void write8(DigilentJtag& jtag, uint32_t addr, uint8_t value) { + (void)do_cmd32(jtag, OP_WRITE8, addr, value); +} + +static inline uint8_t read8(DigilentJtag& jtag, uint32_t addr) { + return (uint8_t)do_cmd32(jtag, OP_READ8, addr, 0); +} + +static inline void write32(DigilentJtag& jtag, uint32_t addr, uint32_t value) { + (void)do_cmd32(jtag, OP_WRITE32, addr, value); +} + +static inline uint32_t read32(DigilentJtag& jtag, uint32_t addr) { + return do_cmd32(jtag, OP_READ32, addr, 0); +} int main(int argc, char** argv){ ArgParser parser(argc > 0 ? argv[0] : "test"); @@ -74,9 +93,9 @@ int main(int argc, char** argv){ jtag.setChain(1); - do_cmd(jtag, OP_CLEAR_FLAGS, 0, 0); + do_cmd32(jtag, OP_CLEAR_FLAGS, 0, 0); // Check for ping - if(do_cmd(jtag, OP_PING, 0, 0) != 0xa5){ + if((do_cmd32(jtag, OP_PING, 0, 0) & 0xffu) != 0xa5u){ printf("PING response was not right\r\n"); jtag.close(); return -1; @@ -90,32 +109,32 @@ int main(int argc, char** argv){ return -1; } - do_cmd(jtag, OP_RESET_ON, 0, 0); + do_cmd32(jtag, OP_RESET_ON, 0, 0); int nr = 0; int addr = 0; do{ - uint8_t buf[64]; - nr = fread(buf, 1, 64, f); + uint32_t buf[32]; + nr = fread(buf, sizeof(uint32_t), 32, f); for(int i=0; i 0); - do_cmd(jtag, OP_RESET_OFF, 0, 0); + do_cmd32(jtag, OP_RESET_OFF, 0, 0); fclose(f); jtag.close();