diff --git a/library/axi_dmac/Makefile b/library/axi_dmac/Makefile index c8e5969e77..61d74ffe12 100644 --- a/library/axi_dmac/Makefile +++ b/library/axi_dmac/Makefile @@ -49,6 +49,7 @@ XILINX_LIB_DEPS += util_cdc ALTERA_DEPS += ../util_axis_fifo/util_axis_fifo.v ALTERA_DEPS += ../util_axis_fifo/address_sync.v ALTERA_DEPS += ../util_cdc/sync_bits.v +ALTERA_DEPS += ../util_cdc/sync_event.v ALTERA_DEPS += axi_dmac_constr.sdc ALTERA_DEPS += axi_dmac_hw.tcl diff --git a/library/axi_dmac/axi_dmac_constr.ttcl b/library/axi_dmac/axi_dmac_constr.ttcl index 2eebd060df..f947fe1879 100644 --- a/library/axi_dmac/axi_dmac_constr.ttcl +++ b/library/axi_dmac/axi_dmac_constr.ttcl @@ -60,6 +60,34 @@ set_max_delay -quiet -datapath_only \ -to $src_clk \ [get_property -min PERIOD $src_clk] +set_max_delay -quiet -datapath_only \ + -from $src_clk \ + -to [get_cells -quiet -hier *cdc_sync_stage1_reg* \ + -filter {NAME =~ *i_rewind_req_fifo/i_waddr_sync* && IS_SEQUENTIAL}] \ + [get_property -min PERIOD $src_clk] + +set_max_delay -quiet -datapath_only \ + -from $dest_clk \ + -to [get_cells -quiet -hier *cdc_sync_stage1_reg* \ + -filter {NAME =~ *i_rewind_req_fifo/i_raddr_sync* && IS_SEQUENTIAL}] \ + [get_property -min PERIOD $dest_clk] + +set_max_delay -quiet -datapath_only \ + -from [get_cells -quiet -hier *cdc_sync_fifo_ram_reg* \ + -filter {NAME =~ *i_rewind_req_fifo* && IS_SEQUENTIAL}] \ + -to $dest_clk \ + [get_property -min PERIOD $dest_clk] + +set_false_path -quiet \ + -from $req_clk \ + -to [get_cells -quiet -hier *cdc_sync_stage1_reg* \ + -filter {NAME =~ *sync_rewind/i_sync_out* && IS_SEQUENTIAL}] + +set_false_path -quiet \ + -from $src_clk \ + -to [get_cells -quiet -hier *cdc_sync_stage1_reg* \ + -filter {NAME =~ *sync_rewind/i_sync_in* && IS_SEQUENTIAL}] + <: } :> <: if {$async_dest_req} { :> set_max_delay -quiet -datapath_only \ diff --git a/library/axi_dmac/axi_dmac_hw.tcl b/library/axi_dmac/axi_dmac_hw.tcl index d6aed7bbeb..933a850d8c 100644 --- 
a/library/axi_dmac/axi_dmac_hw.tcl +++ b/library/axi_dmac/axi_dmac_hw.tcl @@ -16,6 +16,7 @@ set_module_property VALIDATION_CALLBACK axi_dmac_validate ad_ip_files axi_dmac [list \ $ad_hdl_dir/library/util_cdc/sync_bits.v \ + $ad_hdl_dir/library/util_cdc/sync_event.v \ $ad_hdl_dir/library/common/up_axi.v \ $ad_hdl_dir/library/util_axis_fifo/util_axis_fifo.v \ $ad_hdl_dir/library/util_axis_fifo/address_sync.v \ diff --git a/library/axi_dmac/axi_dmac_response_manager.v b/library/axi_dmac/axi_dmac_response_manager.v index 1550de812c..b71b6590a2 100644 --- a/library/axi_dmac/axi_dmac_response_manager.v +++ b/library/axi_dmac/axi_dmac_response_manager.v @@ -60,17 +60,22 @@ module axi_dmac_response_manager #( output reg [BYTES_PER_BURST_WIDTH-1:0] measured_burst_length = 'h0, output response_partial, output reg response_valid = 1'b0, - input response_ready + input response_ready, // Interface to requester side + input completion_req_valid, + input completion_req_last, + input [1:0] completion_transfer_id ); -localparam STATE_IDLE = 2'h0; -localparam STATE_ACC = 2'h1; -localparam STATE_WRITE_RESPR = 2'h2; +localparam STATE_IDLE = 3'h0; +localparam STATE_ACC = 3'h1; +localparam STATE_WRITE_RESPR = 3'h2; +localparam STATE_ZERO_COMPL = 3'h3; +localparam STATE_WRITE_ZRCMPL = 3'h4; -reg [1:0] state = STATE_IDLE; -reg [1:0] nx_state; +reg [2:0] state = STATE_IDLE; +reg [2:0] nx_state; localparam DEST_SRC_RATIO = DMA_DATA_WIDTH_DEST/DMA_DATA_WIDTH_SRC; @@ -86,6 +91,7 @@ localparam BYTES_PER_BEAT_WIDTH = DEST_SRC_RATIO_WIDTH + BYTES_PER_BEAT_WIDTH_SR localparam BURST_LEN_WIDTH = BYTES_PER_BURST_WIDTH - BYTES_PER_BEAT_WIDTH; wire do_acc_st; +wire do_compl; reg req_eot = 1'b0; reg req_response_partial = 1'b0; reg [BYTES_PER_BURST_WIDTH-1:0] req_response_dest_data_burst_length = 'h0; @@ -99,6 +105,10 @@ wire [BYTES_PER_BURST_WIDTH-1:0] response_dest_data_burst_length; wire [BURST_LEN_WIDTH-1:0] burst_lenght; reg [BURST_LEN_WIDTH-1:0] burst_pointer_end; +reg [1:0] to_complete_count = 
'h0; +reg [1:0] transfer_id = 'h0; +reg completion_req_last_found = 1'b0; + util_axis_fifo #( .DATA_WIDTH(BYTES_PER_BURST_WIDTH+1+1), .ADDRESS_WIDTH(0), @@ -150,7 +160,7 @@ begin if (req_resetn == 1'b0) begin response_valid <= 1'b0; end else begin - if (nx_state == STATE_WRITE_RESPR) begin + if (nx_state == STATE_WRITE_RESPR || nx_state == STATE_WRITE_ZRCMPL) begin response_valid <= 1'b1; end else if (response_ready == 1'b1) begin response_valid <= 1'b0; @@ -188,6 +198,9 @@ always @(*) begin STATE_IDLE: begin if (response_dest_valid == 1'b1) begin nx_state = STATE_ACC; + end else if (|to_complete_count) begin + if (transfer_id == completion_transfer_id) + nx_state = STATE_ZERO_COMPL; end end STATE_ACC: begin @@ -198,6 +211,20 @@ always @(*) begin nx_state = STATE_IDLE; end end + STATE_ZERO_COMPL: begin + if (|to_complete_count) begin + nx_state = STATE_WRITE_ZRCMPL; + end else begin + if (completion_req_last_found == 1'b1) begin + nx_state = STATE_IDLE; + end + end + end + STATE_WRITE_ZRCMPL:begin + if (response_ready == 1'b1) begin + nx_state = STATE_ZERO_COMPL; + end + end default: begin nx_state = STATE_IDLE; end @@ -212,4 +239,39 @@ always @(posedge req_clk) begin end end +assign do_compl = (state == STATE_WRITE_ZRCMPL) && response_ready; + +// Once the last completion request from the request generator is received +// we can wait for completions from the destination side +always @(posedge req_clk) begin + if (req_resetn == 1'b0) begin + completion_req_last_found <= 1'b0; + end else if (completion_req_valid) begin + completion_req_last_found <= completion_req_last; + end else if (state ==STATE_ZERO_COMPL && ~(|to_complete_count)) begin + completion_req_last_found <= 1'b0; + end +end + +// Track transfers so we can tell when the destination has completed all its +// transfers +always @(posedge req_clk) begin + if (req_resetn == 1'b0) begin + transfer_id <= 'h0; + end else if ((state == STATE_ACC && req_eot) || do_compl) begin + transfer_id <= transfer_id + 1; + end 
+end + +// Count how many transfers we need to complete +always @(posedge req_clk) begin + if (req_resetn == 1'b0) begin + to_complete_count <= 'h0; + end else if (completion_req_valid & ~do_compl) begin + to_complete_count <= to_complete_count + 1; + end else if (~completion_req_valid & do_compl) begin + to_complete_count <= to_complete_count - 1; + end +end + endmodule diff --git a/library/axi_dmac/data_mover.v b/library/axi_dmac/data_mover.v index fcd61d4c67..077d9f6b8f 100644 --- a/library/axi_dmac/data_mover.v +++ b/library/axi_dmac/data_mover.v @@ -47,10 +47,16 @@ module dmac_data_mover #( output [ID_WIDTH-1:0] response_id, input eot, + output rewind_req_valid, + input rewind_req_ready, + output [ID_WIDTH+3-1:0] rewind_req_data, + output reg bl_valid = 'b0, input bl_ready, output reg [BEATS_PER_BURST_WIDTH-1:0] measured_last_burst_length, + output block_descr_to_dst, + output [ID_WIDTH-1:0] source_id, output source_eot, @@ -97,36 +103,46 @@ wire transfer_abort_s; wire last_load; wire last; +wire early_tlast; assign xfer_req = active; assign response_id = id; assign source_id = id; -assign source_eot = eot; +assign source_eot = eot || early_tlast; assign last = eot ? last_eot : last_non_eot; assign s_axi_ready = (pending_burst & active) & ~transfer_abort_s; -assign m_axi_valid = (s_axi_sync_valid | transfer_abort_s) & pending_burst & active; -assign m_axi_data = transfer_abort_s == 1'b1 ? {DATA_WIDTH{1'b0}} : s_axi_data; -assign m_axi_last = last; +assign m_axi_valid = s_axi_sync_valid & s_axi_ready; +assign m_axi_data = s_axi_data; +assign m_axi_last = last || early_tlast; +assign m_axi_partial_burst = early_tlast; + +assign block_descr_to_dst = transfer_abort_s; generate if (ALLOW_ABORT == 1) begin + wire programmed_last; + reg transfer_abort = 1'b0; reg req_xlast_d = 1'b0; + reg [1:0] transfer_id = 2'b0; + assign programmed_last = (last == 1'b1 && eot == 1'b1 && req_xlast_d == 1'b1); /* * A 'last' on the external interface indicates the end of an packet. 
If such a * 'last' indicator is observed before the end of the current transfer stop - * accepting data on the external interface and complete the current transfer by - * writing zeros to the buffer. + * accepting data on the external interface until a new descriptor is + * received that is the first segment of a transfer. */ always @(posedge clk) begin if (resetn == 1'b0) begin transfer_abort <= 1'b0; + end else if (req_valid == 1'b1 && req_ready == 1'b1 && req_xlast_d == 1'b1) begin + transfer_abort <= 1'b0; end else if (m_axi_valid == 1'b1) begin - if (last == 1'b1 && eot == 1'b1 && req_xlast_d == 1'b1) begin + if (programmed_last == 1'b1) begin transfer_abort <= 1'b0; end else if (s_axi_last == 1'b1) begin transfer_abort <= 1'b1; @@ -135,18 +151,33 @@ generate if (ALLOW_ABORT == 1) begin end always @(posedge clk) begin - if (req_ready == 1'b1) begin + if (req_ready == 1'b1 && req_valid == 1'b1) begin req_xlast_d <= req_xlast; end end assign transfer_abort_s = transfer_abort; - assign m_axi_partial_burst = (transfer_abort == 1'b0) && (s_axi_last == 1'b1) && - !(last == 1'b1 && eot == 1'b1 && req_xlast_d == 1'b1); + assign early_tlast = (s_axi_ready == 1'b1) && (m_axi_valid == 1'b1) && + (s_axi_last == 1'b1) && (programmed_last == 1'b0); + + assign rewind_req_valid = early_tlast; + assign rewind_req_data = {transfer_id,req_xlast_d,id_next}; + + // The width of the id must fit the number of transfers that can be in flight + // in the burst memory + always @(posedge clk) begin + if (resetn == 1'b0) begin + transfer_id <= 2'b0; + end else if (req_valid == 1'b1 && req_ready == 1'b1) begin + transfer_id <= transfer_id + 1'b1; + end + end end else begin assign transfer_abort_s = 1'b0; - assign m_axi_partial_burst = 1'b0; + assign early_tlast = 1'b0; + assign rewind_req_valid = 1'b0; + assign rewind_req_data = 'h0; end endgenerate /* @@ -164,7 +195,7 @@ end // If we want to support zero delay between transfers we have to assert // req_ready on the same cycle on which 
the last load happens. assign last_load = m_axi_valid && last_eot && eot; -assign req_ready = last_load || ~active; +assign req_ready = last_load || ~active || (transfer_abort_s & rewind_req_ready); always @(posedge clk) begin if (req_ready) begin @@ -192,7 +223,7 @@ always @(posedge clk) begin end always @(posedge clk) begin - if (last_load) begin + if (last_load || early_tlast) begin bl_valid <= 1'b1; measured_last_burst_length <= beat_counter_minus_one; end else if (bl_ready) begin @@ -212,7 +243,7 @@ end always @(*) begin - if (m_axi_valid == 1'b1 && last == 1'b1) + if (m_axi_valid == 1'b1 && (last == 1'b1 || early_tlast == 1'b1)) id_next <= inc_id(id); else id_next <= id; diff --git a/library/axi_dmac/request_arb.v b/library/axi_dmac/request_arb.v index acc8e0f758..fd4538dbd3 100644 --- a/library/axi_dmac/request_arb.v +++ b/library/axi_dmac/request_arb.v @@ -246,6 +246,9 @@ wire [BEATS_PER_BURST_WIDTH_SRC-1:0] src_req_last_burst_length; wire src_req_sync_transfer_start; wire src_req_xlast; +reg [DMA_ADDRESS_WIDTH_DEST-1:0] src_req_dest_address_cur = 'h0; +reg src_req_xlast_cur = 1'b0; + /* TODO wire src_response_valid; wire src_response_ready; @@ -262,6 +265,7 @@ wire src_valid; wire [DMA_DATA_WIDTH_SRC-1:0] src_data; wire src_last; wire src_partial_burst; +wire block_descr_to_dst; wire src_fifo_valid; wire [DMA_DATA_WIDTH_SRC-1:0] src_fifo_data; wire src_fifo_last; @@ -276,6 +280,25 @@ wire dest_burst_info_partial; wire [ID_WIDTH-1:0] dest_burst_info_id; wire dest_burst_info_write; +reg src_dest_valid_hs = 1'b0; +wire src_dest_valid_hs_masked; +wire src_dest_ready_hs; + +wire req_rewind_req_valid; +wire [ID_WIDTH+3-1:0] req_rewind_req_data; + +wire completion_req_valid; +wire completion_req_last; +wire [1:0] completion_transfer_id; + +wire rewind_req_valid; +wire rewind_req_ready; +wire [ID_WIDTH+3-1:0] rewind_req_data; + +reg src_throttler_enabled = 1'b1; +wire src_throttler_enable; +wire rewind_state; + /* Unused for now wire response_src_valid; wire 
response_src_ready = 1'b1; @@ -584,8 +607,6 @@ wire src_address_eot = eot_mem_src[src_address_id]; assign dbg_src_address_id = src_address_id; assign dbg_src_data_id = src_data_id; -assign src_partial_burst = 1'b0; - dmac_src_mm_axi #( .ID_WIDTH(ID_WIDTH), .DMA_DATA_WIDTH(DMA_DATA_WIDTH_SRC), @@ -670,6 +691,7 @@ assign src_response_valid = 1'b0; assign src_response_resp = 2'b0; */ + dmac_src_axi_stream #( .ID_WIDTH(ID_WIDTH), .S_AXIS_DATA_WIDTH(DMA_DATA_WIDTH_SRC), @@ -692,10 +714,16 @@ dmac_src_axi_stream #( .eot(src_eot), + .rewind_req_valid(rewind_req_valid), + .rewind_req_ready(rewind_req_ready), + .rewind_req_data(rewind_req_data), + .bl_valid(src_bl_valid), .bl_ready(src_bl_ready), .measured_last_burst_length(src_burst_length), + .block_descr_to_dst(block_descr_to_dst), + .source_id(source_id), .source_eot(source_eot), @@ -712,10 +740,39 @@ dmac_src_axi_stream #( .s_axis_xfer_req(s_axis_xfer_req) ); +util_axis_fifo #( + .DATA_WIDTH(ID_WIDTH + 3), + .ADDRESS_WIDTH(0), + .ASYNC_CLK(ASYNC_CLK_REQ_SRC) +) i_rewind_req_fifo ( + .s_axis_aclk(src_clk), + .s_axis_aresetn(src_resetn), + .s_axis_valid(rewind_req_valid), + .s_axis_ready(rewind_req_ready), + .s_axis_empty(), + .s_axis_data(rewind_req_data), + .s_axis_room(), + + .m_axis_aclk(req_clk), + .m_axis_aresetn(req_resetn), + .m_axis_valid(req_rewind_req_valid), + .m_axis_ready(1'b1), + .m_axis_data(req_rewind_req_data), + .m_axis_level() +); + end else begin assign s_axis_ready = 1'b0; assign s_axis_xfer_req = 1'b0; +assign rewind_req_valid = 1'b0; +assign rewind_req_data = 'h0; + +assign req_rewind_req_valid = 'b0; +assign req_rewind_req_data = 'h0; + +assign src_partial_burst = 1'b0; +assign block_descr_to_dst = 1'b0; end @@ -736,7 +793,6 @@ assign dbg_src_data_id = 'h00; assign src_response_valid = 1'b0; assign src_response_resp = 2'b0; */ -assign src_partial_burst = 1'b0; dmac_src_fifo_inf #( .ID_WIDTH(ID_WIDTH), @@ -811,15 +867,37 @@ function compare_id; end endfunction +sync_event 
#(.ASYNC_CLK(ASYNC_CLK_REQ_SRC)) sync_rewind ( + .in_clk(req_clk), + .in_event(rewind_state), + .out_clk(src_clk), + .out_event(src_throttler_enable) +); + +always @(posedge src_clk) begin + if (src_resetn == 1'b0) begin + src_throttler_enabled <= 'b1; + end else if (rewind_req_valid) begin + src_throttler_enabled <= 'b0; + end else if (src_throttler_enable) begin + src_throttler_enabled <= 'b1; + end +end + /* * Make sure that we do not request more data than what fits into the * store-and-forward burst memory. + * Throttler must be blocked during rewind since it does not tolerate + * a decrement of the request ID. */ always @(posedge src_clk) begin if (src_resetn == 1'b0) begin src_throttled_request_id <= 'h00; + end else if (rewind_req_valid) begin + src_throttled_request_id <= rewind_req_data[ID_WIDTH-1:0]; end else if (src_throttled_request_id != src_request_id && - compare_id(src_throttled_request_id, src_data_request_id)) begin + compare_id(src_throttled_request_id, src_data_request_id) && + src_throttler_enabled) begin src_throttled_request_id <= inc_id(src_throttled_request_id); end end @@ -909,22 +987,12 @@ axi_register_slice #( }) ); -splitter #( - .NUM_M(2) -) i_req_splitter ( - .clk(req_clk), - .resetn(req_resetn), - .s_valid(req_valid), - .s_ready(req_ready), - .m_valid({ - req_gen_valid, - req_src_valid - }), - .m_ready({ - req_gen_ready, - req_src_ready - }) -); +// Don't let the request generator run in advance more than one descriptor +// The descriptor FIFO should not block the start of the request generator +// since it becomes ready earlier. 
+assign req_gen_valid = req_valid & req_ready; +assign req_src_valid = req_valid & req_ready; +assign req_ready = req_gen_ready & req_src_ready; util_axis_fifo #( .DATA_WIDTH(DMA_ADDRESS_WIDTH_DEST + 1), @@ -933,12 +1001,12 @@ util_axis_fifo #( ) i_dest_req_fifo ( .s_axis_aclk(src_clk), .s_axis_aresetn(src_resetn), - .s_axis_valid(src_dest_valid), - .s_axis_ready(src_dest_ready), + .s_axis_valid(src_dest_valid_hs_masked), + .s_axis_ready(src_dest_ready_hs), .s_axis_empty(), .s_axis_data({ - src_req_dest_address, - src_req_xlast + src_req_dest_address_cur, + src_req_xlast_cur }), .s_axis_room(), @@ -986,22 +1054,31 @@ util_axis_fifo #( .m_axis_level() ); -splitter #( - .NUM_M(2) -) i_src_splitter ( - .clk(src_clk), - .resetn(src_resetn), - .s_valid(src_req_spltr_valid), - .s_ready(src_req_spltr_ready), - .m_valid({ - src_req_valid, - src_dest_valid - }), - .m_ready({ - src_req_ready, - src_dest_ready - }) -); +// Save the descriptor in the source clock domain since the submission to +// destination is delayed. 
+always @(posedge src_clk) begin + if (src_req_valid == 1'b1 && src_req_ready == 1'b1) begin + src_req_dest_address_cur <= src_req_dest_address; + src_req_xlast_cur <= src_req_xlast; + end +end + +always @(posedge src_clk) begin + if (src_resetn == 1'b0) begin + src_dest_valid_hs <= 1'b0; + end else if (src_req_valid == 1'b1 && src_req_ready == 1'b1) begin + src_dest_valid_hs <= 1'b1; + end else if (src_dest_ready_hs == 1'b1) begin + src_dest_valid_hs <= 1'b0; + end +end + +// Forward the descriptor to the destination only after the source decided to +// do so +assign src_dest_valid_hs_masked = src_dest_valid_hs == 1'b1 && block_descr_to_dst == 1'b0; +assign src_req_spltr_ready = src_req_ready && src_dest_ready_hs; +assign src_req_valid = src_req_spltr_valid && src_req_spltr_ready; + /* Unused for now util_axis_fifo #( @@ -1035,9 +1112,18 @@ dmac_request_generator #( .request_id(request_id), .response_id(response_id), + .rewind_req_valid(req_rewind_req_valid), + .rewind_req_data(req_rewind_req_data), + .rewind_state(rewind_state), + + .completion_req_valid(completion_req_valid), + .completion_req_last(completion_req_last), + .completion_transfer_id(completion_transfer_id), + .req_valid(req_gen_valid), .req_ready(req_gen_ready), .req_burst_count(req_length[DMA_LENGTH_WIDTH-1:BYTES_PER_BURST_WIDTH]), + .req_xlast(req_xlast), .enable(req_enable), @@ -1067,7 +1153,12 @@ axi_dmac_response_manager #( .measured_burst_length(measured_burst_length), .response_partial(response_partial), .response_valid(response_valid), - .response_ready(response_ready) + .response_ready(response_ready), + + .completion_req_valid(completion_req_valid), + .completion_req_last(completion_req_last), + .completion_transfer_id(completion_transfer_id) + ); diff --git a/library/axi_dmac/request_generator.v b/library/axi_dmac/request_generator.v index 47956d9016..2f459789d8 100644 --- a/library/axi_dmac/request_generator.v +++ b/library/axi_dmac/request_generator.v @@ -44,9 +44,20 @@ module 
dmac_request_generator #( output [ID_WIDTH-1:0] request_id, input [ID_WIDTH-1:0] response_id, + input rewind_req_valid, + input [ID_WIDTH+3-1:0] rewind_req_data, + output rewind_state, + + output abort_req, + + output reg completion_req_valid = 1'b0, + output completion_req_last, + output [1:0] completion_transfer_id, + input req_valid, output reg req_ready, input [BURSTS_PER_TRANSFER_WIDTH-1:0] req_burst_count, + input req_xlast, input enable, @@ -55,14 +66,25 @@ module dmac_request_generator #( `include "inc_id.vh" -localparam STATE_IDLE = 3'h0; -localparam STATE_GEN_ID = 3'h1; -localparam STATE_REWIND_ID = 3'h2; -localparam STATE_CONSUME = 3'h3; -localparam STATE_WAIT_LAST = 3'h4; +localparam STATE_IDLE = 3'h0; +localparam STATE_GEN_ID = 3'h1; +localparam STATE_REWIND_ID = 3'h2; +localparam STATE_CONSUME = 3'h3; +localparam STATE_WAIT_LAST = 3'h4; reg [2:0] state = STATE_IDLE; reg [2:0] nx_state; + +reg [1:0] rew_transfer_id = 1'b0; +reg rew_req_xlast; +reg [ID_WIDTH-1:0] rew_id = 'h0; + +reg cur_transfer_id = 1'b0; +reg cur_req_xlast; + +wire transfer_id_match; +reg nx_completion_req_valid; + /* * Here we only need to count the number of bursts, which means we can ignore * the lower bits of the byte count. 
The last last burst may not contain the @@ -71,6 +93,7 @@ reg [2:0] nx_state; */ reg [BURSTS_PER_TRANSFER_WIDTH-1:0] burst_count = 'h00; +reg [BURSTS_PER_TRANSFER_WIDTH-1:0] cur_burst_length = 'h00; reg [ID_WIDTH-1:0] id; wire [ID_WIDTH-1:0] id_next = inc_id(id); wire incr_en; @@ -85,14 +108,24 @@ assign incr_id = (state == STATE_GEN_ID) && (incr_en == 1'b1); always @(posedge clk) begin if (state == STATE_IDLE) begin burst_count <= req_burst_count; + end else if (state == STATE_REWIND_ID) begin + burst_count <= cur_burst_length; end else if (incr_id == 1'b1) begin burst_count <= burst_count - 1'b1; end end +always @(posedge clk) begin + if (req_ready == 1'b1 & req_valid == 1'b1) begin + cur_req_xlast <= req_xlast; + cur_burst_length <= req_burst_count; + end +end always @(posedge clk) begin if (resetn == 1'b0) begin id <= 'h0; + end else if (state == STATE_REWIND_ID) begin + id <= rew_id; end else if (incr_id == 1'b1) begin id <= id_next; end @@ -102,23 +135,87 @@ always @(posedge clk) begin if (resetn == 1'b0) begin req_ready <= 1'b0; end else begin - req_ready <= (nx_state == STATE_IDLE); + req_ready <= (nx_state == STATE_IDLE || nx_state == STATE_CONSUME); end end +assign transfer_id_match = cur_transfer_id == rew_transfer_id[0]; + +always @(posedge clk) begin + if (resetn == 1'b0) begin + cur_transfer_id <= 1'b0; + end else if (req_valid == 1'b1 && req_ready == 1'b1) begin + cur_transfer_id <= ~cur_transfer_id; + end +end + +/* + * Once a rewind request is received we need to stop incrementing the burst ID. + * + * If the current segment matches the segment that was interrupted and + * if it was a last segment we ignore consecutive segments until the last + * segment is received, otherwise we can jump to the next segment. + * + * If the current segment is newer than the one that got interrupted and the + * interrupted one was a last segment we need to replay the current + * segment with the adjusted burst ID. 
If the interrupted segment was not last + * we need to consume/ignore all segments until a last segment is received. + * + * Completion requests are generated for every segment that is + * consumed/ignored. These are handled by the response_manager once the + * interrupted segment got transferred to the destination. + */ always @(*) begin nx_state = state; + nx_completion_req_valid = 0; case (state) STATE_IDLE: begin - if (req_valid == 1'b1) begin + if (rewind_req_valid == 1'b1) begin + nx_state = STATE_REWIND_ID; + end else if (req_valid == 1'b1) begin nx_state = STATE_GEN_ID; end end STATE_GEN_ID: begin - if (eot == 1'b1 && incr_en == 1'b1) begin + if (rewind_req_valid == 1'b1) begin + nx_state = STATE_REWIND_ID; + end else if (eot == 1'b1 && incr_en == 1'b1) begin + nx_state = STATE_IDLE; + end + end + STATE_REWIND_ID: begin + if (transfer_id_match) begin + if (rew_req_xlast) begin + nx_state = STATE_IDLE; + end else begin + nx_state = STATE_CONSUME; + end + end else begin + if (rew_req_xlast) begin + nx_state = STATE_GEN_ID; + end else if (cur_req_xlast) begin + nx_state = STATE_IDLE; + nx_completion_req_valid = 1; + end else begin + nx_state = STATE_CONSUME; + nx_completion_req_valid = 1; + end + end + end + STATE_CONSUME: begin + if (req_valid) begin + nx_completion_req_valid = 1; + nx_state = STATE_WAIT_LAST; + end + end + STATE_WAIT_LAST:begin + if (cur_req_xlast) begin nx_state = STATE_IDLE; + end else begin + nx_state = STATE_CONSUME; end end + default: begin nx_state = STATE_IDLE; end @@ -133,4 +230,22 @@ always @(posedge clk) begin end end +always @(posedge clk) begin + if (rewind_req_valid == 1'b1) begin + {rew_transfer_id, rew_req_xlast, rew_id} <= rewind_req_data; + end +end + +always @(posedge clk) begin + if (resetn == 1'b0) begin + completion_req_valid <= 1'b0; + end else begin + completion_req_valid <= nx_completion_req_valid; + end +end +assign completion_req_last = cur_req_xlast; +assign completion_transfer_id = rew_transfer_id; + +assign 
rewind_state = (state == STATE_REWIND_ID); + endmodule diff --git a/library/axi_dmac/src_axi_stream.v b/library/axi_dmac/src_axi_stream.v index 8647576634..33173aab2c 100644 --- a/library/axi_dmac/src_axi_stream.v +++ b/library/axi_dmac/src_axi_stream.v @@ -50,10 +50,16 @@ module dmac_src_axi_stream #( output [ID_WIDTH-1:0] response_id, input eot, + output rewind_req_valid, + input rewind_req_ready, + output [ID_WIDTH+3-1:0] rewind_req_data, + output bl_valid, input bl_ready, output [BEATS_PER_BURST_WIDTH-1:0] measured_last_burst_length, + output block_descr_to_dst, + output [ID_WIDTH-1:0] source_id, output source_eot, @@ -93,10 +99,16 @@ dmac_data_mover # ( .response_id(response_id), .eot(eot), + .rewind_req_valid(rewind_req_valid), + .rewind_req_ready(rewind_req_ready), + .rewind_req_data(rewind_req_data), + .bl_valid(bl_valid), .bl_ready(bl_ready), .measured_last_burst_length(measured_last_burst_length), + .block_descr_to_dst(block_descr_to_dst), + .source_id(source_id), .source_eot(source_eot),