`timescale 1ns / 1ps

// cpu: top level of a 5-stage pipelined 16-bit processor.
//
// Stages: 0 = fetch, 1 = register read, 2 = ALU, 3 = memory, 4 = writeback.
// This module instantiates the instruction/data memories, the register file,
// the five stage modules and the pipeline registers between them, and
// contains the control logic: PC update, stall/bubble generation and the
// operand-forwarding muxes that feed the ALU stage.
//
// Ports:
//   clk   - clock; all state here updates on the rising edge
//   reset - synchronous reset; clears the PC (and is passed to the pipeline
//           registers)
module cpu(clk, reset);
    input clk;
    input reset;

    // --- memory instances ---
    wire[15:0] IM_addr;
    wire[15:0] IM_data;
    inst_mem IM(.clka(clk),
                .addra(IM_addr),
                .douta(IM_data)
                );

    wire[15:0] DM_addr;
    wire[15:0] DM_data;
    wire[15:0] DM_wdata;
    wire DM_write;
    data_mem DM(.clka(clk),
                .addra(DM_addr),
                .douta(DM_data),
                .dina(DM_wdata),
                .wea(DM_write)
                );

    // register file
    wire[3:0] RF_A_addr, RF_B_addr, RF_C_addr; // read ports
    wire[15:0] RF_A_data, RF_B_data, RF_C_data;
    wire[3:0] RF_W_addr;                       // write port
    wire[15:0] RF_W_data;
    wire RF_W_write;
    wire branch_taken;
    reg_file RF(.clk(clk),
                .A_addr(RF_A_addr), .B_addr(RF_B_addr),
                .A_data(RF_A_data), .B_data(RF_B_data),
                .C_addr(RF_C_addr), .C_data(RF_C_data),
                .W_addr(RF_W_addr), .W_data(RF_W_data),
                .W_write(RF_W_write)
                );

    // PC
    reg[15:0] pc;

    // --- PIPELINE ---

    // feedback to control logic coming from various stages
    wire BranchPred;
    wire[15:0] BranchPredOff;
    wire BranchWrong;
    wire[15:0] BranchWrongOff;

    // -- stage 0: fetch
    // in: pc (reg defined above)
    // out: insn
    wire[15:0] pipe0_out_insn;
    pipe0 p0(.in_pc_addr(pc),
             .out_insn(pipe0_out_insn),
             // stage 0 owns IM
             .IM_addr(IM_addr), .IM_data(IM_data)
             );

    // -- stage 1: regread
    // in: insn
    wire[15:0] pipe1_in_insn;
    // out: insn, A, B, C, K, Pred
    wire[15:0] pipe1_out_insn, pipe1_out_A, pipe1_out_B, pipe1_out_C,
               pipe1_out_K;
    wire pipe1_out_Pred;
    pipe1 p1(.in_insn(pipe1_in_insn),
             .out_insn(pipe1_out_insn),
             .out_A(pipe1_out_A), .out_B(pipe1_out_B),
             .out_C(pipe1_out_C), .out_K(pipe1_out_K),
             .out_Pred(pipe1_out_Pred),
             // stage 1 owns read-ports of RF
             .RF_A_addr(RF_A_addr), .RF_B_addr(RF_B_addr),
             .RF_A_data(RF_A_data), .RF_B_data(RF_B_data),
             .RF_C_addr(RF_C_addr), .RF_C_data(RF_C_data),
             // to control logic
             .BranchPred(BranchPred), .BranchPredOff(BranchPredOff)
             );

    // -- stage 2: ALU
    // in: insn, A, B, C, K, Pred
    wire[15:0] pipe2_in_insn, pipe2_in_A, pipe2_in_B, pipe2_in_C, pipe2_in_K;
    wire pipe2_in_Pred;
    // out: insn, Q, C
    wire[15:0] pipe2_out_insn, pipe2_out_Q, pipe2_out_C;

    // data forwarding muxes
    // (reg so that always block below can assign)
    reg[1:0] fwd_Asrc, fwd_Bsrc, fwd_Csrc; // 00=normal, 01=p3_in, 10=p4_in
    wire[3:0] fwd_p2_op, fwd_p3_op, fwd_p4_op;
    wire[3:0] fwd_p2_rA, fwd_p2_rB, fwd_p2_rC, fwd_p3_R, fwd_p4_R;
    wire fwd_needA, fwd_needB, fwd_needC;
    wire fwd_p3_have, fwd_p3_stall;
    wire fwd_p4_have;
    wire[15:0] fwd_p3_data, fwd_p4_data;

    wire[15:0] pipe2_in_A_mux, pipe2_in_B_mux, pipe2_in_C_mux;
    assign pipe2_in_A_mux = (fwd_Asrc == 2'b00) ? pipe2_in_A :
                            (fwd_Asrc == 2'b01 ? fwd_p3_data : fwd_p4_data);
    assign pipe2_in_B_mux = (fwd_Bsrc == 2'b00) ? pipe2_in_B :
                            (fwd_Bsrc == 2'b01 ? fwd_p3_data : fwd_p4_data);
    assign pipe2_in_C_mux = (fwd_Csrc == 2'b00) ? pipe2_in_C :
                            (fwd_Csrc == 2'b01 ? fwd_p3_data : fwd_p4_data);

    pipe2 p2(.in_insn(pipe2_in_insn),
             .in_A(pipe2_in_A_mux), .in_B(pipe2_in_B_mux),
             .in_C(pipe2_in_C_mux), .in_K(pipe2_in_K),
             .in_Pred(pipe2_in_Pred),
             .out_insn(pipe2_out_insn),
             .out_Q(pipe2_out_Q), .out_C(pipe2_out_C),
             // to control logic
             .BranchWrong(BranchWrong), .BranchWrongOff(BranchWrongOff),
             .Branch(branch_taken)
             );

    // -- stage 3: mem
    // in: insn, Q, C
    wire[15:0] pipe3_in_insn, pipe3_in_Q, pipe3_in_C;
    // out: insn, Q
    wire[15:0] pipe3_out_insn, pipe3_out_Q;
    // out to data forwarding
    assign fwd_p3_data = pipe3_in_Q;
    pipe3 p3(.in_insn(pipe3_in_insn),
             .in_Q(pipe3_in_Q), .in_C(pipe3_in_C),
             .out_insn(pipe3_out_insn),
             .out_Q(pipe3_out_Q),
             // stage 3 owns DM
             .DM_addr(DM_addr), .DM_data(DM_data),
             .DM_wdata(DM_wdata), .DM_write(DM_write)
             );

    // -- stage 4: writeback
    // in: insn, Q
    wire[15:0] pipe4_in_insn, pipe4_in_Q;
    // out to data forwarding
    assign fwd_p4_data = pipe4_in_Q;
    pipe4 p4(.in_insn(pipe4_in_insn),
             .in_Q(pipe4_in_Q),
             // stage 4 owns RF's write port
             .RF_W_addr(RF_W_addr), .RF_W_data(RF_W_data),
             .RF_W_write(RF_W_write)
             );

    // --- PIPELINE REGISTERS ---

    parameter nop_insn = 16'hF000; // used for pipeline bubbles

    wire pipe0_stall, pipe0_bubble;
    wire pipe1_stall, pipe1_bubble;
    wire pipe2_stall, pipe2_bubble;
    wire pipe3_stall, pipe3_bubble;

    // stage 0 -> stage 1
    preg16 p0_1(.clk(clk), .reset(reset),
                .in(pipe0_out_insn), .out(pipe1_in_insn),
                .stall(pipe0_stall), .bubble(pipe0_bubble), .nop(nop_insn)
                );

    // stage 1 -> stage 2
    preg16 p1_1(.clk(clk), .reset(reset),
                .in(pipe1_out_insn), .out(pipe2_in_insn),
                .stall(pipe1_stall), .bubble(pipe1_bubble), .nop(nop_insn)
                );
    preg16 p1_2(.clk(clk), .reset(reset),
                .in(pipe1_out_A), .out(pipe2_in_A),
                .stall(pipe1_stall), .bubble(pipe1_bubble), .nop(0)
                );
    preg16 p1_3(.clk(clk), .reset(reset),
                .in(pipe1_out_B), .out(pipe2_in_B),
                .stall(pipe1_stall), .bubble(pipe1_bubble), .nop(0)
                );
    preg16 p1_4(.clk(clk), .reset(reset),
                .in(pipe1_out_C), .out(pipe2_in_C),
                .stall(pipe1_stall), .bubble(pipe1_bubble), .nop(0)
                );
    preg16 p1_5(.clk(clk), .reset(reset),
                .in(pipe1_out_K), .out(pipe2_in_K),
                .stall(pipe1_stall), .bubble(pipe1_bubble), .nop(0)
                );
    preg1  p1_6(.clk(clk), .reset(reset),
                .in(pipe1_out_Pred), .out(pipe2_in_Pred),
                .stall(pipe1_stall), .bubble(pipe1_bubble), .nop(0)
                );

    // stage 2 -> stage 3
    preg16 p2_1(.clk(clk), .reset(reset),
                .in(pipe2_out_insn), .out(pipe3_in_insn),
                .stall(pipe2_stall), .bubble(pipe2_bubble), .nop(nop_insn)
                );
    preg16 p2_2(.clk(clk), .reset(reset),
                .in(pipe2_out_Q), .out(pipe3_in_Q),
                .stall(pipe2_stall), .bubble(pipe2_bubble), .nop(0)
                );
    preg16 p2_3(.clk(clk), .reset(reset),
                .in(pipe2_out_C), .out(pipe3_in_C),
                .stall(pipe2_stall), .bubble(pipe2_bubble), .nop(0)
                );

    // stage 3 -> stage 4
    preg16 p3_1(.clk(clk), .reset(reset),
                .in(pipe3_out_insn), .out(pipe4_in_insn),
                .stall(pipe3_stall), .bubble(pipe3_bubble), .nop(nop_insn)
                );
    preg16 p3_2(.clk(clk), .reset(reset),
                .in(pipe3_out_Q), .out(pipe4_in_Q),
                .stall(pipe3_stall), .bubble(pipe3_bubble), .nop(0)
                );

    // Branch prediction is stubbed out: never predict taken.
    // NOTE(review): BranchPred is also connected to a port of pipe1 above;
    // if that port is an output this net has two drivers -- confirm.
    assign BranchPred = 0;

    // --- CONTROL ---

    // PC management
    // we have: BranchPred, BranchPredOff, BranchWrong, BranchWrongOff

    // carry PC through stage 2 as well
    wire[15:0] pc_stage1, pc_stage2;
    preg16 pc_1(.clk(clk), .reset(reset),
                .in(pc), .out(pc_stage1),
                .stall(pipe0_stall), .bubble(pipe0_bubble), .nop(0)
                );
    preg16 pc_2(.clk(clk), .reset(reset),
                .in(pc_stage1), .out(pc_stage2),
                .stall(pipe1_stall), .bubble(pipe1_bubble), .nop(0)
                );

    // PC register. Non-blocking assignments are used because pc is read on
    // the same clock edge by other clocked logic (the pc_1 pipeline register
    // and the fetch path); a blocking update here would make the value those
    // blocks sample depend on simulator event ordering.
    always @(posedge clk) begin
        if(reset)
            pc <= 0;
        else begin
            $display("---");
            $display("PC is %4h", pc);
            $display("p1: insn = %4h", pipe1_in_insn);
            $display("p2: insn = %4h", pipe2_in_insn);
            $display("p3: insn = %4h", pipe3_in_insn);
            $display("p4: insn = %4h", pipe4_in_insn);

            if(BranchWrong)
                // could be wrong two ways: branch predicted but not taken,
                // or branch not predicted but actually taken
                pc <= pc_stage2 + (pipe2_in_Pred ? 1 : BranchWrongOff);
            else if(BranchPred)
                pc <= pc_stage1 + BranchPredOff;
            else if(!pipe0_stall && !pipe0_bubble)
                pc <= pc + 1;
            // otherwise (stall or bubble at stage 0): pc holds its value
        end
    end

    // STALL/BUBBLE LOGIC
    reg HazardStall; // ALU stall -- bubble after p2, stall before it

    // on BranchPred or BranchWrong, bubble after p0
    assign pipe0_bubble = BranchPred || BranchWrong;
    assign pipe0_stall = HazardStall;

    // on BranchWrong, bubble after p1
    assign pipe1_bubble = BranchWrong;
    assign pipe1_stall = HazardStall;

    assign pipe2_bubble = HazardStall;
    assign pipe2_stall = 0;

    assign pipe3_bubble = 0;
    assign pipe3_stall = 0;

    // DATA FORWARDING
    // we derive the signals below from insns in p2, p3, p4
    // we output HazardStall and fwd_{A,B,C}src
    // src selects: 2'b00 is normal, 2'b01 is p3_in, 2'b10 is p4_in
    assign fwd_p2_op = pipe2_in_insn[15:12];
    assign fwd_p3_op = pipe3_in_insn[15:12];
    assign fwd_p4_op = pipe4_in_insn[15:12];

    assign fwd_p2_rA = pipe2_in_insn[7:4];
    assign fwd_p2_rB = pipe2_in_insn[3:0];
    assign fwd_p2_rC = pipe2_in_insn[11:8];
    assign fwd_p3_R = pipe3_in_insn[11:8];
    assign fwd_p4_R = pipe4_in_insn[11:8];

    // need* are instructions that need A,B,C in p2 (ALU stage)
    assign fwd_needA = fwd_p2_op == 2 ||  // ADD
                       fwd_p2_op == 4 ||  // SUB
                       fwd_p2_op == 8 ||  // MUL
                       fwd_p2_op == 9 ||  // DIV
                       fwd_p2_op == 10;   // MOD
    assign fwd_needB = fwd_p2_op == 2 ||  // ADD
                       fwd_p2_op == 4 ||  // SUB
                       fwd_p2_op == 8 ||  // MUL
                       fwd_p2_op == 9 ||  // DIV
                       fwd_p2_op == 10 || // MOD
                       fwd_p2_op == 13 || // LDR
                       fwd_p2_op == 14;   // STR
    assign fwd_needC = fwd_p2_op == 1 ||  // ST
                       fwd_p2_op == 5 ||  // JMPZ
                       fwd_p2_op == 6 ||  // JMPN
                       fwd_p2_op == 14;   // STR

    // p3_have are instructions that have Q ready *before* or *after* p3 (mem)
    // (p3_stall below differentiates the insns that need a stall -- ie, ld)
    assign fwd_p3_have = fwd_p3_op == 0 ||  // LD
                         fwd_p3_op == 2 ||  // ADD
                         fwd_p3_op == 3 ||  // LDC
                         fwd_p3_op == 4 ||  // SUB
                         fwd_p3_op == 8 ||  // MUL
                         fwd_p3_op == 9 ||  // DIV
                         fwd_p3_op == 10 || // MOD
                         fwd_p3_op == 13;   // LDR

    // p3_stall are instructions that don't have Q ready until *after* p3
    assign fwd_p3_stall = fwd_p3_op == 0 || // LD
                          fwd_p3_op == 13;  // LDR

    // p4_have are instructions that have Q ready *before* p4 (writeback)
    // these are *all* insns that write back to regfile
    assign fwd_p4_have = fwd_p4_op == 0 ||  // LD
                         fwd_p4_op == 2 ||  // ADD
                         fwd_p4_op == 3 ||  // LDC
                         fwd_p4_op == 4 ||  // SUB
                         fwd_p4_op == 8 ||  // MUL
                         fwd_p4_op == 9 ||  // DIV
                         fwd_p4_op == 10 || // MOD
                         fwd_p4_op == 13;   // LDR

    // now the fun part! assigning muxes and stalls based on input above.
    // This is the Magic Box(tm)
    //
    // Purely combinational: every output is given a default at the top so no
    // latch is inferred, then overridden per operand below.
    always @(fwd_p3_have or fwd_p3_stall or fwd_p4_have or
             fwd_needA or fwd_needB or fwd_needC or
             fwd_p2_rA or fwd_p2_rB or fwd_p2_rC or
             fwd_p3_R or fwd_p4_R)
    begin
        HazardStall = 0;
        // defaults: take operands from the pipeline register (no forwarding)
        fwd_Asrc = 2'b00;
        fwd_Bsrc = 2'b00;
        fwd_Csrc = 2'b00;

        // precedence:
        //   p3 first, then p4 (later result preferred over earlier)
        // also, NB:
        //   load stall always overrides others
        //   (load's second cycle is handled naturally when the load
        //   reaches p4 with the loaded data; nothing special needs to be
        //   done except stall the pipeline when a load with a needed
        //   result is in p3.)

        // handle inputs one at a time.
        if(fwd_needA) begin
            if(fwd_p3_have && (fwd_p3_R == fwd_p2_rA)) begin
                if(fwd_p3_stall)
                    HazardStall = 1;
                else
                    fwd_Asrc = 2'b01; // p3
            end
            else if(fwd_p4_have && (fwd_p4_R == fwd_p2_rA))
                fwd_Asrc = 2'b10; // p4
        end

        if(fwd_needB) begin
            if(fwd_p3_have && (fwd_p3_R == fwd_p2_rB)) begin
                if(fwd_p3_stall)
                    HazardStall = 1;
                else
                    fwd_Bsrc = 2'b01; // p3
            end
            else if(fwd_p4_have && (fwd_p4_R == fwd_p2_rB))
                fwd_Bsrc = 2'b10; // p4
        end

        if(fwd_needC) begin
            if(fwd_p3_have && (fwd_p3_R == fwd_p2_rC)) begin
                if(fwd_p3_stall)
                    HazardStall = 1;
                else
                    fwd_Csrc = 2'b01; // p3
            end
            else if(fwd_p4_have && (fwd_p4_R == fwd_p2_rC))
                fwd_Csrc = 2'b10; // p4
        end
    end

    // --- INSTRUMENTATION ---
    // Simulation-only counters: clock ticks, and instructions reaching
    // writeback whose opcode nibble is not 4'hF (the opcode of nop_insn).
    reg[31:0] count_tick;
    reg[31:0] count_insn;

    initial count_tick = 0;
    initial count_insn = 0;

    always @(posedge clk) begin
        count_tick <= count_tick + 1;
        if(pipe4_in_insn[15:12] != 4'hF)
            count_insn <= count_insn + 1;

        // prints the values from before this edge's non-blocking updates
        $display("Ticks: %d ; Instructions: %d", count_tick, count_insn);
    end

endmodule