diff --git a/himbaechel/uarch/gowin/constids.inc b/himbaechel/uarch/gowin/constids.inc
index 77de2ee814..1add903340 100644
--- a/himbaechel/uarch/gowin/constids.inc
+++ b/himbaechel/uarch/gowin/constids.inc
@@ -890,6 +890,9 @@ X(WRE)
 
 // BSRAM
 X(BSRAM_SUBTYPE)
+X(WRITE_MODE)
+X(READ_MODE)
+X(RESET_MODE)
 X(BIT_WIDTH)
 X(BIT_WIDTH_0)
 X(BIT_WIDTH_1)
diff --git a/himbaechel/uarch/gowin/gowin.cc b/himbaechel/uarch/gowin/gowin.cc
index 751f1865b0..5bb7bd2f96 100644
--- a/himbaechel/uarch/gowin/gowin.cc
+++ b/himbaechel/uarch/gowin/gowin.cc
@@ -259,6 +259,7 @@ void GowinImpl::adjust_dsp_pin_mapping(void)
 void GowinImpl::prePlace() { assign_cell_info(); }
 void GowinImpl::postPlace()
 {
+    gwu.have_SP32();
     if (ctx->debug) {
         log_info("================== Final Placement ===================\n");
         for (auto &cell : ctx->cells) {
diff --git a/himbaechel/uarch/gowin/gowin.h b/himbaechel/uarch/gowin/gowin.h
index abff9031e3..728d6df21c 100644
--- a/himbaechel/uarch/gowin/gowin.h
+++ b/himbaechel/uarch/gowin/gowin.h
@@ -95,7 +95,9 @@ NPNR_PACKED_STRUCT(struct Extra_chip_data_POD {
     Bottom_io_POD bottom_io;
     RelSlice diff_io_types;
     // chip flags
-    static constexpr int32_t HAS_SP32 = 0;
+    static constexpr int32_t HAS_SP32 = 1;
+    static constexpr int32_t NEED_SP_FIX = 2;
+    static constexpr int32_t NEED_BSRAM_OUTREG_FIX = 4;
 });
 
 } // namespace
diff --git a/himbaechel/uarch/gowin/gowin_arch_gen.py b/himbaechel/uarch/gowin/gowin_arch_gen.py
index e84e853bef..73a5ade1cb 100644
--- a/himbaechel/uarch/gowin/gowin_arch_gen.py
+++ b/himbaechel/uarch/gowin/gowin_arch_gen.py
@@ -15,7 +15,9 @@ BEL_FLAG_SIMPLE_IO = 0x100
 
 
 # Chip flags
-CHIP_HAS_SP32 = 0x1
+CHIP_HAS_SP32              = 0x1
+CHIP_NEED_SP_FIX           = 0x2
+CHIP_NEED_BSRAM_OUTREG_FIX = 0x4
 
 # Z of the bels
 # sync with C++ part!
@@ -1008,8 +1010,17 @@ def main():
         db = pickle.load(f)
 
     chip_flags = 0;
-    if device not in {"GW1NS-4", "GW1N-9"}:
-        chip_flags &= CHIP_HAS_SP32;
+    # XXX compatibility
+    if not hasattr(db, "chip_flags"):
+        if device not in {"GW1NS-4", "GW1N-9"}:
+            chip_flags |= CHIP_HAS_SP32;
+    else:
+        if "HAS_SP32" in db.chip_flags:
+            chip_flags |= CHIP_HAS_SP32;
+        if "NEED_SP_FIX" in db.chip_flags:
+            chip_flags |= CHIP_NEED_SP_FIX;
+        if "NEED_BSRAM_OUTREG_FIX" in db.chip_flags:
+            chip_flags |= CHIP_NEED_BSRAM_OUTREG_FIX;
 
     X = db.cols;
     Y = db.rows;
diff --git a/himbaechel/uarch/gowin/gowin_utils.cc b/himbaechel/uarch/gowin/gowin_utils.cc
index 3aba699d67..ac6153af1f 100644
--- a/himbaechel/uarch/gowin/gowin_utils.cc
+++ b/himbaechel/uarch/gowin/gowin_utils.cc
@@ -118,6 +118,18 @@ bool GowinUtils::have_SP32(void)
     return extra->chip_flags & Extra_chip_data_POD::HAS_SP32;
 }
 
+bool GowinUtils::need_SP_fix(void)
+{
+    const Extra_chip_data_POD *extra = reinterpret_cast<const Extra_chip_data_POD *>(ctx->chip_info->extra_data.get());
+    return extra->chip_flags & Extra_chip_data_POD::NEED_SP_FIX;
+}
+
+bool GowinUtils::need_BSRAM_OUTREG_fix(void)
+{
+    const Extra_chip_data_POD *extra = reinterpret_cast<const Extra_chip_data_POD *>(ctx->chip_info->extra_data.get());
+    return extra->chip_flags & Extra_chip_data_POD::NEED_BSRAM_OUTREG_FIX;
+}
+
 std::unique_ptr<CellInfo> GowinUtils::create_cell(IdString name, IdString type)
 {
     NPNR_ASSERT(!ctx->cells.count(name));
diff --git a/himbaechel/uarch/gowin/gowin_utils.h b/himbaechel/uarch/gowin/gowin_utils.h
index ce6b8d9fca..c4d3e628dd 100644
--- a/himbaechel/uarch/gowin/gowin_utils.h
+++ b/himbaechel/uarch/gowin/gowin_utils.h
@@ -34,6 +34,11 @@ struct GowinUtils
     Loc get_pair_iologic_bel(Loc loc);
     BelId get_io_bel_from_iologic(BelId bel);
 
+    // BSRAM
+    bool have_SP32(void);
+    bool need_SP_fix(void);
+    bool need_BSRAM_OUTREG_fix(void);
+
     // DSP
     inline int get_dsp_18_z(int z) const { return z & (~3); }
     inline int get_dsp_9_idx(int z) const { return z & 3; }
@@ -81,9 +86,6 @@ struct GowinUtils
         return is_global_wire(ctx->getPipSrcWire(pip)) || is_global_wire(ctx->getPipDstWire(pip));
     }
 
-    // chip dependent
-    bool have_SP32(void);
-
     // make cell but do not include it in the list of chip cells.
     std::unique_ptr<CellInfo> create_cell(IdString name, IdString type);
 };
diff --git a/himbaechel/uarch/gowin/pack.cc b/himbaechel/uarch/gowin/pack.cc
index 193892e2e7..ee1ce9f200 100644
--- a/himbaechel/uarch/gowin/pack.cc
+++ b/himbaechel/uarch/gowin/pack.cc
@@ -508,13 +508,13 @@ struct GowinPacker
         make_iob_nets(*out_iob);
     }
 
-    IdString create_aux_name(IdString main_name, int idx = 0)
+    IdString create_aux_name(IdString main_name, int idx = 0, const char *str_suffix = "_aux$")
     {
         std::string sfx("");
         if (idx) {
             sfx = std::to_string(idx);
         }
-        return ctx->id(main_name.str(ctx) + std::string("_aux$") + sfx);
+        return ctx->id(main_name.str(ctx) + std::string(str_suffix) + sfx);
     }
 
     BelId get_aux_iologic_bel(const CellInfo &ci)
@@ -1331,22 +1331,173 @@ struct GowinPacker
         }
     }
 
-    // If the memory is controlled by the CE, then it is logical for the OCE to
-    // also respond to this signal, unless the OCE is controlled separately.
-    void bsram_handle_sp_oce(CellInfo *ci, IdString ce_pin, IdString oce_pin)
+    // Some chips cannot, for some reason, use internal BSRAM registers to
+    // implement READ_MODE=1'b1 (pipeline) with a word width other than 32 or
+    // 36 bits.
+    // We work around this by adding an external DFF and using BSRAM
+    // as READ_MODE=1'b0 (bypass).
+    void bsram_fix_outreg(CellInfo *ci, std::vector<std::unique_ptr<CellInfo>> &new_cells)
     {
-        const NetInfo *net = ci->getPort(oce_pin);
-        NPNR_ASSERT(ci->getPort(ce_pin) != nullptr);
-        if (net == nullptr || net->name == ctx->id("$PACKER_VCC") || net->name == ctx->id("$PACKER_GND")) {
+        int bit_width = ci->params.at(id_BIT_WIDTH).as_int64();
+        if (bit_width == 32 || bit_width == 36) {
+            return;
+        }
+        int read_mode = ci->params.at(id_READ_MODE).as_int64();
+        if (read_mode == 0) {
+            return;
+        }
+        NetInfo *ce_net = ci->getPort(id_CE);
+        NetInfo *oce_net = ci->getPort(id_OCE);
+        if (ce_net == nullptr || oce_net == nullptr) {
+            return;
+        }
+        if (ce_net->name == ctx->id("$PACKER_GND") || oce_net->name == ctx->id("$PACKER_GND")) {
+            return;
+        }
+
+        if (ctx->verbose) {
+            log_info(" apply the BSRAM OUTREG fix\n");
+        }
+        ci->setParam(id_READ_MODE, 0);
+        ci->disconnectPort(id_OCE);
+        ci->connectPort(id_OCE, ce_net);
+
+        NetInfo *reset_net = ci->getPort(id_RESET);
+        bool sync_reset = ci->params.at(id_RESET_MODE).as_string() == std::string("SYNC");
+        IdString dff_type = sync_reset ? id_DFFRE : id_DFFCE;
+        IdString reset_port = sync_reset ? id_RESET : id_CLEAR;
+
+        for (int i = 0; i < bit_width; ++i) {
+            IdString do_name = ctx->idf("DO[%d]", i);
+            const NetInfo *net = ci->getPort(do_name);
             if (net != nullptr) {
-                ci->disconnectPort(oce_pin);
+                if (net->users.empty()) {
+                    ci->disconnectPort(do_name);
+                    continue;
+                }
+
+                // create DFF
+                auto cache_dff_cell = gwu.create_cell(create_aux_name(ci->name, i, "_cache_dff$"), dff_type);
+                CellInfo *cache_dff = cache_dff_cell.get();
+                cache_dff->addInput(id_CE);
+                cache_dff->connectPort(id_CE, oce_net);
+
+                cache_dff->addInput(reset_port);
+                cache_dff->connectPort(reset_port, reset_net);
+
+                ci->copyPortTo(id_CLK, cache_dff, id_CLK);
+
+                cache_dff->addOutput(id_Q);
+                ci->movePortTo(do_name, cache_dff, id_Q);
+
+                cache_dff->addInput(id_D);
+                ci->connectPorts(do_name, cache_dff, id_D);
+
+                new_cells.push_back(std::move(cache_dff_cell));
             }
-            ci->copyPortTo(ce_pin, ci, oce_pin);
         }
+    }
+
+    // Analysis of the images generated by the IDE showed that some components
+    // are being added at the input and output of the BSRAM. Two LUTs are
+    // added on the WRE and CE inputs (strangely, OCE is not affected), a pair
+    // of LUT-DFFs on each DO output, and one or two flipflops of different
+    // types in the auxiliary network.
+    // The semantics of these additions are unclear, but we can replicate this behavior.
+    // Fix BSRAM in single port mode.
+    void bsram_fix_sp(CellInfo *ci, std::vector<std::unique_ptr<CellInfo>> &new_cells)
+    {
+        int bit_width = ci->params.at(id_BIT_WIDTH).as_int64();
+
         if (ctx->verbose) {
-            log_info("%s: %s = %s = %s\n", ctx->nameOf(ci), ce_pin.c_str(ctx), oce_pin.c_str(ctx),
-                     ctx->nameOf(ci->getPort(oce_pin)));
+            log_info(" apply the SP fix\n");
+        }
+        // create WRE LUT
+        auto wre_lut_cell = gwu.create_cell(create_aux_name(ci->name, 0, "_wre_lut$"), id_LUT4);
+        CellInfo *wre_lut = wre_lut_cell.get();
+        wre_lut->setParam(id_INIT, 0x8888);
+        ci->movePortTo(id_CE, wre_lut, id_I0);
+        ci->movePortTo(id_WRE, wre_lut, id_I1);
+        wre_lut->addOutput(id_F);
+        ci->connectPorts(id_WRE, wre_lut, id_F);
+
+        // create CE LUT
+        auto ce_lut_cell = gwu.create_cell(create_aux_name(ci->name, 0, "_ce_lut$"), id_LUT4);
+        CellInfo *ce_lut = ce_lut_cell.get();
+        ce_lut->setParam(id_INIT, 0xeeee);
+        wre_lut->copyPortTo(id_I0, ce_lut, id_I0);
+        wre_lut->copyPortTo(id_I1, ce_lut, id_I1);
+        ce_lut->addOutput(id_F);
+        ci->connectPorts(id_CE, ce_lut, id_F);
+
+        // create ce reg
+        int write_mode = ci->params.at(id_WRITE_MODE).as_int64();
+        IdString dff_type = write_mode ? id_DFF : id_DFFR;
+        auto ce_pre_dff_cell = gwu.create_cell(create_aux_name(ci->name, 0, "_ce_pre_dff$"), dff_type);
+        CellInfo *ce_pre_dff = ce_pre_dff_cell.get();
+        ce_pre_dff->addInput(id_D);
+        ce_lut->copyPortTo(id_I0, ce_pre_dff, id_D);
+        ci->copyPortTo(id_CLK, ce_pre_dff, id_CLK);
+        if (dff_type == id_DFFR) {
+            wre_lut->copyPortTo(id_I1, ce_pre_dff, id_RESET);
+        }
+        ce_pre_dff->addOutput(id_Q);
+
+        // new ce src with Q pin (used by output pins, not by BSRAM itself)
+        CellInfo *new_ce_net_src = ce_pre_dff;
+
+        // add delay register in pipeline mode
+        int read_mode = ci->params.at(id_READ_MODE).as_int64();
+        if (read_mode) {
+            auto ce_pipe_dff_cell = gwu.create_cell(create_aux_name(ci->name, 0, "_ce_pipe_dff$"), id_DFF);
+            new_cells.push_back(std::move(ce_pipe_dff_cell));
+            CellInfo *ce_pipe_dff = new_cells.back().get();
+            ce_pipe_dff->addInput(id_D);
+            new_ce_net_src->connectPorts(id_Q, ce_pipe_dff, id_D);
+            ci->copyPortTo(id_CLK, ce_pipe_dff, id_CLK);
+            ce_pipe_dff->addOutput(id_Q);
+            new_ce_net_src = ce_pipe_dff;
+        }
+
+        // used outputs of the BSRAM convert to cached
+        for (int i = 0; i < bit_width; ++i) {
+            IdString do_name = ctx->idf("DO[%d]", i);
+            const NetInfo *net = ci->getPort(do_name);
+            if (net != nullptr) {
+                if (net->users.empty()) {
+                    ci->disconnectPort(do_name);
+                    continue;
+                }
+                // create cache lut
+                auto cache_lut_cell = gwu.create_cell(create_aux_name(ci->name, i, "_cache_lut$"), id_LUT4);
+                CellInfo *cache_lut = cache_lut_cell.get();
+                cache_lut->setParam(id_INIT, 0xcaca);
+                cache_lut->addInput(id_I0);
+                cache_lut->addInput(id_I1);
+                cache_lut->addInput(id_I2);
+                ci->movePortTo(do_name, cache_lut, id_F);
+                ci->connectPorts(do_name, cache_lut, id_I1);
+                new_ce_net_src->connectPorts(id_Q, cache_lut, id_I2);
+
+                // create cache DFF
+                auto cache_dff_cell = gwu.create_cell(create_aux_name(ci->name, i, "_cache_dff$"), id_DFFE);
+                CellInfo *cache_dff = cache_dff_cell.get();
+                cache_dff->addInput(id_CE);
+                cache_dff->addInput(id_D);
+                ci->copyPortTo(id_CLK, cache_dff, id_CLK);
+                new_ce_net_src->connectPorts(id_Q, cache_dff, id_CE);
+                cache_lut->copyPortTo(id_I1, cache_dff, id_D);
+                cache_dff->addOutput(id_Q);
+                cache_dff->connectPorts(id_Q, cache_lut, id_I0);
+
+                new_cells.push_back(std::move(cache_lut_cell));
+                new_cells.push_back(std::move(cache_dff_cell));
+            }
         }
+
+        new_cells.push_back(std::move(wre_lut_cell));
+        new_cells.push_back(std::move(ce_lut_cell));
+        new_cells.push_back(std::move(ce_pre_dff_cell));
     }
 
     void pack_ROM(CellInfo *ci)
@@ -1432,7 +1583,6 @@ struct GowinPacker
             ci->renamePort(ctx->idf("BLKSELB[%d]", i), ctx->idf("BLKSELB%d", i));
         }
 
-        bsram_handle_sp_oce(ci, id_CEB, id_OCE);
         ci->copyPortTo(id_OCE, ci, id_OCEB);
 
         // Port A
@@ -1565,7 +1715,19 @@ struct GowinPacker
         }
 
         int bit_width = ci->params.at(id_BIT_WIDTH).as_int64();
-        bsram_handle_sp_oce(ci, id_CE, id_OCE);
+
+        // XXX strange WRE<->CE relations
+        // Gowin IDE adds two LUTs to the WRE and CE signals. The logic is
+        // unclear, but without them effects occur. Perhaps this is a
+        // correction of some BSRAM defects.
+        if (gwu.need_SP_fix()) {
+            bsram_fix_sp(ci, new_cells);
+        }
+
+        // Some chips have faulty output registers
+        if (gwu.need_BSRAM_OUTREG_fix()) {
+            bsram_fix_outreg(ci, new_cells);
+        }
 
         // XXX UG285-1.3.6_E Gowin BSRAM & SSRAM User Guide:
         // For GW1N-9/GW1NR-9/GW1NS-4 series, 32/36-bit SP/SPX9 is divided into two
@@ -2726,9 +2888,6 @@ struct GowinPacker
         pack_gsr();
         ctx->check();
 
-        pack_inv();
-        ctx->check();
-
         pack_wideluts();
         ctx->check();
 
@@ -2751,6 +2910,9 @@ struct GowinPacker
         pack_dsp();
         ctx->check();
 
+        pack_inv();
+        ctx->check();
+
         pack_buffered_nets();
 
         ctx->fixupHierarchy();