Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gowin. Add fix for Single Port BSRAM #1332

Merged
merged 2 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions himbaechel/uarch/gowin/constids.inc
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,9 @@ X(WRE)

// BSRAM
X(BSRAM_SUBTYPE)
X(WRITE_MODE)
X(READ_MODE)
X(RESET_MODE)
X(BIT_WIDTH)
X(BIT_WIDTH_0)
X(BIT_WIDTH_1)
Expand Down
1 change: 1 addition & 0 deletions himbaechel/uarch/gowin/gowin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ void GowinImpl::adjust_dsp_pin_mapping(void)
// Pre-placement hook: its only action is to run assign_cell_info().
void GowinImpl::prePlace()
{
    assign_cell_info();
}
void GowinImpl::postPlace()
{
gwu.have_SP32();
if (ctx->debug) {
log_info("================== Final Placement ===================\n");
for (auto &cell : ctx->cells) {
Expand Down
4 changes: 3 additions & 1 deletion himbaechel/uarch/gowin/gowin.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,9 @@ NPNR_PACKED_STRUCT(struct Extra_chip_data_POD {
Bottom_io_POD bottom_io;
RelSlice<IdString> diff_io_types;
// chip flags
static constexpr int32_t HAS_SP32 = 0;
static constexpr int32_t HAS_SP32 = 1;
static constexpr int32_t NEED_SP_FIX = 2;
static constexpr int32_t NEED_BSRAM_OUTREG_FIX = 4;
});

} // namespace
Expand Down
17 changes: 14 additions & 3 deletions himbaechel/uarch/gowin/gowin_arch_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
BEL_FLAG_SIMPLE_IO = 0x100

# Chip flags
CHIP_HAS_SP32 = 0x1
CHIP_HAS_SP32 = 0x1
CHIP_NEED_SP_FIX = 0x2
CHIP_NEED_BSRAM_OUTREG_FIX = 0x4

# Z of the bels
# sync with C++ part!
Expand Down Expand Up @@ -1008,8 +1010,17 @@ def main():
db = pickle.load(f)

chip_flags = 0;
if device not in {"GW1NS-4", "GW1N-9"}:
chip_flags &= CHIP_HAS_SP32;
# XXX compatibility
if not hasattr(db, "chip_flags"):
if device not in {"GW1NS-4", "GW1N-9"}:
chip_flags |= CHIP_HAS_SP32;
else:
if "HAS_SP32" in db.chip_flags:
chip_flags |= CHIP_HAS_SP32;
if "NEED_SP_FIX" in db.chip_flags:
chip_flags |= CHIP_NEED_SP_FIX;
if "NEED_BSRAM_OUTREG_FIX" in db.chip_flags:
chip_flags |= CHIP_NEED_BSRAM_OUTREG_FIX;

X = db.cols;
Y = db.rows;
Expand Down
12 changes: 12 additions & 0 deletions himbaechel/uarch/gowin/gowin_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,18 @@ bool GowinUtils::have_SP32(void)
return extra->chip_flags & Extra_chip_data_POD::HAS_SP32;
}

bool GowinUtils::need_SP_fix(void)
{
const Extra_chip_data_POD *extra = reinterpret_cast<const Extra_chip_data_POD *>(ctx->chip_info->extra_data.get());
return extra->chip_flags & Extra_chip_data_POD::NEED_SP_FIX;
}

bool GowinUtils::need_BSRAM_OUTREG_fix(void)
{
const Extra_chip_data_POD *extra = reinterpret_cast<const Extra_chip_data_POD *>(ctx->chip_info->extra_data.get());
return extra->chip_flags & Extra_chip_data_POD::NEED_BSRAM_OUTREG_FIX;
}

std::unique_ptr<CellInfo> GowinUtils::create_cell(IdString name, IdString type)
{
NPNR_ASSERT(!ctx->cells.count(name));
Expand Down
8 changes: 5 additions & 3 deletions himbaechel/uarch/gowin/gowin_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ struct GowinUtils
Loc get_pair_iologic_bel(Loc loc);
BelId get_io_bel_from_iologic(BelId bel);

// BSRAM
bool have_SP32(void);
bool need_SP_fix(void);
bool need_BSRAM_OUTREG_fix(void);

// DSP
inline int get_dsp_18_z(int z) const { return z & (~3); }
inline int get_dsp_9_idx(int z) const { return z & 3; }
Expand Down Expand Up @@ -81,9 +86,6 @@ struct GowinUtils
return is_global_wire(ctx->getPipSrcWire(pip)) || is_global_wire(ctx->getPipDstWire(pip));
}

// chip dependent
bool have_SP32(void);

// make cell but do not include it in the list of chip cells.
std::unique_ptr<CellInfo> create_cell(IdString name, IdString type);
};
Expand Down
196 changes: 179 additions & 17 deletions himbaechel/uarch/gowin/pack.cc
Original file line number Diff line number Diff line change
Expand Up @@ -508,13 +508,13 @@ struct GowinPacker
make_iob_nets(*out_iob);
}

// Builds the identifier for an auxiliary cell derived from main_name: the
// main name's string, followed by a suffix string, followed by idx in decimal.
// An idx of 0 contributes no digits (the numeric suffix stays empty).
// NOTE(review): two signature lines and two return lines appear back to back
// below. They look like the before/after forms of a single change (fixed
// "_aux$" suffix vs. a str_suffix parameter defaulting to "_aux$") - confirm
// which pair is the live one before building this text.
IdString create_aux_name(IdString main_name, int idx = 0)
IdString create_aux_name(IdString main_name, int idx = 0, const char *str_suffix = "_aux$")
{
// numeric part of the name; left empty when idx == 0
std::string sfx("");
if (idx) {
sfx = std::to_string(idx);
}
return ctx->id(main_name.str(ctx) + std::string("_aux$") + sfx);
return ctx->id(main_name.str(ctx) + std::string(str_suffix) + sfx);
}

BelId get_aux_iologic_bel(const CellInfo &ci)
Expand Down Expand Up @@ -1331,22 +1331,173 @@ struct GowinPacker
}
}

// If the memory is controlled by the CE, then it is logical for the OCE to
// also respond to this signal, unless the OCE is controlled separately.
// NOTE(review): the signature on the next line (bsram_handle_sp_oce) and the
// statements further down that use ce_pin / oce_pin do not match the
// parameters of bsram_fix_outreg; they look like leftovers of a removed
// function interleaved by a diff rendering - confirm before compiling.
void bsram_handle_sp_oce(CellInfo *ci, IdString ce_pin, IdString oce_pin)
// Some chips cannot, for some reason, use internal BSRAM registers to
// implement READ_MODE=1'b1 (pipeline) with a word width other than 32 or
// 36 bits.
// We work around this by adding an external DFF and using BSRAM
// as READ_MODE=1'b0 (bypass).
//
// ci        - the BSRAM cell to rewrite; its BIT_WIDTH, READ_MODE and
//             RESET_MODE params and its CE/OCE/RESET/CLK/DO[i] ports are read.
// new_cells - receives ownership of every DFF cell created here.
void bsram_fix_outreg(CellInfo *ci, std::vector<std::unique_ptr<CellInfo>> &new_cells)
{
// NOTE(review): the next three lines reference oce_pin / ce_pin (see note above).
const NetInfo *net = ci->getPort(oce_pin);
NPNR_ASSERT(ci->getPort(ce_pin) != nullptr);
if (net == nullptr || net->name == ctx->id("$PACKER_VCC") || net->name == ctx->id("$PACKER_GND")) {
int bit_width = ci->params.at(id_BIT_WIDTH).as_int64();
// widths 32 and 36 are left untouched
if (bit_width == 32 || bit_width == 36) {
return;
}
int read_mode = ci->params.at(id_READ_MODE).as_int64();
// READ_MODE == 0 needs no change
if (read_mode == 0) {
return;
}
NetInfo *ce_net = ci->getPort(id_CE);
NetInfo *oce_net = ci->getPort(id_OCE);
// nothing to do when CE or OCE is unconnected
if (ce_net == nullptr || oce_net == nullptr) {
return;
}
// nor when either of them is tied to the packer's GND net
if (ce_net->name == ctx->id("$PACKER_GND") || oce_net->name == ctx->id("$PACKER_GND")) {
return;
}

if (ctx->verbose) {
log_info(" apply the BSRAM OUTREG fix\n");
}
// switch the BSRAM to READ_MODE 0 and drive its OCE from the CE net;
// oce_net still holds the net that was on OCE before this rewiring
ci->setParam(id_READ_MODE, 0);
ci->disconnectPort(id_OCE);
ci->connectPort(id_OCE, ce_net);

// RESET_MODE "SYNC" selects DFFRE with a RESET port, anything else
// selects DFFCE with a CLEAR port
NetInfo *reset_net = ci->getPort(id_RESET);
bool sync_reset = ci->params.at(id_RESET_MODE).as_string() == std::string("SYNC");
IdString dff_type = sync_reset ? id_DFFRE : id_DFFCE;
IdString reset_port = sync_reset ? id_RESET : id_CLEAR;

// one external flip-flop per connected-and-used DO[i] output
for (int i = 0; i < bit_width; ++i) {
IdString do_name = ctx->idf("DO[%d]", i);
const NetInfo *net = ci->getPort(do_name);
if (net != nullptr) {
// NOTE(review): oce_pin is not declared in bsram_fix_outreg (see note above).
ci->disconnectPort(oce_pin);
// an output net with no users is simply disconnected, no DFF is made
if (net->users.empty()) {
ci->disconnectPort(do_name);
continue;
}

// create DFF
auto cache_dff_cell = gwu.create_cell(create_aux_name(ci->name, i, "_cache_dff$"), dff_type);
CellInfo *cache_dff = cache_dff_cell.get();
// the DFF's CE is driven by the net originally attached to OCE
cache_dff->addInput(id_CE);
cache_dff->connectPort(id_CE, oce_net);

cache_dff->addInput(reset_port);
cache_dff->connectPort(reset_port, reset_net);

ci->copyPortTo(id_CLK, cache_dff, id_CLK);

// the net previously on DO[i] is moved to the DFF's Q ...
cache_dff->addOutput(id_Q);
ci->movePortTo(do_name, cache_dff, id_Q);

// ... and DO[i] is then connected to the DFF's D
// (presumably through a newly created net - confirm connectPorts semantics)
cache_dff->addInput(id_D);
ci->connectPorts(do_name, cache_dff, id_D);

new_cells.push_back(std::move(cache_dff_cell));
}
// NOTE(review): ce_pin / oce_pin are not declared in bsram_fix_outreg (see note above).
ci->copyPortTo(ce_pin, ci, oce_pin);
}
}

// Analysis of the images generated by the IDE showed that some components
// are being added at the input and output of the BSRAM. Two LUTs are
// added on the WRE and CE inputs (strangely, OCE is not affected), a pair
// of LUT-DFFs on each DO output, and one or two flipflops of different
// types in the auxiliary network.
// The semantics of these additions are unclear, but we can replicate this behavior.
// Fix BSRAM in single port mode.
//
// ci        - the BSRAM cell to rewrite; its BIT_WIDTH, WRITE_MODE and
//             READ_MODE params and its CE/WRE/CLK/DO[i] ports are read.
// new_cells - receives ownership of every LUT and DFF cell created here.
void bsram_fix_sp(CellInfo *ci, std::vector<std::unique_ptr<CellInfo>> &new_cells)
{
int bit_width = ci->params.at(id_BIT_WIDTH).as_int64();

if (ctx->verbose) {
// NOTE(review): the next log_info call uses ce_pin / oce_pin, which are not
// parameters of bsram_fix_sp; it looks like a leftover of a removed function
// interleaved by a diff rendering - confirm before compiling.
log_info("%s: %s = %s = %s\n", ctx->nameOf(ci), ce_pin.c_str(ctx), oce_pin.c_str(ctx),
ctx->nameOf(ci->getPort(oce_pin)));
log_info(" apply the SP fix\n");
}
// create WRE LUT
// The original CE net moves to I0 and the original WRE net to I1; the LUT
// output F then drives the BSRAM's WRE.
// INIT 0x8888 is presumably "I0 AND I1" under the usual LUT INIT bit
// ordering - TODO confirm.
auto wre_lut_cell = gwu.create_cell(create_aux_name(ci->name, 0, "_wre_lut$"), id_LUT4);
CellInfo *wre_lut = wre_lut_cell.get();
wre_lut->setParam(id_INIT, 0x8888);
ci->movePortTo(id_CE, wre_lut, id_I0);
ci->movePortTo(id_WRE, wre_lut, id_I1);
wre_lut->addOutput(id_F);
ci->connectPorts(id_WRE, wre_lut, id_F);

// create CE LUT
// Takes the same two input nets as the WRE LUT; its output F drives the
// BSRAM's CE. INIT 0xeeee is presumably "I0 OR I1" - TODO confirm.
auto ce_lut_cell = gwu.create_cell(create_aux_name(ci->name, 0, "_ce_lut$"), id_LUT4);
CellInfo *ce_lut = ce_lut_cell.get();
ce_lut->setParam(id_INIT, 0xeeee);
wre_lut->copyPortTo(id_I0, ce_lut, id_I0);
wre_lut->copyPortTo(id_I1, ce_lut, id_I1);
ce_lut->addOutput(id_F);
ci->connectPorts(id_CE, ce_lut, id_F);

// create ce reg
// Non-zero WRITE_MODE selects a plain DFF, zero selects DFFR. D takes the
// net on the CE LUT's I0 (the original CE net) and CLK is the BSRAM clock;
// for DFFR, RESET takes the net on the WRE LUT's I1 (the original WRE net).
int write_mode = ci->params.at(id_WRITE_MODE).as_int64();
IdString dff_type = write_mode ? id_DFF : id_DFFR;
auto ce_pre_dff_cell = gwu.create_cell(create_aux_name(ci->name, 0, "_ce_pre_dff$"), dff_type);
CellInfo *ce_pre_dff = ce_pre_dff_cell.get();
ce_pre_dff->addInput(id_D);
ce_lut->copyPortTo(id_I0, ce_pre_dff, id_D);
ci->copyPortTo(id_CLK, ce_pre_dff, id_CLK);
if (dff_type == id_DFFR) {
wre_lut->copyPortTo(id_I1, ce_pre_dff, id_RESET);
}
ce_pre_dff->addOutput(id_Q);

// new ce src with Q pin (used by output pins, not by BSRAM itself)
CellInfo *new_ce_net_src = ce_pre_dff;

// add delay register in pipeline mode
// A non-zero READ_MODE inserts one more DFF after ce_pre_dff; that DFF then
// becomes the source used by the output stage below. Unlike the other
// cells, it is handed to new_cells immediately and accessed via back().
int read_mode = ci->params.at(id_READ_MODE).as_int64();
if (read_mode) {
auto ce_pipe_dff_cell = gwu.create_cell(create_aux_name(ci->name, 0, "_ce_pipe_dff$"), id_DFF);
new_cells.push_back(std::move(ce_pipe_dff_cell));
CellInfo *ce_pipe_dff = new_cells.back().get();
ce_pipe_dff->addInput(id_D);
new_ce_net_src->connectPorts(id_Q, ce_pipe_dff, id_D);
ci->copyPortTo(id_CLK, ce_pipe_dff, id_CLK);
ce_pipe_dff->addOutput(id_Q);
new_ce_net_src = ce_pipe_dff;
}

// used outputs of the BSRAM convert to cached
for (int i = 0; i < bit_width; ++i) {
IdString do_name = ctx->idf("DO[%d]", i);
const NetInfo *net = ci->getPort(do_name);
if (net != nullptr) {
// an output net with no users is simply disconnected, nothing is added
if (net->users.empty()) {
ci->disconnectPort(do_name);
continue;
}
// create cache lut
// The net previously on DO[i] moves to the LUT's F; DO[i] feeds I1,
// the registered CE source feeds I2, and I0 is fed by the cache DFF
// created below. INIT 0xcaca is presumably a mux "I2 ? I1 : I0" -
// TODO confirm.
auto cache_lut_cell = gwu.create_cell(create_aux_name(ci->name, i, "_cache_lut$"), id_LUT4);
CellInfo *cache_lut = cache_lut_cell.get();
cache_lut->setParam(id_INIT, 0xcaca);
cache_lut->addInput(id_I0);
cache_lut->addInput(id_I1);
cache_lut->addInput(id_I2);
ci->movePortTo(do_name, cache_lut, id_F);
ci->connectPorts(do_name, cache_lut, id_I1);
new_ce_net_src->connectPorts(id_Q, cache_lut, id_I2);

// create cache DFF
// DFFE clocked by the BSRAM clock, enabled by the registered CE source;
// D shares the net on the LUT's I1 (the BSRAM DO[i]) and Q feeds the
// LUT's I0.
auto cache_dff_cell = gwu.create_cell(create_aux_name(ci->name, i, "_cache_dff$"), id_DFFE);
CellInfo *cache_dff = cache_dff_cell.get();
cache_dff->addInput(id_CE);
cache_dff->addInput(id_D);
ci->copyPortTo(id_CLK, cache_dff, id_CLK);
new_ce_net_src->connectPorts(id_Q, cache_dff, id_CE);
cache_lut->copyPortTo(id_I1, cache_dff, id_D);
cache_dff->addOutput(id_Q);
cache_dff->connectPorts(id_Q, cache_lut, id_I0);

new_cells.push_back(std::move(cache_lut_cell));
new_cells.push_back(std::move(cache_dff_cell));
}
}

// hand the remaining helper cells over only now: their raw pointers were
// still in use above
new_cells.push_back(std::move(wre_lut_cell));
new_cells.push_back(std::move(ce_lut_cell));
new_cells.push_back(std::move(ce_pre_dff_cell));
}

void pack_ROM(CellInfo *ci)
Expand Down Expand Up @@ -1432,7 +1583,6 @@ struct GowinPacker
ci->renamePort(ctx->idf("BLKSELB[%d]", i), ctx->idf("BLKSELB%d", i));
}

bsram_handle_sp_oce(ci, id_CEB, id_OCE);
ci->copyPortTo(id_OCE, ci, id_OCEB);

// Port A
Expand Down Expand Up @@ -1565,7 +1715,19 @@ struct GowinPacker
}

int bit_width = ci->params.at(id_BIT_WIDTH).as_int64();
bsram_handle_sp_oce(ci, id_CE, id_OCE);

// XXX strange WRE<->CE relations
// Gowin IDE adds two LUTs to the WRE and CE signals. The logic is
// unclear, but without them unwanted effects occur. Perhaps this is a
// correction of some BSRAM defects.
if (gwu.need_SP_fix()) {
bsram_fix_sp(ci, new_cells);
}

// Some chips have faulty output registers
if (gwu.need_BSRAM_OUTREG_fix()) {
bsram_fix_outreg(ci, new_cells);
}

// XXX UG285-1.3.6_E Gowin BSRAM & SSRAM User Guide:
// For GW1N-9/GW1NR-9/GW1NS-4 series, 32/36-bit SP/SPX9 is divided into two
Expand Down Expand Up @@ -2726,9 +2888,6 @@ struct GowinPacker
pack_gsr();
ctx->check();

pack_inv();
ctx->check();

pack_wideluts();
ctx->check();

Expand All @@ -2751,6 +2910,9 @@ struct GowinPacker
pack_dsp();
ctx->check();

pack_inv();
ctx->check();

pack_buffered_nets();

ctx->fixupHierarchy();
Expand Down