-
Notifications
You must be signed in to change notification settings - Fork 50
Main Function Clock Crossings
Clock crossings are being incrementally implemented...
Clock Cross Type | Vivado | Quartus | Diamond | GHDL+Yosys+NextPNR | Efinity |
---|---|---|---|---|---|
Same clock domain 'global wires' (not a clock crossing, looks like one) | X | X | X | X | X |
Integer ratio freq, synchronous, streaming unidirectional 'volatile' (de)serializer buffers | X | X | X | | |
Non-integer ratio freq, streaming unidirectional 'volatile' (de)serializer buffers (async fifo based) | | | | | |
Flow controlled (async fifos) | X | | | | |
See Arty Board and AWS example for most recent working code.
Each PipelineC function is a single clock domain, so crossing between clock domains equates to moving data into and out of functions. When moving data between main functions of the same frequency, the connection is just a wire and can be used for composing structure outside of the normal hierarchical nesting of modules.
Ex. Consider two functions:
- slow() - running at 100 MHz
- fast() - running R times the rate of slow(), e.g. with R=3 fast() runs at 300 MHz
In the above image 2 values are streamed in the fast domain, operated on in parallel (ex. squared, single cycle) in the slow domain, and streamed in order back to the fast domain. (see example code at the bottom of this page).
These clock crossings are unidirectional and are described by a global variable, a READ function, and a WRITE function. This example shows sending data both directions from fast to slow and vice versa.
Ex. slow to fast
volatile uint32_t slow_to_fast;
uint32_t slow_to_fast_READ();
void slow_to_fast_WRITE(uint32_t data[R]);
Ex. fast to slow
volatile uint32_t fast_to_slow;
uint32_t[R] fast_to_slow_READ();
void fast_to_slow_WRITE(uint32_t data);
Pseudo code showing both funcs:
fast()
{
// Write a data into slow
fast_to_slow_WRITE(uint32_t single_data);
// Read a data from slow
uint32_t single_data = slow_to_fast_READ();
}
slow()
{
// Write R datas into fast
slow_to_fast_WRITE(uint32_t datas[R]);
// Read R datas from fast
uint32_t[R] datas = fast_to_slow_READ();
}
In the fast()
domain, only single units of data, uint32_t
are used. In the slow domain, R
times as much data per clock cycle is needed, and thus the slow()
domain functions use uint32_t[R]
arrays of data. The elements of these arrays cross to and from the fast()
domain as single units of data.
Above, the global variable representing the clock crossing is defined as volatile
because the clock crossing is between two different clocks and the buffering between them inserts
zeros into the data stream. R can be rounded to a neighboring integer (rounded down or up depending on clock cross direction, non-integer ratios, etc.). For example, with slow@100MHz and fast@120MHz, R=1.2, and R could be rounded to 1 or 2 for moving data in/out of the domains without a bandwidth reduction.
Non-volatile crossings shouldn't really be thought of as clock crossings, since non-volatile here means that the clocks are synchronous (specifically the same frequency for now, R=1 only). Such 'crossings' can really be thought of as arbitrary direct connections between main functions. These can be used for exposing modules with globally visible ports/wires.
Data can be moved between arbitrary clock domains at arbitrary rates with async FIFO buffers.
Ex. any clock domain to any other
uint32_t async_fifo[FIFO_DEPTH]; // Specify type and depth of fifo, depth rounded as needed to accommodate read and write sizes
uint32_t[2] data, uint1_t read_valid = async_fifo_READ_2(uint1_t read_enable); // Any? read size is allowed, ex. 2
uint1_t write_ready = async_fifo_WRITE_4(uint32_t data[4], uint1_t write_enable); // Any? write size is allowed, ex. 4
At the moment only equal read and write sizes are supported!
These crossings are not volatile
since they are valid during each cycle / do not insert zeros into data stream (read and write clock domains are independent).
The above examples are pseudo code. C doesn't support returning arrays so the actual implementation of this involves a bit of ugly code generation. See examples mentioned at the top of this page and the autogenerated code page.
Example real PipelineC:
// Target device selection
#pragma PART "xc7a35ticsg324-1l" // xc7a35ticsg324-1l = Arty, xcvu9p-flgb2104-2-i = AWS F1
// Clock rates (MHz) of the two main functions; fast is 3x the rate of slow here
#pragma MAIN_MHZ fast 300.0
#pragma MAIN_MHZ slow 100.0
// Stream of uint64_t values
#include "uintN_t.h"
// One stream element: a 64-bit data word together with a 1-bit valid field
typedef struct uint64_s
{
uint64_t data;
uint1_t valid;
} uint64_s;
#include "uint64_s_array_N_t.h" // Auto generated
// Global variable naming the fast -> slow crossing (volatile, element type uint64_s)
volatile uint64_s fast_to_slow;
// NOTE(review): presumably supplies fast_to_slow_READ/WRITE and the
// fast_to_slow_read_t/write_t types used below - confirm against the generated header
#include "fast_to_slow_clock_crossing.h" // Auto generated
// Global variable naming the slow -> fast crossing (volatile, element type uint64_s)
volatile uint64_s slow_to_fast;
// NOTE(review): presumably supplies slow_to_fast_READ/WRITE, slow_to_fast_RATIO and the
// slow_to_fast_read_t/write_t types used below - confirm against the generated header
#include "slow_to_fast_clock_crossing.h" // Auto generated
// fast(): main function for the fast clock domain.
// Hands the incoming stream element to the fast_to_slow crossing and
// returns the element read from the slow_to_fast crossing.
uint64_s fast(uint64_s in_data)
{
  // Fast -> slow: place the input in slot 0 of the write array
  fast_to_slow_write_t tx;
  tx.data[0] = in_data;
  fast_to_slow_WRITE(tx);

  // Slow -> fast: slot 0 of the read array is this function's output
  slow_to_fast_read_t rx = slow_to_fast_READ();
  return rx.data[0];
}
// slow(): main function for the slow clock domain.
// Reads the array of elements coming from fast(), squares the data field of
// each one, and writes the resulting array back toward fast().
void slow()
{
  // Elements received from the fast domain
  fast_to_slow_read_t rx = fast_to_slow_READ();

  // Square every data value; the valid field is copied through unchanged
  slow_to_fast_write_t tx;
  uint32_t k;
  for(k = 0; k < slow_to_fast_RATIO; k += 1)
  {
    tx.data[k].valid = rx.data[k].valid;
    tx.data[k].data = rx.data[k].data * rx.data[k].data;
  }

  // Results go back into the fast domain
  slow_to_fast_WRITE(tx);
}
// NOTE(review): wire.h presumably provides the WIRE_WRITE macro used below,
// and led0_3.c the led0..led3 wires - confirm against those files
#include "compiler.h"
#include "wire.h"
#include "arty/src/leds/led0_3.c"
// Clock rates (MHz) of the two main functions; the ratio here is not an integer
#pragma MAIN_MHZ fast 166.66
#pragma MAIN_MHZ slow 25.0
#include "uintN_t.h"
// Element type carried through both fifos
#define data_t uint32_t
// Non-volatile array global: fifo from fast to slow, declared with 4 elements
data_t fast_to_slow[4];
// NOTE(review): presumably supplies fast_to_slow_READ_1/WRITE_1 and the
// fast_to_slow_read_t/write_t types used below - confirm against the generated header
#include "fast_to_slow_clock_crossing.h" // Auto generated
// Non-volatile array global: fifo from slow to fast, declared with 4 elements
data_t slow_to_fast[4];
// NOTE(review): presumably supplies slow_to_fast_READ_1/WRITE_1 and the
// slow_to_fast_read_t/write_t types used below - confirm against the generated header
#include "slow_to_fast_clock_crossing.h" // Auto generated
// fast(): test driver for the fast clock domain.
// Tries to write the next value of an incrementing pattern into the
// fast_to_slow fifo and tries to read one value from the slow_to_fast fifo,
// checking it against the next expected pattern value.
// led0 is driven high until a mismatch has been recorded; led1 is driven with !reset.
// While reset is set, both fifo enables are held low and all state returns to 0.
void fast(uint1_t reset)
{
  // State kept between calls
  static uint1_t test_failed = 0;
  static data_t test_data = 0;
  static data_t expected = 0;

  // Status leds, using the failure flag as it stood on entry
  uint1_t led = !test_failed;
  WIRE_WRITE(uint1_t, led0, led)
  WIRE_WRITE(uint1_t, led1, !reset)

  // Offer the current pattern value to slow, unless in reset
  data_t tx_word[1];
  tx_word[0] = test_data;
  uint1_t wr_en = !reset;
  fast_to_slow_write_t wr = fast_to_slow_WRITE_1(tx_word, wr_en);
  if(reset)
  {
    // Restart the outgoing pattern
    test_data = 0;
  }
  else if(wr.ready)
  {
    // Write went through, advance to the next pattern value
    test_data += 1;
  }

  // Ask for one element of the pattern sent by slow, unless in reset
  uint1_t rd_en = !reset;
  slow_to_fast_read_t rd = slow_to_fast_READ_1(rd_en);
  if(reset)
  {
    // Restart the checker
    test_failed = 0;
    expected = 0;
  }
  else if(rd_en & rd.valid)
  {
    if(rd.data[0] == expected)
    {
      // Match, keep following the pattern
      expected += 1;
    }
    else
    {
      // Mismatch, record the failure
      test_failed = 1;
    }
  }
}
// slow(): test driver for the slow clock domain.
// Tries to write the next value of an incrementing pattern into the
// slow_to_fast fifo and tries to read one value from the fast_to_slow fifo,
// checking it against the next expected pattern value.
// led2 is driven high until a mismatch has been recorded; led3 is driven with !reset.
// While reset is set, both fifo enables are held low and all state returns to 0.
void slow(uint1_t reset)
{
  // State kept between calls
  static uint1_t test_failed = 0;
  static data_t test_data = 0;
  static data_t expected = 0;

  // Status leds, using the failure flag as it stood on entry
  uint1_t led = !test_failed;
  WIRE_WRITE(uint1_t, led2, led)
  WIRE_WRITE(uint1_t, led3, !reset)

  // Offer the current pattern value to fast, unless in reset
  data_t tx_word[1];
  tx_word[0] = test_data;
  uint1_t wr_en = !reset;
  slow_to_fast_write_t wr = slow_to_fast_WRITE_1(tx_word, wr_en);
  if(reset)
  {
    // Restart the outgoing pattern
    test_data = 0;
  }
  else if(wr.ready)
  {
    // Write went through, advance to the next pattern value
    test_data += 1;
  }

  // Ask for one element of the pattern sent by fast, unless in reset
  uint1_t rd_en = !reset;
  fast_to_slow_read_t rd = fast_to_slow_READ_1(rd_en);
  if(reset)
  {
    // Restart the checker
    test_failed = 0;
    expected = 0;
  }
  else if(rd_en & rd.valid)
  {
    if(rd.data[0] == expected)
    {
      // Match, keep following the pattern
      expected += 1;
    }
    else
    {
      // Mismatch, record the failure
      test_failed = 1;
    }
  }
}