Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions backend/cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type Network struct {
}

type Transport struct {
PropagateFault bool
PropagateFault bool `toml:"propagate_fault"`
}

type TFTP struct {
Expand All @@ -27,9 +27,18 @@ type TFTP struct {
}

type Blcu struct {
IP string
DownloadOrderId uint16
UploadOrderId uint16
IP string `toml:"ip"`
DownloadOrderId uint16 `toml:"download_order_id"`
UploadOrderId uint16 `toml:"upload_order_id"`
}

// TCP holds the TCP client settings read from the [tcp] table of the TOML
// configuration file. Every duration is given as an integer number of
// milliseconds. When building the client config, main only applies the
// timeout, keep-alive and backoff values that are greater than zero;
// MaxRetries is always passed through unchanged.
type TCP struct {
BackoffMinMs int `toml:"backoff_min_ms"` // Minimum backoff duration in milliseconds
BackoffMaxMs int `toml:"backoff_max_ms"` // Maximum backoff duration in milliseconds
BackoffMultiplier float64 `toml:"backoff_multiplier"` // Exponential backoff multiplier
MaxRetries int `toml:"max_retries"` // Maximum number of retries before cycling (0 or negative = infinite)
ConnectionTimeout int `toml:"connection_timeout_ms"` // Connection timeout in milliseconds
KeepAlive int `toml:"keep_alive_ms"` // Keep-alive interval in milliseconds
}

type Config struct {
Expand All @@ -39,5 +48,6 @@ type Config struct {
Network Network
Transport Transport
TFTP TFTP
TCP TCP
Blcu Blcu
}
60 changes: 40 additions & 20 deletions backend/cmd/config.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,56 @@
# Hyperloop UPV Backend Configuration
# Configuration file for the H10 Control Station backend server

# <-- CHECKLIST -->
# 1. Check that all the boards you want to use are declared in the [vehicle] section
# 2. Set the branch you want to use for the ADJ configuration
# 3. Toggle the Fault Propagation to your needs (true/false)
# 4. Check the TCP configuration and make sure to use the needed Keep Alive settings


# Vehicle Configuration
[vehicle]
boards = ["HVSCU", "PCU", "BLCU"]

# ADJ (Architecture Description JSON) Configuration
[adj]
branch = "software" # Leave blank when using ADJ as a submodule (like this: "")
test = true # Enable test mode

# Network Configuration
[network]
manual = false # Manual network device selection

# Transport Configuration
[transport]
propagate_fault = true

# TCP Configuration
# These settings control how the backend reconnects to boards when connections are lost
[tcp]
backoff_min_ms = 100 # Minimum backoff duration in milliseconds
backoff_max_ms = 5000 # Maximum backoff duration in milliseconds
backoff_multiplier = 1.5 # Exponential backoff multiplier (e.g., 1.5 means each retry waits 1.5x longer)
max_retries = 0 # Maximum retries before cycling (0 = infinite retries, recommended for persistent reconnection)
connection_timeout_ms = 1000 # Connection timeout in milliseconds
keep_alive_ms = 1000 # Keep-alive interval in milliseconds

# BLCU (Boot Loader Control Unit) Configuration
[blcu]
ip = "127.0.0.1" # TFTP server IP address
download_order_id = 1 # Packet ID for download orders (0 = use default)
upload_order_id = 2 # Packet ID for upload orders (0 = use default)

# TFTP Configuration
[tftp]
block_size = 131072 # TFTP block size in bytes (128kB)
retries = 3 # Maximum number of retries before aborting transfer
timeout_ms = 5000 # Timeout between retries in milliseconds
backoff_factor = 2 # Backoff multiplier for retry delays
enable_progress = true # Enable progress callbacks during transfers

# <-- DO NOT TOUCH BELOW THIS LINE -->

# Server Configuration
[server.ethernet-view]
address = "127.0.0.1:4040"
Expand All @@ -33,23 +73,3 @@ order_data = "/orderStructures"
programable_boards = "/uploadableBoards"
connections = "/backend"
files = "/"

# ADJ (Architecture Description JSON) Configuration
[adj]
branch = "software" # Leave blank when using ADJ as a submodule (like this: "")
test = true # Enable test mode

# Network Configuration
[network]
manual = true # Manual network device selection

# Transport Configuration
[transport]
propagate_fault = true

[tftp]
block_size = 131072 # TFTP block size in bytes (128kB)
retries = 3 # Maximum number of retries before aborting transfer
timeout_ms = 5000 # Timeout between retries in milliseconds
backoff_factor = 2 # Backoff multiplier for retry delays
enable_progress = true # Enable progress callbacks during transfers
37 changes: 36 additions & 1 deletion backend/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,42 @@ func main() {
if err != nil {
panic("Failed to resolve local backend TCP client address")
}
go transp.HandleClient(tcp.NewClientConfig(backendTcpClientAddr), fmt.Sprintf("%s:%d", adj.Info.Addresses[board.Name], adj.Info.Ports[TcpServer]))
// Create TCP client config with custom parameters from config
clientConfig := tcp.NewClientConfig(backendTcpClientAddr)

// Apply custom timeout if specified
if config.TCP.ConnectionTimeout > 0 {
clientConfig.Timeout = time.Duration(config.TCP.ConnectionTimeout) * time.Millisecond
}

// Apply custom keep-alive if specified
if config.TCP.KeepAlive > 0 {
clientConfig.KeepAlive = time.Duration(config.TCP.KeepAlive) * time.Millisecond
}

// Apply custom backoff parameters
if config.TCP.BackoffMinMs > 0 || config.TCP.BackoffMaxMs > 0 || config.TCP.BackoffMultiplier > 0 {
minBackoff := 100 * time.Millisecond // default
maxBackoff := 5 * time.Second // default
multiplier := 1.5 // default

if config.TCP.BackoffMinMs > 0 {
minBackoff = time.Duration(config.TCP.BackoffMinMs) * time.Millisecond
}
if config.TCP.BackoffMaxMs > 0 {
maxBackoff = time.Duration(config.TCP.BackoffMaxMs) * time.Millisecond
}
if config.TCP.BackoffMultiplier > 0 {
multiplier = config.TCP.BackoffMultiplier
}

clientConfig.ConnectionBackoffFunction = tcp.NewExponentialBackoff(minBackoff, multiplier, maxBackoff)
}

// Apply max retries (0 or negative means infinite)
clientConfig.MaxConnectionRetries = config.TCP.MaxRetries

go transp.HandleClient(clientConfig, fmt.Sprintf("%s:%d", adj.Info.Addresses[board.Name], adj.Info.Ports[TcpServer]))
i++
}

Expand Down
4 changes: 4 additions & 0 deletions backend/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ require (
github.com/pin/tftp/v3 v3.0.0
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c
github.com/rs/zerolog v1.29.0
github.com/stretchr/testify v1.9.0
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1
)

Expand All @@ -22,6 +23,7 @@ require (
github.com/ProtonMail/go-crypto v1.0.0 // indirect
github.com/cloudflare/circl v1.3.7 // indirect
github.com/cyphar/filepath-securejoin v0.2.4 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emirpasic/gods v1.18.1 // indirect
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
github.com/go-git/go-billy/v5 v5.5.0 // indirect
Expand All @@ -32,6 +34,7 @@ require (
github.com/mattn/go-isatty v0.0.17 // indirect
github.com/pjbgf/sha1cd v0.3.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
github.com/skeema/knownhosts v1.2.2 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
Expand All @@ -40,6 +43,7 @@ require (
golang.org/x/sys v0.31.0 // indirect
golang.org/x/tools v0.13.0 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

require (
Expand Down
22 changes: 15 additions & 7 deletions backend/pkg/transport/network/tcp/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,29 +39,37 @@ func (client *Client) Dial() (net.Conn, error) {
var err error
var conn net.Conn
client.logger.Info().Msg("dialing")
// The max connection retries will not work because the for loop never completes, it always returns and the function is called again by transport.
// Reset retry counter when starting a new dial attempt
client.currentRetries = 0

for client.config.MaxConnectionRetries <= 0 || client.currentRetries < client.config.MaxConnectionRetries {
client.currentRetries++
conn, err = client.config.DialContext(client.config.Context, "tcp", client.address)

backoffDuration := client.config.ConnectionBackoffFunction(client.currentRetries)
client.logger.Error().Stack().Err(err).Dur("backoff", backoffDuration).Int("retries", client.currentRetries+1).Msg("retrying")
time.Sleep(backoffDuration)

if err == nil {
client.logger.Info().Msg("connected")
client.currentRetries = 0
return conn, nil
}

// Check if context was cancelled
if client.config.Context.Err() != nil {
client.logger.Error().Stack().Err(client.config.Context.Err()).Msg("canceled")
return nil, client.config.Context.Err()
}

// Check if we should retry this error
if netErr, ok := err.(net.Error); !client.config.TryReconnect || (!errors.Is(err, syscall.ECONNREFUSED) && (!ok || !netErr.Timeout())) {
client.logger.Error().Stack().Err(err).Msg("failed")
client.logger.Error().Stack().Err(err).Msg("failed with non-retryable error")
return nil, err
}

// Increment retry counter and calculate backoff
client.currentRetries++
backoffDuration := client.config.ConnectionBackoffFunction(client.currentRetries)
client.logger.Error().Stack().Err(err).Dur("backoff", backoffDuration).Int("retry", client.currentRetries).Msg("retrying after backoff")

// Sleep for backoff duration
time.Sleep(backoffDuration)
}

client.logger.Debug().Int("max", client.config.MaxConnectionRetries).Msg("max connection retries exceeded")
Expand Down
Loading
Loading