Skip to content

Commit ff7aa08

Browse files
committed
Use random port for federate servers
1 parent 947c9c8 commit ff7aa08

File tree

5 files changed

+37
-60
lines changed

5 files changed

+37
-60
lines changed

core/federated/RTI/rti_remote.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ void handle_federate_resign(federate_info_t *my_fed) {
10071007
// an orderly shutdown.
10081008
// close(my_fed->socket); // from unistd.h
10091009

1010-
lf_print("Federate %d has resigned.", my_fed->enclave.id);
1010+
lf_print("RTI: Federate %d has resigned.", my_fed->enclave.id);
10111011

10121012
// Check downstream federates to see whether they should now be granted a TAG.
10131013
// To handle cycles, need to create a boolean array to keep

core/federated/federate.c

Lines changed: 21 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -116,22 +116,9 @@ federation_metadata_t federation_metadata = {
116116
};
117117

118118
void create_server(int specified_port, int id) {
119-
if (specified_port > UINT16_MAX ||
120-
specified_port < 0) {
121-
lf_print_error(
122-
"create_server(): The specified port (%d) is out of range."
123-
" Starting with %d instead.",
124-
specified_port,
125-
DEFAULT_PORT
126-
);
127-
specified_port = 0;
128-
}
119+
assert(specified_port <= UINT16_MAX && specified_port >= 0);
129120
uint16_t port = (uint16_t)specified_port;
130-
if (specified_port == 0) {
131-
// Use the default starting port + 1 + id (to try to get unique ports for each federate)
132-
port = DEFAULT_PORT + 1 + id;
133-
}
134-
lf_print("Creating a socket server on port %d.", port);
121+
LF_PRINT_LOG("Creating a socket server on port %d.", port);
135122
// Create an IPv4 socket for TCP (not UDP) communication over IP (0).
136123
int socket_descriptor = create_real_time_tcp_socket_errexit();
137124

@@ -149,40 +136,37 @@ void create_server(int specified_port, int id) {
149136
socket_descriptor,
150137
(struct sockaddr *) &server_fd,
151138
sizeof(server_fd));
152-
// If the binding fails with this port and no particular port was specified
153-
// in the LF program, then try the next few ports in sequence.
154139
int count = 0;
155-
while (result != 0 && count++ < PORT_BIND_RETRY_LIMIT) {
156-
if (specified_port != 0) {
157-
port++;
158-
server_fd.sin_port = htons(port);
159-
LF_PRINT_DEBUG("Failed to get port. Trying %d.", port);
160-
} else {
161-
LF_PRINT_DEBUG("Failed to get port %d. Will try again after waiting.", port);
162-
lf_sleep(PORT_BIND_RETRY_INTERVAL);
163-
}
164-
server_fd.sin_port = htons(port);
140+
while (result < 0 && count++ < PORT_BIND_RETRY_LIMIT) {
141+
lf_sleep(PORT_BIND_RETRY_INTERVAL);
165142
result = bind(
166143
socket_descriptor,
167144
(struct sockaddr *) &server_fd,
168145
sizeof(server_fd));
169146
}
170147
if (result < 0) {
171-
if (specified_port == 0) {
172-
lf_print_error_and_exit("Failed to bind socket. Cannot find a usable port.");
173-
} else {
174-
lf_print_error_and_exit("Failed to bind socket. Specified port is not available.");
148+
lf_print_error_and_exit("Failed to bind socket on port %d.", port);
149+
}
150+
151+
// Set the global server port.
152+
if (specified_port == 0) {
153+
// Need to retrieve the port number assigned by the OS.
154+
struct sockaddr_in assigned;
155+
socklen_t addr_len = sizeof(assigned);
156+
if (getsockname(socket_descriptor, (struct sockaddr *) &assigned, &addr_len) < 0) {
157+
lf_print_error_and_exit("Failed to retrieve assigned port number.");
175158
}
159+
_fed.server_port = ntohs(assigned.sin_port);
160+
} else {
161+
_fed.server_port = port;
176162
}
177-
LF_PRINT_LOG("Server for communicating with other federates started using port %d.", port);
178163

179164
// Enable listening for socket connections.
180165
// The second argument is the maximum number of queued socket requests,
181166
// which according to the Mac man page is limited to 128.
182167
listen(socket_descriptor, 128);
183168

184-
// Set the global server port
185-
_fed.server_port = port;
169+
lf_print("Server for communicating with other federates started using port %d.", _fed.server_port);
186170

187171
// Send the server port number to the RTI
188172
// on an MSG_TYPE_ADDRESS_ADVERTISEMENT message (@see net_common.h).
@@ -2288,6 +2272,7 @@ void terminate_execution(environment_t* env) {
22882272
// function should NEVER be called while holding any mutex lock.
22892273
lf_mutex_lock(&outbound_socket_mutex);
22902274
for (int i=0; i < NUMBER_OF_FEDERATES; i++) {
2275+
22912276
// Close outbound connections, in case they have not closed themselves.
22922277
// This will result in EOF being sent to the remote federate, I think.
22932278
_lf_close_outbound_socket(i);
@@ -2319,10 +2304,11 @@ void terminate_execution(environment_t* env) {
23192304

23202305
LF_PRINT_DEBUG("Waiting for inbound p2p socket listener threads.");
23212306
// Wait for each inbound socket listener thread to close.
2322-
if (_fed.number_of_inbound_p2p_connections > 0) {
2307+
if (_fed.number_of_inbound_p2p_connections > 0 && _fed.inbound_socket_listeners != NULL) {
23232308
LF_PRINT_LOG("Waiting for %zu threads listening for incoming messages to exit.",
23242309
_fed.number_of_inbound_p2p_connections);
23252310
for (int i=0; i < _fed.number_of_inbound_p2p_connections; i++) {
2311+
if (_fed.inbound_socket_listeners[i] == NULL) continue;
23262312
// Ignoring errors here.
23272313
lf_thread_join(_fed.inbound_socket_listeners[i], NULL);
23282314
}

core/reactor_common.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ void _lf_trigger_reaction(environment_t* env, reaction_t* reaction, int worker_n
265265
* counts between time steps and at the end of execution.
266266
*/
267267
void _lf_start_time_step(environment_t *env) {
268+
if (_lf_execution_started == false) {
269+
// Execution hasn't started, so this is probably being invoked in termination
270+
// due to an error.
271+
return;
272+
}
268273
assert(env != GLOBAL_ENVIRONMENT);
269274
LF_PRINT_LOG("--------- Start time step at tag " PRINTF_TAG ".", env->current_tag.time - start_time, env->current_tag.microstep);
270275
// Handle dynamically created tokens for mutable inputs.
@@ -1726,14 +1731,13 @@ void termination(void) {
17261731
// It should only be called for the top-level environment, which, after convention, is the first environment.
17271732
terminate_execution(env);
17281733

1729-
17301734
// In order to free tokens, we perform the same actions we would have for a new time step.
17311735
for (int i = 0; i<num_envs; i++) {
1732-
lf_print("---- Terminating environment %u", env->id);
1733-
if (!env->initialized) {
1736+
if (env == NULL || !env->initialized) {
17341737
lf_print_warning("---- Environment %u was never initialized", env->id);
17351738
continue;
17361739
}
1740+
lf_print("---- Terminating environment %u", env->id);
17371741
// Stop any tracing, if it is running.
17381742
stop_trace_locked(env->trace);
17391743

@@ -1743,7 +1747,6 @@ void termination(void) {
17431747
// Free events and tokens suspended by modal reactors.
17441748
_lf_terminate_modal_reactors(env);
17451749
#endif
1746-
17471750
// If the event queue still has events on it, report that.
17481751
if (env->event_q != NULL && pqueue_size(env->event_q) > 0) {
17491752
lf_print_warning("---- There are %zu unprocessed future events on the event queue.", pqueue_size(env->event_q));

include/core/federated/federate.h

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -305,22 +305,14 @@ void* listen_to_federates(void*);
305305
* (@see net_common.h). This function expects no response
306306
* from the RTI.
307307
*
308-
* If a port is specified by the user, that will be used
309-
* as the only possibility for the server. This function
310-
* will fail if that port is not available. If a port is not
311-
* specified, the DEFAULT_PORT + 1 + id will be attempted,
312-
* The function will keep incrementing the port in this case
313-
* until the number of tries reaches PORT_BIND_RETRY_LIMIT (@see net_common.h).
308+
* If a port is specified by the user, that will be used.
309+
* Otherwise, a random port will be assigned. If the bind fails,
310+
* it will retry after PORT_BIND_RETRY_INTERVAL until it has tried
311+
* PORT_BIND_RETRY_LIMIT times. Then it will fail.
314312
*
315-
* @note This function is similar to create_server(...) in rti.c.
316-
* However, it contains specific log messages for the peer to
317-
* peer connections between federates. It also additionally
318-
* sends an address advertisement (MSG_TYPE_ADDRESS_ADVERTISEMENT) message to the
319-
* RTI informing it of the port. Finally, it tries multiple ports
320-
* because the port used by a federate is not as important as the port used
321-
* by the RTI.
313+
* @note This function is different from create_server(...) in rti.c.
322314
*
323-
* @param specified_port The specified port by the user.
315+
* @param specified_port The specified port by the user or 0 to use a random port.
324316
* @param id The id of the federate (to help find a unique port).
325317
*/
326318
void create_server(int specified_port, int id);

include/core/federated/network/net_common.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -243,14 +243,10 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
243243
#define PORT_BIND_RETRY_LIMIT 100
244244

245245
/**
246-
* Default port number for the RTI and base number for federates' socket server.
246+
* Default port number for the RTI.
247247
* Unless a specific port has been specified by the LF program in the "at"
248248
* for the RTI or on the command line, when the RTI starts up, it will attempt
249-
* to open a socket server on this port. The federates start attempting to bind
250-
* to ports one greater than this plus the federate ID, and if that port is not,
251-
* then they try incrementing the port number with up to PORT_BIND_RETRY_LIMIT
252-
* attempts. The RTI will try repeatedly on the same port number with a delay
253-
* of PORT_BIND_RETRY_INTERVAL and a maximum number of attempts of PORT_BIND_RETRY_LIMIT.
249+
* to open a socket server on this port.
254250
*/
255251
#define DEFAULT_PORT 15045u
256252

0 commit comments

Comments
 (0)