From 062c6c37f75bf98e7d5c8e2e1fb425cad22899ae Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Mon, 16 Oct 2023 13:03:16 -0700 Subject: [PATCH 01/29] wip sender scaling test --- network/p2p/p2pbuilder/libp2pNodeBuilder.go | 6 +- network/p2p/p2pbuilder/libp2pscaler.go | 4 +- network/p2p/p2pbuilder/libp2pscaler_test.go | 36 ++-- network/p2p/p2pbuilder/utils.go | 6 +- network/p2p/p2pnode/libp2pNode_test.go | 75 ++------ network/p2p/p2pnode/resourceManager_test.go | 186 ++++++++++++++++++++ network/p2p/test/fixtures.go | 8 + 7 files changed, 232 insertions(+), 89 deletions(-) create mode 100644 network/p2p/p2pnode/resourceManager_test.go diff --git a/network/p2p/p2pbuilder/libp2pNodeBuilder.go b/network/p2p/p2pbuilder/libp2pNodeBuilder.go index 5a593c2255e..5f155c029e3 100644 --- a/network/p2p/p2pbuilder/libp2pNodeBuilder.go +++ b/network/p2p/p2pbuilder/libp2pNodeBuilder.go @@ -219,11 +219,11 @@ func (builder *LibP2PNodeBuilder) Build() (p2p.LibP2PNode, error) { limits := rcmgr.DefaultLimits libp2p.SetDefaultServiceLimits(&limits) - mem, err := allowedMemory(builder.resourceManagerCfg.MemoryLimitRatio) + mem, err := AllowedMemory(builder.resourceManagerCfg.MemoryLimitRatio) if err != nil { return nil, fmt.Errorf("could not get allowed memory: %w", err) } - fd, err := allowedFileDescriptors(builder.resourceManagerCfg.FileDescriptorsRatio) + fd, err := AllowedFileDescriptors(builder.resourceManagerCfg.FileDescriptorsRatio) if err != nil { return nil, fmt.Errorf("could not get allowed file descriptors: %w", err) } @@ -238,7 +238,7 @@ func (builder *LibP2PNodeBuilder) Build() (p2p.LibP2PNode, error) { Int64("allowed_memory", mem). Int("allowed_file_descriptors", fd). Msg("allowed memory and file descriptors are fetched from the system") - newLimitConfigLogger(builder.logger).logResourceManagerLimits(l) + NewLimitConfigLogger(builder.logger).LogResourceManagerLimits(l) opts = append(opts, libp2p.ResourceManager(mgr)) builder.logger.Info().Msg("libp2p resource manager is set to default with metrics") diff --git a/network/p2p/p2pbuilder/libp2pscaler.go b/network/p2p/p2pbuilder/libp2pscaler.go index 8612a53d34d..d6bdaf2ef5a 100644 --- a/network/p2p/p2pbuilder/libp2pscaler.go +++ b/network/p2p/p2pbuilder/libp2pscaler.go @@ -16,14 +16,14 @@ func getNumFDs() (int, error) { return int(l.Cur), nil } -func allowedMemory(scaleFactor float64) (int64, error) { +func AllowedMemory(scaleFactor float64) (int64, error) { if scaleFactor <= 0 || scaleFactor > 1 { return 0, fmt.Errorf("memory scale factor must be greater than 0 and less than or equal to 1: %f", scaleFactor) } return int64(math.Floor(float64(memory.TotalMemory()) * scaleFactor)), nil } -func allowedFileDescriptors(scaleFactor float64) (int, error) { +func AllowedFileDescriptors(scaleFactor float64) (int, error) { if scaleFactor <= 0 || scaleFactor > 1 { return 0, fmt.Errorf("fd scale factor must be greater than 0 and less than or equal to 1: %f", scaleFactor) } diff --git a/network/p2p/p2pbuilder/libp2pscaler_test.go b/network/p2p/p2pbuilder/libp2pscaler_test.go index 789554866d0..094f8a4e700 100644 --- a/network/p2p/p2pbuilder/libp2pscaler_test.go +++ b/network/p2p/p2pbuilder/libp2pscaler_test.go @@ -12,44 +12,44 @@ func TestAllowedMemoryScale(t *testing.T) { require.True(t, m > 0) // scaling with factor of 1 should return the total memory. - s, err := allowedMemory(1) + s, err := AllowedMemory(1) require.NoError(t, err) require.Equal(t, int64(m), s) // scaling with factor of 0 should return an error. 
- _, err = allowedMemory(0) + _, err = AllowedMemory(0) require.Error(t, err) // scaling with factor of -1 should return an error. - _, err = allowedMemory(-1) + _, err = AllowedMemory(-1) require.Error(t, err) // scaling with factor of 2 should return an error. - _, err = allowedMemory(2) + _, err = AllowedMemory(2) require.Error(t, err) // scaling with factor of 0.5 should return half the total memory. - s, err = allowedMemory(0.5) + s, err = AllowedMemory(0.5) require.NoError(t, err) require.Equal(t, int64(m/2), s) // scaling with factor of 0.1 should return 10% of the total memory. - s, err = allowedMemory(0.1) + s, err = AllowedMemory(0.1) require.NoError(t, err) require.Equal(t, int64(m/10), s) // scaling with factor of 0.01 should return 1% of the total memory. - s, err = allowedMemory(0.01) + s, err = AllowedMemory(0.01) require.NoError(t, err) require.Equal(t, int64(m/100), s) // scaling with factor of 0.001 should return 0.1% of the total memory. - s, err = allowedMemory(0.001) + s, err = AllowedMemory(0.001) require.NoError(t, err) require.Equal(t, int64(m/1000), s) // scaling with factor of 0.0001 should return 0.01% of the total memory. - s, err = allowedMemory(0.0001) + s, err = AllowedMemory(0.0001) require.NoError(t, err) require.Equal(t, int64(m/10000), s) } @@ -61,44 +61,44 @@ func TestAllowedFileDescriptorsScale(t *testing.T) { require.True(t, fd > 0) // scaling with factor of 1 should return the total file descriptors. - s, err := allowedFileDescriptors(1) + s, err := AllowedFileDescriptors(1) require.NoError(t, err) require.Equal(t, fd, s) // scaling with factor of 0 should return an error. - _, err = allowedFileDescriptors(0) + _, err = AllowedFileDescriptors(0) require.Error(t, err) // scaling with factor of -1 should return an error. - _, err = allowedFileDescriptors(-1) + _, err = AllowedFileDescriptors(-1) require.Error(t, err) // scaling with factor of 2 should return an error. - _, err = allowedFileDescriptors(2) + _, err = AllowedFileDescriptors(2) require.Error(t, err) // scaling with factor of 0.5 should return half the total file descriptors. - s, err = allowedFileDescriptors(0.5) + s, err = AllowedFileDescriptors(0.5) require.NoError(t, err) require.Equal(t, fd/2, s) // scaling with factor of 0.1 should return 10% of the total file descriptors. - s, err = allowedFileDescriptors(0.1) + s, err = AllowedFileDescriptors(0.1) require.NoError(t, err) require.Equal(t, fd/10, s) // scaling with factor of 0.01 should return 1% of the total file descriptors. - s, err = allowedFileDescriptors(0.01) + s, err = AllowedFileDescriptors(0.01) require.NoError(t, err) require.Equal(t, fd/100, s) // scaling with factor of 0.001 should return 0.1% of the total file descriptors. - s, err = allowedFileDescriptors(0.001) + s, err = AllowedFileDescriptors(0.001) require.NoError(t, err) require.Equal(t, fd/1000, s) // scaling with factor of 0.0001 should return 0.01% of the total file descriptors. - s, err = allowedFileDescriptors(0.0001) + s, err = AllowedFileDescriptors(0.0001) require.NoError(t, err) require.Equal(t, fd/10000, s) } diff --git a/network/p2p/p2pbuilder/utils.go b/network/p2p/p2pbuilder/utils.go index ef2a2bc1ae9..2e05bbd5b84 100644 --- a/network/p2p/p2pbuilder/utils.go +++ b/network/p2p/p2pbuilder/utils.go @@ -32,8 +32,8 @@ type limitConfigLogger struct { logger zerolog.Logger } -// newLimitConfigLogger creates a new limitConfigLogger. -func newLimitConfigLogger(logger zerolog.Logger) *limitConfigLogger { +// NewLimitConfigLogger creates a new limitConfigLogger. 
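+//
+// A minimal usage sketch (assuming a zerolog.Logger and an rcmgr.ConcreteLimitConfig
+// are in scope, mirroring the call site in LibP2PNodeBuilder.Build above):
+//
+//	NewLimitConfigLogger(logger).LogResourceManagerLimits(limits)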
+func NewLimitConfigLogger(logger zerolog.Logger) *limitConfigLogger { return &limitConfigLogger{logger: logger} } @@ -51,7 +51,7 @@ func (l *limitConfigLogger) withBaseLimit(prefix string, baseLimit rcmgr.Resourc Str(fmt.Sprintf("%s_memory", prefix), fmt.Sprintf("%v", baseLimit.Memory)).Logger() } -func (l *limitConfigLogger) logResourceManagerLimits(config rcmgr.ConcreteLimitConfig) { +func (l *limitConfigLogger) LogResourceManagerLimits(config rcmgr.ConcreteLimitConfig) { // PartialLimit config is the same as ConcreteLimit config, but with the exported fields. pCfg := config.ToPartialLimitConfig() l.logGlobalResourceLimits(pCfg) diff --git a/network/p2p/p2pnode/libp2pNode_test.go b/network/p2p/p2pnode/libp2pNode_test.go index ad42ec17108..904e5ff0aeb 100644 --- a/network/p2p/p2pnode/libp2pNode_test.go +++ b/network/p2p/p2pnode/libp2pNode_test.go @@ -28,7 +28,6 @@ import ( "github.com/onflow/flow-go/network/p2p/p2plogging" "github.com/onflow/flow-go/network/p2p/p2pnode" p2ptest "github.com/onflow/flow-go/network/p2p/test" - "github.com/onflow/flow-go/network/p2p/unicast/protocols" "github.com/onflow/flow-go/network/p2p/utils" validator "github.com/onflow/flow-go/network/validator/pubsub" "github.com/onflow/flow-go/utils/unittest" @@ -298,7 +297,10 @@ func TestCreateStream_SinglePairwiseConnection(t *testing.T) { go createConcurrentStreams(t, ctxWithTimeout, nodes, ids, numOfStreamsPerNode, streamChan, done) unittest.RequireCloseBefore(t, done, 5*time.Second, "could not create streamChan on time") - require.Len(t, streamChan, expectedTotalNumOfStreams, fmt.Sprintf("expected %d total number of streamChan created got %d", expectedTotalNumOfStreams, len(streamChan))) + require.Len(t, + streamChan, + expectedTotalNumOfStreams, + fmt.Sprintf("expected %d total number of streamChan created got %d", expectedTotalNumOfStreams, len(streamChan))) // ensure only a single connection exists between all nodes ensureSinglePairwiseConnection(t, nodes) @@ -401,67 +403,14 @@ func TestCreateStream_SinglePeerDial(t *testing.T) { expectedNumOfDialRetries := int64(p2pnode.MaxConnectAttempt) // we expect the second routine to retry creating a stream p2pnode.MaxConnectAttempt when dialing is in progress expectedCreateStreamRetries := int64(p2pnode.MaxConnectAttempt) - require.Equal(t, expectedNumOfDialRetries, dialPeerRetries.Load(), fmt.Sprintf("expected %d dial peer retries got %d", expectedNumOfDialRetries, dialPeerRetries.Load())) - require.Equal(t, expectedCreateStreamRetries, createStreamRetries.Load(), fmt.Sprintf("expected %d dial peer retries got %d", expectedCreateStreamRetries, createStreamRetries.Load())) -} - -// TestCreateStream_InboundConnResourceLimit ensures that the setting the resource limit config for -// PeerDefaultLimits.ConnsInbound restricts the number of inbound connections created from a peer to the configured value. -// NOTE: If this test becomes flaky, it indicates a violation of the single inbound connection guarantee. -// In such cases the test should not be quarantined but requires immediate resolution. 
-func TestCreateStream_InboundConnResourceLimit(t *testing.T) {
-	idProvider := mockmodule.NewIdentityProvider(t)
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-	signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx)
-
-	sporkID := unittest.IdentifierFixture()
-
-	sender, id1 := p2ptest.NodeFixture(
-		t,
-		sporkID,
-		t.Name(),
-		idProvider,
-		p2ptest.WithDefaultResourceManager(),
-		p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond))
-
-	receiver, id2 := p2ptest.NodeFixture(
-		t,
-		sporkID,
-		t.Name(),
-		idProvider,
-		p2ptest.WithDefaultResourceManager(),
-		p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond))
-
-	idProvider.On("ByPeerID", sender.ID()).Return(&id1, true).Maybe()
-	idProvider.On("ByPeerID", receiver.ID()).Return(&id2, true).Maybe()
-
-	p2ptest.StartNodes(t, signalerCtx, []p2p.LibP2PNode{sender, receiver})
-	defer p2ptest.StopNodes(t, []p2p.LibP2PNode{sender, receiver}, cancel)
-
-	p2ptest.LetNodesDiscoverEachOther(t, signalerCtx, []p2p.LibP2PNode{sender, receiver}, flow.IdentityList{&id1, &id2})
-
-	var allStreamsCreated sync.WaitGroup
-	// at this point both nodes have discovered each other and we can now create an
-	// arbitrary number of streams from sender -> receiver. This will force libp2p
-	// to create multiple streams concurrently and attempt to reuse the single pairwise
-	// connection. If more than one connection is established while creating the conccurent
-	// streams this indicates a bug in the libp2p PeerBaseLimitConnsInbound limit.
-	defaultProtocolID := protocols.FlowProtocolID(sporkID)
-	expectedNumOfStreams := int64(50)
-	for i := int64(0); i < expectedNumOfStreams; i++ {
-		allStreamsCreated.Add(1)
-		go func() {
-			defer allStreamsCreated.Done()
-			_, err := sender.Host().NewStream(ctx, receiver.ID(), defaultProtocolID)
-			require.NoError(t, err)
-		}()
-	}
-
-	unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time")
-	require.Len(t, receiver.Host().Network().ConnsToPeer(sender.ID()), 1)
-	actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), defaultProtocolID, network.DirOutbound)
-	require.Equal(t, expectedNumOfStreams, int64(actualNumOfStreams), fmt.Sprintf("expected to create %d number of streams got %d", expectedNumOfStreams, actualNumOfStreams))
+	require.Equal(t,
+		expectedNumOfDialRetries,
+		dialPeerRetries.Load(),
+		fmt.Sprintf("expected %d dial peer retries got %d", expectedNumOfDialRetries, dialPeerRetries.Load()))
+	require.Equal(t,
+		expectedCreateStreamRetries,
+		createStreamRetries.Load(),
+		fmt.Sprintf("expected %d create stream retries got %d", expectedCreateStreamRetries, createStreamRetries.Load()))
 }
 
 // createStreams will attempt to create n number of streams concurrently between each combination of node pairs.
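A side note on the assertions above: testify also ships formatted variants of every assertion, which make the fmt.Sprintf wrapping unnecessary. A minimal sketch with the same values (not part of the diff above):

	require.Equalf(t, expectedNumOfDialRetries, dialPeerRetries.Load(),
		"expected %d dial peer retries got %d", expectedNumOfDialRetries, dialPeerRetries.Load())
	require.Equalf(t, expectedCreateStreamRetries, createStreamRetries.Load(),
		"expected %d create stream retries got %d", expectedCreateStreamRetries, createStreamRetries.Load())

Later patches in this series adopt exactly this style (require.Equalf, require.Greaterf in resourceManager_test.go).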
diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go
new file mode 100644
index 00000000000..32070aca932
--- /dev/null
+++ b/network/p2p/p2pnode/resourceManager_test.go
@@ -0,0 +1,186 @@
+package p2pnode_test
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/libp2p/go-libp2p"
+	"github.com/libp2p/go-libp2p/core/network"
+	rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager"
+	"github.com/stretchr/testify/require"
+
+	"github.com/onflow/flow-go/model/flow"
+	"github.com/onflow/flow-go/module/irrecoverable"
+	mockmodule "github.com/onflow/flow-go/module/mock"
+	"github.com/onflow/flow-go/network/internal/p2putils"
+	"github.com/onflow/flow-go/network/p2p"
+	p2ptest "github.com/onflow/flow-go/network/p2p/test"
+	"github.com/onflow/flow-go/network/p2p/unicast/protocols"
+	"github.com/onflow/flow-go/utils/unittest"
+)
+
+// TestCreateStream_InboundConnResourceLimit ensures that setting the resource limit config for
+// PeerDefaultLimits.ConnsInbound restricts the number of inbound connections created from a peer to the configured value.
+// NOTE: If this test becomes flaky, it indicates a violation of the single inbound connection guarantee.
+// In such cases the test should not be quarantined but requires immediate resolution.
+func TestCreateStream_InboundConnResourceLimit(t *testing.T) {
+	idProvider := mockmodule.NewIdentityProvider(t)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx)
+
+	sporkID := unittest.IdentifierFixture()
+
+	sender, id1 := p2ptest.NodeFixture(
+		t,
+		sporkID,
+		t.Name(),
+		idProvider,
+		p2ptest.WithDefaultResourceManager(),
+		p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond))
+
+	receiver, id2 := p2ptest.NodeFixture(
+		t,
+		sporkID,
+		t.Name(),
+		idProvider,
+		p2ptest.WithDefaultResourceManager(),
+		p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond))
+
+	idProvider.On("ByPeerID", sender.ID()).Return(&id1, true).Maybe()
+	idProvider.On("ByPeerID", receiver.ID()).Return(&id2, true).Maybe()
+
+	p2ptest.StartNodes(t, signalerCtx, []p2p.LibP2PNode{sender, receiver})
+	defer p2ptest.StopNodes(t, []p2p.LibP2PNode{sender, receiver}, cancel)
+
+	p2ptest.LetNodesDiscoverEachOther(t, signalerCtx, []p2p.LibP2PNode{sender, receiver}, flow.IdentityList{&id1, &id2})
+
+	var allStreamsCreated sync.WaitGroup
+	// at this point both nodes have discovered each other and we can now create an
+	// arbitrary number of streams from sender -> receiver. This will force libp2p
+	// to create multiple streams concurrently and attempt to reuse the single pairwise
+	// connection. If more than one connection is established while creating the concurrent
+	// streams this indicates a bug in the libp2p PeerBaseLimitConnsInbound limit.
+ defaultProtocolID := protocols.FlowProtocolID(sporkID) + expectedNumOfStreams := int64(50) + for i := int64(0); i < expectedNumOfStreams; i++ { + allStreamsCreated.Add(1) + go func() { + defer allStreamsCreated.Done() + require.NoError(t, sender.Host().Connect(ctx, receiver.Host().Peerstore().PeerInfo(receiver.ID()))) + _, err := sender.Host().NewStream(ctx, receiver.ID(), defaultProtocolID) + require.NoError(t, err) + }() + } + + unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time") + require.Len(t, receiver.Host().Network().ConnsToPeer(sender.ID()), 1) + actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), defaultProtocolID, network.DirOutbound) + require.Equal(t, + expectedNumOfStreams, + int64(actualNumOfStreams), + fmt.Sprintf("expected to create %d number of streams got %d", expectedNumOfStreams, actualNumOfStreams)) +} + +// TestCreateStream_InboundConnResourceLimit ensures that the setting the resource limit config for +// PeerDefaultLimits.ConnsInbound restricts the number of inbound connections created from a peer to the configured value. +// NOTE: If this test becomes flaky, it indicates a violation of the single inbound connection guarantee. +// In such cases the test should not be quarantined but requires immediate resolution. +func TestCreateStream_PeerResourceLimit_NoScale(t *testing.T) { + idProvider := mockmodule.NewIdentityProvider(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) + + sporkID := unittest.IdentifierFixture() + + // cfg, err := flowconfig.DefaultConfig() + // require.NoError(t, err) + + // p2pbuilder.NewLimitConfigLogger(unittest.Logger()).LogResourceManagerLimits(l) + + // mem, err := p2pbuilder.AllowedMemory(cfg.NetworkConfig.MemoryLimitRatio) + // require.NoError(t, err) + // + // fd, err := p2pbuilder.AllowedFileDescriptors(cfg.NetworkConfig.FileDescriptorsRatio) + // require.NoError(t, err) + // limits.SystemBaseLimit.StreamsInbound = 1 + // limits.SystemLimitIncrease.StreamsInbound = 1 + // limits.ProtocolBaseLimit.StreamsOutbound = 10_0000 + resourceManagerSnd, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(rcmgr.InfiniteLimits)) + require.NoError(t, err) + sender, id1 := p2ptest.NodeFixture( + t, + sporkID, + t.Name(), + idProvider, + p2ptest.WithResourceManager(resourceManagerSnd), + p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond)) + + limits := rcmgr.DefaultLimits + libp2p.SetDefaultServiceLimits(&limits) + // mem, err := p2pbuilder.AllowedMemory(cfg.NetworkConfig.MemoryLimitRatio) + // require.NoError(t, err) + // + // fd, err := p2pbuilder.AllowedFileDescriptors(cfg.NetworkConfig.FileDescriptorsRatio) + // require.NoError(t, err) + // limits.SystemBaseLimit.StreamsInbound = 1 + // limits.SystemLimitIncrease.StreamsInbound = 1 + // limits.ProtocolBaseLimit.StreamsOutbound = 10_0000 + l := limits.Scale(0, 0) + resourceManagerRcv, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(l)) + require.NoError(t, err) + receiver, id2 := p2ptest.NodeFixture( + t, + sporkID, + t.Name(), + idProvider, + p2ptest.WithResourceManager(resourceManagerRcv), + p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond)) + + idProvider.On("ByPeerID", sender.ID()).Return(&id1, true).Maybe() + idProvider.On("ByPeerID", receiver.ID()).Return(&id2, true).Maybe() + + p2ptest.StartNodes(t, signalerCtx, []p2p.LibP2PNode{sender, receiver}) + defer p2ptest.StopNodes(t, []p2p.LibP2PNode{sender, 
receiver}, cancel)
+
+	p2ptest.LetNodesDiscoverEachOther(t, signalerCtx, []p2p.LibP2PNode{sender, receiver}, flow.IdentityList{&id1, &id2})
+
+	var allStreamsCreated sync.WaitGroup
+	// at this point both nodes have discovered each other and we can now create an
+	// arbitrary number of streams from sender -> receiver. This will force libp2p
+	// to create multiple streams concurrently and attempt to reuse the single pairwise
+	// connection. If more than one connection is established while creating the conccurent
+	// streams this indicates a bug in the libp2p PeerBaseLimitConnsInbound limit.
+	defaultProtocolID := protocols.FlowProtocolID(sporkID)
+	maxAllowedStreams := l.ToPartialLimitConfig().ProtocolPeerDefault.StreamsInbound
+	t.Log("maxAllowedStreamsInbound", maxAllowedStreams)
+	t.Log("maxAllowedPeerStreamInbound", l.ToPartialLimitConfig().ProtocolPeerDefault.StreamsInbound)
+	t.Log("maxAllowedStreamsOutboundPeer (sender)", rcmgr.InfiniteLimits.ToPartialLimitConfig().ProtocolPeerDefault.StreamsOutbound)
+	t.Log("maxAllowedStreamSystemInbound", l.ToPartialLimitConfig().System.StreamsInbound)
+	surplus := int64(l.ToPartialLimitConfig().System.StreamsInbound)
+	errorCount := int64(0)
+	for i := int64(0); i < int64(maxAllowedStreams)+surplus; i++ {
+		allStreamsCreated.Add(1)
+		go func() {
+			defer allStreamsCreated.Done()
+			_, err := sender.Host().NewStream(ctx, receiver.ID(), defaultProtocolID)
+			if err != nil {
+				atomic.AddInt64(&errorCount, 1) // count the number of errors
+			}
+		}()
+	}
+
+	unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time")
+	require.Len(t, receiver.Host().Network().ConnsToPeer(sender.ID()), 1)
+	actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), defaultProtocolID, network.DirOutbound)
+	require.Equal(t,
+		int64(maxAllowedStreams),
+		int64(actualNumOfStreams),
+		fmt.Sprintf("expected to create %d number of streams got %d", maxAllowedStreams, actualNumOfStreams))
+	require.Equalf(t, int64(surplus), atomic.LoadInt64(&errorCount), "expected to get %d errors got %d", surplus, atomic.LoadInt64(&errorCount))
+}
diff --git a/network/p2p/test/fixtures.go b/network/p2p/test/fixtures.go
index 656e5d6f0cb..bcdca5e3264 100644
--- a/network/p2p/test/fixtures.go
+++ b/network/p2p/test/fixtures.go
@@ -398,6 +398,14 @@ func WithDefaultResourceManager() NodeFixtureParameterOption {
 	}
 }
 
+// WithResourceManager overrides the resource manager used by the node fixture with the provided resource manager.
+// If this option is not used, the fixture falls back to its default resource manager (the infinite resource manager).
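+//
+// A usage sketch, mirroring how the resource manager tests construct nodes:
+//
+//	rm, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(rcmgr.InfiniteLimits))
+//	require.NoError(t, err)
+//	node, id := p2ptest.NodeFixture(t, sporkID, t.Name(), idProvider, p2ptest.WithResourceManager(rm))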
+func WithResourceManager(resourceManager network.ResourceManager) NodeFixtureParameterOption {
+	return func(p *NodeFixtureParameters) {
+		p.ResourceManager = resourceManager
+	}
+}
+
 func WithUnicastHandlerFunc(handler network.StreamHandler) NodeFixtureParameterOption {
 	return func(p *NodeFixtureParameters) {
 		p.HandlerFunc = handler

From efdf38f8feb2ac388666e82a9d1e32e9face336b Mon Sep 17 00:00:00 2001
From: Yahya Hassanzadeh
Date: Tue, 17 Oct 2023 10:50:41 -0700
Subject: [PATCH 02/29] adds test showing that the system-wide stream limit is
 not enforced

---
 network/internal/p2putils/utils.go          |   2 +-
 network/p2p/p2pnode/resourceManager_test.go | 136 +++++++++++---------
 2 files changed, 74 insertions(+), 64 deletions(-)

diff --git a/network/internal/p2putils/utils.go b/network/internal/p2putils/utils.go
index 2325df8734a..292ce75f230 100644
--- a/network/internal/p2putils/utils.go
+++ b/network/internal/p2putils/utils.go
@@ -159,7 +159,7 @@ func IPPortFromMultiAddress(addrs ...multiaddr.Multiaddr) (string, string, error
 		return "", "", err
 	}
 
-	//there should only be one valid IPv4 address
+	// there should only be one valid IPv4 address
 		return ipOrHostname, port, nil
 	}
 	return "", "", fmt.Errorf("ip address or hostname not found")
diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go
index 32070aca932..a6adac64219 100644
--- a/network/p2p/p2pnode/resourceManager_test.go
+++ b/network/p2p/p2pnode/resourceManager_test.go
@@ -4,13 +4,13 @@ import (
 	"context"
 	"fmt"
 	"sync"
-	"sync/atomic"
 	"testing"
 	"time"
 
 	"github.com/libp2p/go-libp2p"
 	"github.com/libp2p/go-libp2p/core/network"
 	rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
 	"github.com/onflow/flow-go/model/flow"
@@ -86,11 +86,25 @@ func TestCreateStream_InboundConnResourceLimit(t *testing.T) {
 		fmt.Sprintf("expected to create %d number of streams got %d", expectedNumOfStreams, actualNumOfStreams))
 }
 
-// TestCreateStream_InboundConnResourceLimit ensures that the setting the resource limit config for
-// PeerDefaultLimits.ConnsInbound restricts the number of inbound connections created from a peer to the configured value.
-// NOTE: If this test becomes flaky, it indicates a violation of the single inbound connection guarantee.
-// In such cases the test should not be quarantined but requires immediate resolution.
-func TestCreateStream_PeerResourceLimit_NoScale(t *testing.T) {
+// TestCreateStream_SystemStreamLimit_NotEnforced is a reproduction of a hypothetical bug where the system-wide inbound stream limit of libp2p resource management
+// was not being enforced. The purpose of this test is to share the reproduction with the libp2p community as well as to evaluate the existence of the bug on
+// future libp2p versions.
+// The test scenario works as follows:
+// - We have 30 senders and 1 receiver.
+// - The senders are running with a resource manager that allows an infinite number of streams, so that they can create as many streams as they want.
+// - The receiver is running with a resource manager with base limits and no scaling.
+// - The test reads the peer protocol default limits for inbound streams at receiver; say x; which is the limit for the number of inbound streams from each sender on a
+// specific protocol.
+// - Each sender creates x-1 streams to the receiver on a specific protocol. This is done to ensure that the receiver has x-1 streams from each sender; a total of
+// 30*(x-1) streams at the receiver.
+// - Test first ensures that numerically 30 * (x - 1) > max system-wide inbound stream limit; i.e., the total number of streams created by all senders is greater than +// the system-wide limit. +// - Then each sender creates x - 1 streams concurrently to the receiver. +// - At the end of the test we ensure that the total number of streams created by all senders is greater than the system-wide limit; which should not be the case if the +// system-wide limit is being enforced. +func TestCreateStream_SystemStreamLimit_NotEnforced(t *testing.T) { + nodeCount := 30 + idProvider := mockmodule.NewIdentityProvider(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -98,39 +112,16 @@ func TestCreateStream_PeerResourceLimit_NoScale(t *testing.T) { sporkID := unittest.IdentifierFixture() - // cfg, err := flowconfig.DefaultConfig() - // require.NoError(t, err) - - // p2pbuilder.NewLimitConfigLogger(unittest.Logger()).LogResourceManagerLimits(l) - - // mem, err := p2pbuilder.AllowedMemory(cfg.NetworkConfig.MemoryLimitRatio) - // require.NoError(t, err) - // - // fd, err := p2pbuilder.AllowedFileDescriptors(cfg.NetworkConfig.FileDescriptorsRatio) - // require.NoError(t, err) - // limits.SystemBaseLimit.StreamsInbound = 1 - // limits.SystemLimitIncrease.StreamsInbound = 1 - // limits.ProtocolBaseLimit.StreamsOutbound = 10_0000 resourceManagerSnd, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(rcmgr.InfiniteLimits)) require.NoError(t, err) - sender, id1 := p2ptest.NodeFixture( - t, - sporkID, - t.Name(), + senders, senderIds := p2ptest.NodesFixture(t, sporkID, t.Name(), nodeCount, idProvider, p2ptest.WithResourceManager(resourceManagerSnd), p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond)) limits := rcmgr.DefaultLimits libp2p.SetDefaultServiceLimits(&limits) - // mem, err := p2pbuilder.AllowedMemory(cfg.NetworkConfig.MemoryLimitRatio) - // require.NoError(t, err) - // - // fd, err := p2pbuilder.AllowedFileDescriptors(cfg.NetworkConfig.FileDescriptorsRatio) - // require.NoError(t, err) - // limits.SystemBaseLimit.StreamsInbound = 1 - // limits.SystemLimitIncrease.StreamsInbound = 1 - // limits.ProtocolBaseLimit.StreamsOutbound = 10_0000 + l := limits.Scale(0, 0) resourceManagerRcv, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(l)) require.NoError(t, err) @@ -142,45 +133,64 @@ func TestCreateStream_PeerResourceLimit_NoScale(t *testing.T) { p2ptest.WithResourceManager(resourceManagerRcv), p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond)) - idProvider.On("ByPeerID", sender.ID()).Return(&id1, true).Maybe() + for i, sender := range senders { + idProvider.On("ByPeerID", sender.ID()).Return(senderIds[i], true).Maybe() + } idProvider.On("ByPeerID", receiver.ID()).Return(&id2, true).Maybe() - p2ptest.StartNodes(t, signalerCtx, []p2p.LibP2PNode{sender, receiver}) - defer p2ptest.StopNodes(t, []p2p.LibP2PNode{sender, receiver}, cancel) + p2ptest.StartNodes(t, signalerCtx, append(senders, receiver)) + defer p2ptest.StopNodes(t, append(senders, receiver), cancel) - p2ptest.LetNodesDiscoverEachOther(t, signalerCtx, []p2p.LibP2PNode{sender, receiver}, flow.IdentityList{&id1, &id2}) + p2ptest.LetNodesDiscoverEachOther(t, signalerCtx, append(senders, receiver), append(senderIds, &id2)) var allStreamsCreated sync.WaitGroup - // at this point both nodes have discovered each other and we can now create an - // arbitrary number of streams from sender -> receiver. 
This will force libp2p
-	// to create multiple streams concurrently and attempt to reuse the single pairwise
-	// connection. If more than one connection is established while creating the conccurent
-	// streams this indicates a bug in the libp2p PeerBaseLimitConnsInbound limit.
 	defaultProtocolID := protocols.FlowProtocolID(sporkID)
-	maxAllowedStreams := l.ToPartialLimitConfig().ProtocolPeerDefault.StreamsInbound
-	t.Log("maxAllowedStreamsInbound", maxAllowedStreams)
-	t.Log("maxAllowedPeerStreamInbound", l.ToPartialLimitConfig().ProtocolPeerDefault.StreamsInbound)
-	t.Log("maxAllowedStreamsOutboundPeer (sender)", rcmgr.InfiniteLimits.ToPartialLimitConfig().ProtocolPeerDefault.StreamsOutbound)
-	t.Log("maxAllowedStreamSystemInbound", l.ToPartialLimitConfig().System.StreamsInbound)
-	surplus := int64(l.ToPartialLimitConfig().System.StreamsInbound)
-	errorCount := int64(0)
-	for i := int64(0); i < int64(maxAllowedStreams)+surplus; i++ {
-		allStreamsCreated.Add(1)
-		go func() {
-			defer allStreamsCreated.Done()
-			_, err := sender.Host().NewStream(ctx, receiver.ID(), defaultProtocolID)
-			if err != nil {
-				atomic.AddInt64(&errorCount, 1) // count the number of errors
-			}
-		}()
+	maxInboundStreamPerPeer := l.ToPartialLimitConfig().ProtocolPeerDefault.StreamsInbound
+	maxSystemInboundStream := l.ToPartialLimitConfig().System.StreamsInbound
+
+	t.Log("max allowed inbound stream from each sender to receiver (per protocol)", maxInboundStreamPerPeer)
+	t.Log("max allowed inbound stream across all peers and protocols at receiver (system-wide)", maxSystemInboundStream)
+
+	// sanity check: if each peer creates maxInboundStreamPerPeer-1 streams, and we assume that the maxSystemInboundStream limit is not enforced, then to validate the hypothesis we need
+	// to ensure that (maxInboundStreamPerPeer - 1) * nodeCount > maxSystemInboundStream, i.e., if each peer creates maxInboundStreamPerPeer-1 streams, the total number of streams
+	// ends up being greater than the system-wide limit.
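+	// A worked example with purely hypothetical numbers (the real limits come from the scaled default config above):
+	// with maxInboundStreamPerPeer = 64 and nodeCount = 30, the senders attempt 30 * (64 - 1) = 1890 streams in total,
+	// so the hypothesis is only testable when the system-wide inbound stream limit is below 1890.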
+	require.Greaterf(t,
+		int64(maxInboundStreamPerPeer-1)*int64(nodeCount),
+		int64(maxSystemInboundStream),
+		"(maxInboundStreamPerPeer - 1) * nodeCount should be greater than maxSystemInboundStream")
+
+	for sIndex := range senders {
+		sender := senders[sIndex]
+		for i := int64(0); i < int64(maxInboundStreamPerPeer-1); i++ {
+			allStreamsCreated.Add(1)
+			go func() {
+				defer allStreamsCreated.Done()
+				_, err := sender.Host().NewStream(ctx, receiver.ID(), defaultProtocolID)
+				require.NoError(t, err, "error creating stream")
+			}()
+		}
 	}
 
 	unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time")
-	require.Len(t, receiver.Host().Network().ConnsToPeer(sender.ID()), 1)
-	actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), defaultProtocolID, network.DirOutbound)
-	require.Equal(t,
-		int64(maxAllowedStreams),
-		int64(actualNumOfStreams),
-		fmt.Sprintf("expected to create %d number of streams got %d", maxAllowedStreams, actualNumOfStreams))
-	require.Equalf(t, int64(surplus), atomic.LoadInt64(&errorCount), "expected to get %d errors got %d", surplus, atomic.LoadInt64(&errorCount))
+
+	totalStreams := 0
+	for i, sender := range senders {
+		actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), defaultProtocolID, network.DirOutbound)
+		t.Logf("sender %d has %d streams", i, actualNumOfStreams)
+		assert.Equalf(t,
+			int64(maxInboundStreamPerPeer-1),
+			int64(actualNumOfStreams),
+			"expected to create %d number of streams got %d",
+			int64(maxInboundStreamPerPeer-1),
+			actualNumOfStreams)
+		totalStreams += actualNumOfStreams
+	}
+
+	// when the system-wide limit is not enforced, the total number of streams created by all senders should be greater than the system-wide limit.
+	require.Greaterf(t,
+		totalStreams,
+		l.ToPartialLimitConfig().Stream.StreamsInbound,
+		"expected to create more than %d number of streams got %d",
+		l.ToPartialLimitConfig().Stream.StreamsInbound,
+		totalStreams)

From 543bc2c4184369a7bc0e1c70063708087ca14391 Mon Sep 17 00:00:00 2001
From: Yahya Hassanzadeh
Date: Wed, 18 Oct 2023 16:36:52 -0700
Subject: [PATCH 03/29] wip implementing backbone test

---
 network/internal/p2putils/utils.go          |  97 +++--
 network/p2p/p2pnode/libp2pStream_test.go    |  20 +-
 network/p2p/p2pnode/resourceManager_test.go | 402 +++++++++++++++++++-
 3 files changed, 483 insertions(+), 36 deletions(-)

diff --git a/network/internal/p2putils/utils.go b/network/internal/p2putils/utils.go
index 292ce75f230..f5bc4f01e4c 100644
--- a/network/internal/p2putils/utils.go
+++ b/network/internal/p2putils/utils.go
@@ -4,11 +4,11 @@ import (
 	"fmt"
 	"net"
 
-	"github.com/libp2p/go-libp2p/core"
 	"github.com/libp2p/go-libp2p/core/crypto"
 	"github.com/libp2p/go-libp2p/core/host"
 	"github.com/libp2p/go-libp2p/core/network"
 	"github.com/libp2p/go-libp2p/core/peer"
+	"github.com/libp2p/go-libp2p/core/protocol"
 	"github.com/multiformats/go-multiaddr"
 	"github.com/rs/zerolog"
 
@@ -69,46 +69,95 @@ func ConnectednessToString(connectedness network.Connectedness) (string, bool)
 
 }
 
-// FindOutboundStream finds an existing outbound stream to the target id if it exists by querying libp2p
-func FindOutboundStream(host host.Host, targetID peer.ID, protocol core.ProtocolID) (network.Stream, bool) {
-	streams := FilterStream(host, targetID, protocol, network.DirOutbound, false)
-	if len(streams) > 0 {
-		return streams[0], true
+// CountStream counts the streams to the target peer that match the provided filter options (by default, all streams to the peer).
+func CountStream(host host.Host, targetID peer.ID, opts ...FilterOption) int {
+	streams := FilterStream(host, targetID, append(opts, All())...)
+	return len(streams)
+}
+
+// FilterOptions holds the filtering options used in FilterStream.
+type FilterOptions struct {
+	// dir specifies the direction of the streams to be filtered.
+	// The default value is network.DirUnknown, which considers both inbound and outbound streams.
+	dir network.Direction
+
+	// protocol specifies the protocol ID of the streams to be filtered.
+	// The default value is an empty string, which considers streams of all protocol IDs.
+	protocol protocol.ID
+
+	// all specifies whether to return all matching streams or just the first matching stream.
+	// The default value is false, which returns just the first matching stream.
+	all bool
+}
+
+// FilterOption defines a function type that modifies FilterOptions.
+type FilterOption func(*FilterOptions)
+
+// Direction is a FilterOption for setting the direction of the streams to be filtered.
+func Direction(dir network.Direction) FilterOption {
+	return func(opts *FilterOptions) {
+		opts.dir = dir
 	}
-	return nil, false
 }
 
-// CountStream finds total number of outbound stream to the target id
-func CountStream(host host.Host, targetID peer.ID, protocol core.ProtocolID, dir network.Direction) int {
-	streams := FilterStream(host, targetID, protocol, dir, true)
-	return len(streams)
+// Protocol is a FilterOption for setting the protocol ID of the streams to be filtered.
+func Protocol(protocol protocol.ID) FilterOption {
+	return func(opts *FilterOptions) {
+		opts.protocol = protocol
+	}
 }
 
-// FilterStream finds one or all existing outbound streams to the target id if it exists.
-// if parameter all is true - all streams are found else the first stream found is returned
-func FilterStream(host host.Host, targetID peer.ID, protocol core.ProtocolID, dir network.Direction, all bool) []network.Stream {
+// All is a FilterOption for setting whether to return all matching streams or just the first matching stream.
+func All() FilterOption {
+	return func(opts *FilterOptions) {
+		opts.all = true
+	}
+}
 
+// FilterStream filters the streams to a target peer based on the provided options.
+// The default behavior is to consider all directions and protocols, and return just the first matching stream.
+// This behavior can be customized by providing FilterOption values.
+//
+// Usage:
+//
+// - To find all outbound streams to a target peer with a specific protocol ID:
+//	streams := FilterStream(host, targetID, Direction(network.DirOutbound), Protocol(myProtocolID), All())
+//
+// - To find the first inbound stream to a target peer, regardless of protocol ID:
+//	stream := FilterStream(host, targetID, Direction(network.DirInbound))
+//
+// host is the host from which to filter streams.
+// targetID is the ID of the target peer.
+// options is a variadic parameter that allows zero or more FilterOption values to be provided.
+//
+// It returns a slice of network.Stream values that match the filtering criteria.
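+//
+// CountStream (defined above) builds on FilterStream; for example, counting every stream to a peer,
+// regardless of direction and protocol, reduces to the following usage sketch:
+//
+//	total := CountStream(host, targetID)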
+func FilterStream(host host.Host, targetID peer.ID, options ...FilterOption) []network.Stream { var filteredStreams []network.Stream - // choose the connection only if it is connected + // default values + opts := FilterOptions{ + dir: network.DirUnknown, // by default, consider both inbound and outbound streams + protocol: "", // by default, consider streams of all protocol IDs + all: false, // by default, return just the first matching stream + } + + // apply provided options + for _, option := range options { + option(&opts) + } + if host.Network().Connectedness(targetID) != network.Connected { return filteredStreams } - // get all connections conns := host.Network().ConnsToPeer(targetID) - - // find a connection which is in the connected state for _, conn := range conns { - - // get all streams streams := conn.GetStreams() for _, stream := range streams { - - // choose a stream which is marked as outbound and is for the flow protocol - if stream.Stat().Direction == dir && stream.Protocol() == protocol { + if (opts.dir == network.DirUnknown || stream.Stat().Direction == opts.dir) && + (opts.protocol == "" || stream.Protocol() == opts.protocol) { filteredStreams = append(filteredStreams, stream) - if !all { + if !opts.all { return filteredStreams } } diff --git a/network/p2p/p2pnode/libp2pStream_test.go b/network/p2p/p2pnode/libp2pStream_test.go index 8d693a501cf..6a90c2c5207 100644 --- a/network/p2p/p2pnode/libp2pStream_test.go +++ b/network/p2p/p2pnode/libp2pStream_test.go @@ -164,7 +164,7 @@ func testCreateStream(t *testing.T, sporkId flow.Identifier, unicasts []protocol id2 := identities[1] // Assert that there is no outbound stream to the target yet - require.Equal(t, 0, p2putils.CountStream(nodes[0].Host(), nodes[1].ID(), protocolID, network.DirOutbound)) + require.Equal(t, 0, p2putils.CountStream(nodes[0].Host(), nodes[1].ID(), p2putils.Protocol(protocolID), p2putils.Direction(network.DirOutbound))) // Now attempt to create another 100 outbound stream to the same destination by calling CreateStream streamCount := 100 @@ -193,7 +193,7 @@ func testCreateStream(t *testing.T, sporkId flow.Identifier, unicasts []protocol } require.Eventually(t, func() bool { - return streamCount == p2putils.CountStream(nodes[0].Host(), nodes[1].ID(), protocolID, network.DirOutbound) + return streamCount == p2putils.CountStream(nodes[0].Host(), nodes[1].ID(), p2putils.Protocol(protocolID), p2putils.Direction(network.DirOutbound)) }, 5*time.Second, 100*time.Millisecond, "could not create streams on time") // checks that the number of connections is 1 despite the number of streams; i.e., all streams are created on the same connection @@ -235,8 +235,8 @@ func TestCreateStream_FallBack(t *testing.T) { // Assert that there is no outbound stream to the target yet (neither default nor preferred) defaultProtocolId := protocols.FlowProtocolID(sporkId) preferredProtocolId := protocols.FlowGzipProtocolId(sporkId) - require.Equal(t, 0, p2putils.CountStream(thisNode.Host(), otherNode.ID(), defaultProtocolId, network.DirOutbound)) - require.Equal(t, 0, p2putils.CountStream(thisNode.Host(), otherNode.ID(), preferredProtocolId, network.DirOutbound)) + require.Equal(t, 0, p2putils.CountStream(thisNode.Host(), otherNode.ID(), p2putils.Protocol(defaultProtocolId), p2putils.Direction(network.DirOutbound))) + require.Equal(t, 0, p2putils.CountStream(thisNode.Host(), otherNode.ID(), p2putils.Protocol(preferredProtocolId), p2putils.Direction(network.DirOutbound))) // Now attempt to create another 100 outbound stream to the 
same destination by calling CreateStream streamCount := 10 @@ -265,11 +265,11 @@ func TestCreateStream_FallBack(t *testing.T) { // wait for the stream to be created on the default protocol id. require.Eventually(t, func() bool { - return streamCount == p2putils.CountStream(nodes[0].Host(), nodes[1].ID(), defaultProtocolId, network.DirOutbound) + return streamCount == p2putils.CountStream(nodes[0].Host(), nodes[1].ID(), p2putils.Protocol(defaultProtocolId), p2putils.Direction(network.DirOutbound)) }, 5*time.Second, 100*time.Millisecond, "could not create streams on time") // no stream must be created on the preferred protocol id - require.Equal(t, 0, p2putils.CountStream(thisNode.Host(), otherNode.ID(), preferredProtocolId, network.DirOutbound)) + require.Equal(t, 0, p2putils.CountStream(thisNode.Host(), otherNode.ID(), p2putils.Protocol(preferredProtocolId), p2putils.Direction(network.DirOutbound))) // checks that the number of connections is 1 despite the number of streams; i.e., all streams are created on the same connection require.Len(t, nodes[0].Host().Network().Conns(), 1) @@ -366,16 +366,16 @@ func TestNoBackoffWhenCreatingStream(t *testing.T) { someGraceTime := 100 * time.Millisecond totalWaitTime := maxTimeToWait + someGraceTime - //each CreateStream() call may try to connect up to MaxConnectAttempt (3) times. + // each CreateStream() call may try to connect up to MaxConnectAttempt (3) times. - //there are 2 scenarios that we need to account for: + // there are 2 scenarios that we need to account for: // - //1. machines where a timeout occurs on the first connection attempt - this can be due to local firewall rules or other processes running on the machine. + // 1. machines where a timeout occurs on the first connection attempt - this can be due to local firewall rules or other processes running on the machine. // In this case, we need to create a scenario where a backoff would have normally occured. This is why we initiate a second connection attempt. // Libp2p remembers the peer we are trying to connect to between CreateStream() calls and would have initiated a backoff if backoff wasn't turned off. // The second CreateStream() call will make a second connection attempt MaxConnectAttempt times and that should never result in a backoff error. // - //2. machines where a timeout does NOT occur on the first connection attempt - this is on CI machines and some local dev machines without a firewall / too many other processes. + // 2. machines where a timeout does NOT occur on the first connection attempt - this is on CI machines and some local dev machines without a firewall / too many other processes. // In this case, there will be MaxConnectAttempt (3) connection attempts on the first CreateStream() call and MaxConnectAttempt (3) attempts on the second CreateStream() call. 
// make two separate stream creation attempt and assert that no connection back off happened diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index a6adac64219..a5597e660cf 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "sync" + "sync/atomic" "testing" "time" @@ -79,7 +80,7 @@ func TestCreateStream_InboundConnResourceLimit(t *testing.T) { unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time") require.Len(t, receiver.Host().Network().ConnsToPeer(sender.ID()), 1) - actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), defaultProtocolID, network.DirOutbound) + actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), p2putils.Protocol(defaultProtocolID), p2putils.Direction(network.DirOutbound)) require.Equal(t, expectedNumOfStreams, int64(actualNumOfStreams), @@ -175,7 +176,7 @@ func TestCreateStream_SystemStreamLimit_NotEnforced(t *testing.T) { totalStreams := 0 for i, sender := range senders { - actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), defaultProtocolID, network.DirOutbound) + actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), p2putils.Protocol(defaultProtocolID), p2putils.Direction(network.DirOutbound)) t.Logf("sender %d has %d streams", i, actualNumOfStreams) assert.Equalf(t, int64(maxInboundStreamPerPeer-1), @@ -193,4 +194,401 @@ func TestCreateStream_SystemStreamLimit_NotEnforced(t *testing.T) { "expected to create more than %d number of streams got %d", l.ToPartialLimitConfig().Stream.StreamsInbound, totalStreams) + + totalTrackedStreams := 0 + require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { + t.Logf("transient scope; inbound stream count %d", scope.Stat().NumStreamsInbound) + totalTrackedStreams += scope.Stat().NumStreamsInbound + return nil + })) + + require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error { + t.Logf("protocol scope for %s; inbound stream count %d", defaultProtocolID, scope.Stat().NumStreamsInbound) + totalTrackedStreams += scope.Stat().NumStreamsInbound + return nil + })) + + for _, sender := range senders { + require.NoError(t, resourceManagerRcv.ViewPeer(sender.ID(), func(scope network.PeerScope) error { + t.Logf("peer scope for %s; inbound stream count %d", sender.ID(), scope.Stat().NumStreamsInbound) + totalTrackedStreams += scope.Stat().NumStreamsInbound + return nil + })) + } + + require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { + t.Logf("system scope; inbound stream count %d", scope.Stat().NumStreamsInbound) + totalTrackedStreams += scope.Stat().NumStreamsInbound + return nil + })) + + require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { + t.Logf("transient scope; inbound stream count %d", scope.Stat().NumStreamsInbound) + totalTrackedStreams += scope.Stat().NumStreamsInbound + return nil + })) + + require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error { + t.Logf("protocol scope for %s; inbound stream count %d", defaultProtocolID, scope.Stat().NumStreamsInbound) + totalTrackedStreams += scope.Stat().NumStreamsInbound + return nil + })) + + for _, sender := range senders { + require.NoError(t, resourceManagerRcv.ViewPeer(sender.ID(), 
func(scope network.PeerScope) error {
+			t.Logf("peer scope for %s; inbound stream count %d", sender.ID(), scope.Stat().NumStreamsInbound)
+			totalTrackedStreams += scope.Stat().NumStreamsInbound
+			return nil
+		}))
+	}
+
+	t.Logf("total tracked streams %d", totalTrackedStreams)
+}
+
+// TestCreateStream_ResourceAllocation is a WIP exploratory companion to the test above: it opens streams one at a time
+// from a single sender (running with infinite limits) to a single receiver (running with unscaled default limits), and
+// after each stream it logs how the receiver's resource manager accounts for inbound streams and connections at the
+// transient, protocol, and system scopes. Its purpose is to observe how stream resources are allocated across scopes
+// rather than to assert a specific limit.
+func TestCreateStream_ResourceAllocation(t *testing.T) { + idProvider := mockmodule.NewIdentityProvider(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) + + sporkID := unittest.IdentifierFixture() + + resourceManagerSnd, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(rcmgr.InfiniteLimits)) + require.NoError(t, err) + sender, senderId := p2ptest.NodeFixture(t, + sporkID, + t.Name(), + idProvider, + p2ptest.WithResourceManager(resourceManagerSnd), + p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond)) + + limits := rcmgr.DefaultLimits + libp2p.SetDefaultServiceLimits(&limits) + + l := limits.Scale(0, 0) + resourceManagerRcv, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(l)) + require.NoError(t, err) + receiver, id2 := p2ptest.NodeFixture(t, + sporkID, + t.Name(), + idProvider, + p2ptest.WithResourceManager(resourceManagerRcv), + p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond)) + + idProvider.On("ByPeerID", sender.ID()).Return(&senderId, true).Maybe() + idProvider.On("ByPeerID", receiver.ID()).Return(&id2, true).Maybe() + + p2ptest.StartNodes(t, signalerCtx, []p2p.LibP2PNode{sender, receiver}) + defer p2ptest.StopNodes(t, []p2p.LibP2PNode{sender, receiver}, cancel) + + p2ptest.LetNodesDiscoverEachOther(t, signalerCtx, []p2p.LibP2PNode{sender, receiver}, flow.IdentityList{&senderId, &id2}) + + defaultProtocolID := protocols.FlowProtocolID(sporkID) + maxInboundStreamPerPeer := l.ToPartialLimitConfig().ProtocolPeerDefault.StreamsInbound + maxSystemInboundStream := l.ToPartialLimitConfig().System.StreamsInbound + + t.Log("max allowed inbound stream from each sender to receiver (per protocol)", maxInboundStreamPerPeer) + t.Log("max protocol peer limit", l.ToPartialLimitConfig().ProtocolDefault.StreamsInbound) + t.Log("max allowed inbound stream across all peers and protocols at receiver (system-wide)", maxSystemInboundStream) + + for i := 0; i < 100; i++ { + streamCreated := make(chan struct{}) + go func() { + err := sender.OpenProtectedStream(ctx, receiver.ID(), t.Name(), func(stream network.Stream) error { + close(streamCreated) + <-ctx.Done() + return nil + + }) + require.NoError(t, err, "error creating stream") + }() + <-streamCreated + + t.Logf("created stream %d", i) + outStreamCnt := p2putils.CountStream(sender.Host(), receiver.ID(), p2putils.Protocol(defaultProtocolID), p2putils.Direction(network.DirOutbound)) + inStreamCnt := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Protocol(defaultProtocolID), p2putils.Direction(network.DirInbound)) + t.Logf("outbound stream count %d", outStreamCnt) + t.Logf("inbound stream count %d", inStreamCnt) + require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { + t.Logf("transient scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + return nil + })) + + require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error { + t.Logf("protocol scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + return nil + })) + + require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { + t.Logf("system scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + return nil + })) + + } + + require.NoError(t, 
resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error {
+		t.Logf("transient scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound)
+		return nil
+	}))
+
+	require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error {
+		t.Logf("protocol scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound)
+		return nil
+	}))
+
+	require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error {
+		t.Logf("system scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound)
+		return nil
+	}))
+
+	// require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error {
+	//	t.Logf("system scope")
+	//	t.Logf("inbound stream count %d", scope.Stat().NumStreamsInbound)
+	//	return nil
+	// }))

+	// unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time")
+	//
+	// totalStreams := 0
+	// for i, sender := range senders {
+	//	actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), defaultProtocolID, network.DirOutbound)
+	//	t.Logf("sender %d has %d streams", i, actualNumOfStreams)
+	//	assert.Equalf(t,
+	//		int64(maxInboundStreamPerPeer-1),
+	//		int64(actualNumOfStreams),
+	//		"expected to create %d number of streams got %d",
+	//		int64(maxInboundStreamPerPeer-1),
+	//		actualNumOfStreams)
+	//	totalStreams += actualNumOfStreams
+	// }
+	//
+	// // when system-wide limit is not enforced, the total number of streams created by all senders should be greater than the system-wide limit.
+	// require.Greaterf(t,
+	//	totalStreams,
+	//	l.ToPartialLimitConfig().Stream.StreamsInbound,
+	//	"expected to create more than %d number of streams got %d",
+	//	l.ToPartialLimitConfig().Stream.StreamsInbound,
+	//	totalStreams)
+	//
+	// require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error {
+	//	t.Logf("protocol scope for %s", defaultProtocolID)
+	//	t.Logf("inbound stream count %d", scope.Stat().NumStreamsInbound)
+	//	return nil
+	// }))
+	//
+	// require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error {
+	//	t.Logf("system scope")
+	//	t.Logf("inbound stream count %d", scope.Stat().NumStreamsInbound)
+	//	return nil
+	// }))
+}
+
+// TestCreateStream_PeerLimit_Enforced is a WIP test that configures the receiver with explicit inbound stream limits at
+// every scope (system, transient, protocol default, protocol-peer default, peer default, connection, and stream), while
+// the senders run with infinite limits. It then creates streams from nodeCount senders to the single receiver in two
+// phases, first up to the per-peer limit and then far beyond it, and inspects the receiver's resource manager scopes to
+// observe which of the configured limits are actually enforced.
+func TestCreateStream_PeerLimit_Enforced(t *testing.T) {
+	nodeCount := 10
+	buff := 0
+	maxStreamPerPeer := 5
+	maxStreamProtocol := nodeCount * maxStreamPerPeer
+	maxStreamPeerProtocol := maxStreamPerPeer * maxStreamProtocol
+	maxTransient := nodeCount
+	maxSystemStream := nodeCount
+
+	idProvider := mockmodule.NewIdentityProvider(t)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx)
+
+	sporkID := unittest.IdentifierFixture()
+
+	// sender nodes will have infinite stream limits to ensure that they can create as many streams as they want.
+	resourceManagerSnd, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(rcmgr.InfiniteLimits))
+	require.NoError(t, err)
+	senders, senderIds := p2ptest.NodesFixture(t,
+		sporkID,
+		t.Name(),
+		nodeCount,
+		idProvider,
+		p2ptest.WithResourceManager(resourceManagerSnd),
+		p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond))
+
+	// the receiver node runs with the default base limits (no scaling), overridden below with explicit inbound stream limits per scope.
+	limits := rcmgr.DefaultLimits
+	libp2p.SetDefaultServiceLimits(&limits)
+	l := limits.Scale(0, 0)
+	cfg := rcmgr.PartialLimitConfig{
+		System: rcmgr.ResourceLimits{
+			StreamsInbound: rcmgr.LimitVal(maxSystemStream),
+			ConnsInbound:   rcmgr.LimitVal(nodeCount),
+		},
+		Transient: rcmgr.ResourceLimits{
+			ConnsInbound:   rcmgr.LimitVal(nodeCount),
+			StreamsInbound: rcmgr.LimitVal(maxTransient),
+		},
+		ProtocolDefault: rcmgr.ResourceLimits{
+			StreamsInbound: rcmgr.LimitVal(maxStreamProtocol + buff),
+		},
+		ProtocolPeerDefault: rcmgr.ResourceLimits{
+			StreamsInbound: rcmgr.LimitVal(maxStreamPeerProtocol + buff),
+		},
+		PeerDefault: rcmgr.ResourceLimits{
+			StreamsInbound: rcmgr.LimitVal(maxStreamPerPeer + buff),
+		},
+		Conn: rcmgr.ResourceLimits{
+			StreamsInbound: rcmgr.LimitVal(maxStreamPerPeer + buff),
+		},
+		Stream: rcmgr.ResourceLimits{
+			StreamsInbound: rcmgr.LimitVal(maxStreamPerPeer + buff),
+		},
+	}
+	l = cfg.Build(l)
+	resourceManagerRcv, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(l))
+	require.NoError(t, err)
+	receiver, id2 := p2ptest.NodeFixture(t,
+		sporkID,
+		t.Name(),
+		idProvider,
+		p2ptest.WithResourceManager(resourceManagerRcv),
+		p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond))
+
+	for i, sender := range senders {
+		idProvider.On("ByPeerID", sender.ID()).Return(senderIds[i], true).Maybe()
+	}
+	idProvider.On("ByPeerID", receiver.ID()).Return(&id2, true).Maybe()
+
+	nodes := append(senders, receiver)
+	ids := append(senderIds, &id2)
+
+	p2ptest.StartNodes(t, signalerCtx, nodes)
+	defer p2ptest.StopNodes(t, nodes, cancel)
+
+	p2ptest.LetNodesDiscoverEachOther(t, signalerCtx, nodes, ids)
+
+	var allStreamsCreated sync.WaitGroup
+	defaultProtocolID := protocols.FlowProtocolID(sporkID)
+	// maxTransientPerPeer := l.ToPartialLimitConfig().Transient.StreamsInbound
+	//
+	// t.Log("max allowed inbound stream from each sender to receiver (per protocol)", maxInboundStreamPerPeer, "max transient", maxTransientPerPeer)

+	totalStreamsCreated := int64(0)
+	for sIndex := range senders {
for i := int64(0); i < int64(maxStreamPerPeer); i++ { + if i >= int64(maxSystemStream) { + // we reached the system-wide limit; no need to create more streams; as stream creation may fail; we re-examine pressure on system-wide limit later. + break + } + allStreamsCreated.Add(1) + go func(sIndex int) { + defer allStreamsCreated.Done() + sender := senders[sIndex] + _, err := sender.Host().NewStream(ctx, receiver.ID(), defaultProtocolID) + require.NoError(t, err, "error creating stream") + atomic.AddInt64(&totalStreamsCreated, 1) + }(sIndex) + } + } + + unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time") + + require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { + // number of in-transient streams must be less than the max transient limit + require.Less(t, int64(scope.Stat().NumStreamsInbound), int64(maxTransient)) + + // number of in-transient streams must be less than or equal the total number of streams created. + require.LessOrEqual(t, int64(scope.Stat().NumStreamsInbound), int64(totalStreamsCreated)) + // t.Logf("transient scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + return nil + })) + + require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { + t.Logf("system scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + return nil + })) + + for i, sender := range senders { + actualNumOfStreams := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Direction(network.DirInbound)) + t.Logf("sender %d has %d streams", i, actualNumOfStreams) + // require.Equalf(t, + // int64(maxStreamPerPeer), + // int64(actualNumOfStreams), + // "expected to create %d number of streams got %d", + // int64(maxStreamPerPeer), + // actualNumOfStreams) + } + + // now the test goes beyond the protocol-peer limit and tries to create one more stream from each sender. + // this should cause receiver to close all streams from the sender and disconnect from the sender. 
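+	// (the exact reaction is not asserted yet; whether the receiver resets only the offending stream or tears down the whole connection is an open question this WIP probes.)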
+ for sIndex := range senders { + for i := int64(0); i < int64(100); i++ { + allStreamsCreated.Add(1) + go func(sIndex int) { + defer allStreamsCreated.Done() + sender := senders[sIndex] + _, _ = sender.Host().NewStream(ctx, receiver.ID(), defaultProtocolID) + }(sIndex) + } + } + + unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time") + + t.Log("-----") + total := 0 + for i, sender := range senders { + actualNumOfStreams := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Direction(network.DirInbound)) + t.Logf("sender %d has %d streams", i, actualNumOfStreams) + // require.Equalf(t, + // int64(0), + // int64(actualNumOfStreams), + // "expected to create %d number of streams got %d", + // int64(0), + // actualNumOfStreams) + total += actualNumOfStreams + } + + require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { + t.Logf("transient scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + return nil + })) + + require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error { + t.Logf("protocol scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + return nil + })) + + require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { + t.Logf("system scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + return nil + })) + + t.Logf("total streams %d", total) } From f80def47faceb70d7ad4a4274fb99c2ba7308172 Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Thu, 19 Oct 2023 09:52:01 -0700 Subject: [PATCH 04/29] wip --- network/p2p/p2pnode/resourceManager_test.go | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index a5597e660cf..e922a10e913 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -496,10 +496,8 @@ func TestCreateStream_PeerLimit_Enforced(t *testing.T) { var allStreamsCreated sync.WaitGroup defaultProtocolID := protocols.FlowProtocolID(sporkID) - // maxTransientPerPeer := l.ToPartialLimitConfig().Transient.StreamsInbound - // - // t.Log("max allowed inbound stream from each sender to receiver (per protocol)", maxInboundStreamPerPeer, "max transient", maxTransientPerPeer) + // creates max(maxStreamPerPeer * nodeCount, maxSystemStream) streams from each sender to the receiver; breaks as soon as the system-wide limit is reached. 
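+	// (note: since the inner loop below breaks once its index reaches maxSystemStream, each sender attempts at most min(maxStreamPerPeer, maxSystemStream) creations.)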
totalStreamsCreated := int64(0) for sIndex := range senders { for i := int64(0); i < int64(maxStreamPerPeer); i++ { @@ -535,16 +533,17 @@ func TestCreateStream_PeerLimit_Enforced(t *testing.T) { return nil })) - for i, sender := range senders { + totalInboundStreams := 0 + for _, sender := range senders { actualNumOfStreams := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Direction(network.DirInbound)) - t.Logf("sender %d has %d streams", i, actualNumOfStreams) - // require.Equalf(t, - // int64(maxStreamPerPeer), - // int64(actualNumOfStreams), - // "expected to create %d number of streams got %d", - // int64(maxStreamPerPeer), - // actualNumOfStreams) + // t.Logf("sender %d has %d streams", i, actualNumOfStreams) + require.LessOrEqual(t, int64(actualNumOfStreams), int64(maxStreamPerPeer)) + totalInboundStreams += actualNumOfStreams } + // sanity check; the total number of inbound streams must be less than or equal to the system-wide limit. + // TODO: this must be a hard equal check; but falls short; to be shared with libp2p community. + // Failing at this line means the system-wide limit is not being enforced. + require.LessOrEqual(t, totalInboundStreams, maxSystemStream) // now the test goes beyond the protocol-peer limit and tries to create one more stream from each sender. // this should cause receiver to close all streams from the sender and disconnect from the sender. From 4853e1b4725c83862dcdba987c7a7387bd52aca4 Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Thu, 19 Oct 2023 11:33:10 -0700 Subject: [PATCH 05/29] cleans up tests --- network/p2p/p2pnode/resourceManager_test.go | 456 ++++---------------- 1 file changed, 92 insertions(+), 364 deletions(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index e922a10e913..beaeb23b99a 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -11,7 +11,6 @@ import ( "github.com/libp2p/go-libp2p" "github.com/libp2p/go-libp2p/core/network" rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/onflow/flow-go/model/flow" @@ -87,316 +86,39 @@ func TestCreateStream_InboundConnResourceLimit(t *testing.T) { fmt.Sprintf("expected to create %d number of streams got %d", expectedNumOfStreams, actualNumOfStreams)) } -// TestCreateStream_SystemStreamLimit_NotEnforced is a re-production of a hypothetical bug where the system-wide inbound stream limit of libp2p resource management -// was not being enforced. The purpose of this test is to share with the libp2p community as well as to evaluate the existence of the bug on -// future libp2p versions. -// Test scenario works as follows: -// - We have 30 senders and 1 receiver. -// - The senders are running with a resource manager that allows infinite number of streams; so that they can create as many streams as they want. -// - The receiver is running with a resource manager with base limits and no scaling. -// - The test reads the peer protocol default limits for inbound streams at receiver; say x; which is the limit for the number of inbound streams from each sender on a -// specific protocol. -// - Each sender creates x-1 streams to the receiver on a specific protocol. This is done to ensure that the receiver has x-1 streams from each sender; a total of -// 30*(x-1) streams at the receiver. 
-// - Test first ensures that numerically 30 * (x - 1) > max system-wide inbound stream limit; i.e., the total number of streams created by all senders is greater than -// the system-wide limit. -// - Then each sender creates x - 1 streams concurrently to the receiver. -// - At the end of the test we ensure that the total number of streams created by all senders is greater than the system-wide limit; which should not be the case if the -// system-wide limit is being enforced. -func TestCreateStream_SystemStreamLimit_NotEnforced(t *testing.T) { - nodeCount := 30 +type testPeerLimitConfig struct { + // nodeCount is the number of nodes in the test. + nodeCount int - idProvider := mockmodule.NewIdentityProvider(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) + // maxInboundPeerStream is the maximum number of inbound streams from a single peer to the receiver. + maxInboundPeerStream int - sporkID := unittest.IdentifierFixture() + // maxInboundStreamProtocol is the maximum number of inbound streams at the receiver using a specific protocol; it accumulates all streams from all senders. + maxInboundStreamProtocol int - resourceManagerSnd, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(rcmgr.InfiniteLimits)) - require.NoError(t, err) - senders, senderIds := p2ptest.NodesFixture(t, sporkID, t.Name(), nodeCount, - idProvider, - p2ptest.WithResourceManager(resourceManagerSnd), - p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond)) - - limits := rcmgr.DefaultLimits - libp2p.SetDefaultServiceLimits(&limits) - - l := limits.Scale(0, 0) - resourceManagerRcv, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(l)) - require.NoError(t, err) - receiver, id2 := p2ptest.NodeFixture( - t, - sporkID, - t.Name(), - idProvider, - p2ptest.WithResourceManager(resourceManagerRcv), - p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond)) - - for i, sender := range senders { - idProvider.On("ByPeerID", sender.ID()).Return(senderIds[i], true).Maybe() - } - idProvider.On("ByPeerID", receiver.ID()).Return(&id2, true).Maybe() - - p2ptest.StartNodes(t, signalerCtx, append(senders, receiver)) - defer p2ptest.StopNodes(t, append(senders, receiver), cancel) + // maxInboundStreamPeerProtocol is the maximum number of inbound streams at the receiver from a single peer using a specific protocol. + maxInboundStreamPeerProtocol int - p2ptest.LetNodesDiscoverEachOther(t, signalerCtx, append(senders, receiver), append(senderIds, &id2)) - - var allStreamsCreated sync.WaitGroup - defaultProtocolID := protocols.FlowProtocolID(sporkID) - maxInboundStreamPerPeer := l.ToPartialLimitConfig().ProtocolPeerDefault.StreamsInbound - maxSystemInboundStream := l.ToPartialLimitConfig().System.StreamsInbound + // maxInboundStreamTransient is the maximum number of inbound transient streams at the receiver; it accumulates all streams from all senders across all protocols. + // transient streams are those that are not associated fully with a peer and protocol. 
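+	// (for example, a newly accepted stream that has not yet negotiated its protocol is still counted against this limit.)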
+ maxInboundStreamTransient int - t.Log("max allowed inbound stream from each sender to receiver (per protocol)", maxInboundStreamPerPeer) - t.Log("max allowed inbound stream across all peers and protocols at receiver (system-wide)", maxSystemInboundStream) - - // sanity check; if each peer creates maxInboundStreamPerPeer-1 streams, and we assume there the maxSystemInboundStream is not enforced; then to validate the hypothesis we need - // to ensure that (maxInboundStreamPerPeer - 1) * nodeCount > maxSystemInboundStream, i.e., if each peer creates maxInboundStreamPerPeer-1 streams, then the total number of streams - // end up being greater than the system-wide limit. - require.Greaterf(t, - int64(maxInboundStreamPerPeer-1)*int64(nodeCount), - int64(maxSystemInboundStream), - "(maxInboundStreamPerPeer - 1) * nodeCount should be greater than maxSystemInboundStream") - - for sIndex := range senders { - sender := senders[sIndex] - for i := int64(0); i < int64(maxInboundStreamPerPeer-1); i++ { - allStreamsCreated.Add(1) - go func() { - defer allStreamsCreated.Done() - _, err := sender.Host().NewStream(ctx, receiver.ID(), defaultProtocolID) - require.NoError(t, err, "error creating stream") - }() - } - } - - unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time") - - totalStreams := 0 - for i, sender := range senders { - actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), p2putils.Protocol(defaultProtocolID), p2putils.Direction(network.DirOutbound)) - t.Logf("sender %d has %d streams", i, actualNumOfStreams) - assert.Equalf(t, - int64(maxInboundStreamPerPeer-1), - int64(actualNumOfStreams), - "expected to create %d number of streams got %d", - int64(maxInboundStreamPerPeer-1), - actualNumOfStreams) - totalStreams += actualNumOfStreams - } - - // when system-wide limit is not enforced, the total number of streams created by all senders should be greater than the system-wide limit. 
- require.Greaterf(t, - totalStreams, - l.ToPartialLimitConfig().Stream.StreamsInbound, - "expected to create more than %d number of streams got %d", - l.ToPartialLimitConfig().Stream.StreamsInbound, - totalStreams) - - totalTrackedStreams := 0 - require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { - t.Logf("transient scope; inbound stream count %d", scope.Stat().NumStreamsInbound) - totalTrackedStreams += scope.Stat().NumStreamsInbound - return nil - })) - - require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error { - t.Logf("protocol scope for %s; inbound stream count %d", defaultProtocolID, scope.Stat().NumStreamsInbound) - totalTrackedStreams += scope.Stat().NumStreamsInbound - return nil - })) - - for _, sender := range senders { - require.NoError(t, resourceManagerRcv.ViewPeer(sender.ID(), func(scope network.PeerScope) error { - t.Logf("peer scope for %s; inbound stream count %d", sender.ID(), scope.Stat().NumStreamsInbound) - totalTrackedStreams += scope.Stat().NumStreamsInbound - return nil - })) - } - - require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { - t.Logf("system scope; inbound stream count %d", scope.Stat().NumStreamsInbound) - totalTrackedStreams += scope.Stat().NumStreamsInbound - return nil - })) - - require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { - t.Logf("transient scope; inbound stream count %d", scope.Stat().NumStreamsInbound) - totalTrackedStreams += scope.Stat().NumStreamsInbound - return nil - })) - - require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error { - t.Logf("protocol scope for %s; inbound stream count %d", defaultProtocolID, scope.Stat().NumStreamsInbound) - totalTrackedStreams += scope.Stat().NumStreamsInbound - return nil - })) - - for _, sender := range senders { - require.NoError(t, resourceManagerRcv.ViewPeer(sender.ID(), func(scope network.PeerScope) error { - t.Logf("peer scope for %s; inbound stream count %d", sender.ID(), scope.Stat().NumStreamsInbound) - totalTrackedStreams += scope.Stat().NumStreamsInbound - return nil - })) - } - - t.Logf("total tracked streams %d", totalTrackedStreams) + // maxInboundStreamSystem is the maximum number of inbound streams at the receiver; it accumulates all streams from all senders across all protocols. + maxInboundStreamSystem int } -// TestCreateStream_SystemStreamLimit_NotEnforced is a re-production of a hypothetical bug where the system-wide inbound stream limit of libp2p resource management -// was not being enforced. The purpose of this test is to share with the libp2p community as well as to evaluate the existence of the bug on -// future libp2p versions. -// Test scenario works as follows: -// - We have 30 senders and 1 receiver. -// - The senders are running with a resource manager that allows infinite number of streams; so that they can create as many streams as they want. -// - The receiver is running with a resource manager with base limits and no scaling. -// - The test reads the peer protocol default limits for inbound streams at receiver; say x; which is the limit for the number of inbound streams from each sender on a -// specific protocol. -// - Each sender creates x-1 streams to the receiver on a specific protocol. This is done to ensure that the receiver has x-1 streams from each sender; a total of -// 30*(x-1) streams at the receiver. 
-// - Test first ensures that numerically 30 * (x - 1) > max system-wide inbound stream limit; i.e., the total number of streams created by all senders is greater than -// the system-wide limit. -// - Then each sender creates x - 1 streams concurrently to the receiver. -// - At the end of the test we ensure that the total number of streams created by all senders is greater than the system-wide limit; which should not be the case if the -// system-wide limit is being enforced. -func TestCreateStream_ResourceAllocation(t *testing.T) { - idProvider := mockmodule.NewIdentityProvider(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) - - sporkID := unittest.IdentifierFixture() - - resourceManagerSnd, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(rcmgr.InfiniteLimits)) - require.NoError(t, err) - sender, senderId := p2ptest.NodeFixture(t, - sporkID, - t.Name(), - idProvider, - p2ptest.WithResourceManager(resourceManagerSnd), - p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond)) - - limits := rcmgr.DefaultLimits - libp2p.SetDefaultServiceLimits(&limits) - - l := limits.Scale(0, 0) - resourceManagerRcv, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(l)) - require.NoError(t, err) - receiver, id2 := p2ptest.NodeFixture(t, - sporkID, - t.Name(), - idProvider, - p2ptest.WithResourceManager(resourceManagerRcv), - p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond)) - - idProvider.On("ByPeerID", sender.ID()).Return(&senderId, true).Maybe() - idProvider.On("ByPeerID", receiver.ID()).Return(&id2, true).Maybe() - - p2ptest.StartNodes(t, signalerCtx, []p2p.LibP2PNode{sender, receiver}) - defer p2ptest.StopNodes(t, []p2p.LibP2PNode{sender, receiver}, cancel) - - p2ptest.LetNodesDiscoverEachOther(t, signalerCtx, []p2p.LibP2PNode{sender, receiver}, flow.IdentityList{&senderId, &id2}) - - defaultProtocolID := protocols.FlowProtocolID(sporkID) - maxInboundStreamPerPeer := l.ToPartialLimitConfig().ProtocolPeerDefault.StreamsInbound - maxSystemInboundStream := l.ToPartialLimitConfig().System.StreamsInbound - - t.Log("max allowed inbound stream from each sender to receiver (per protocol)", maxInboundStreamPerPeer) - t.Log("max protocol peer limit", l.ToPartialLimitConfig().ProtocolDefault.StreamsInbound) - t.Log("max allowed inbound stream across all peers and protocols at receiver (system-wide)", maxSystemInboundStream) - - for i := 0; i < 100; i++ { - streamCreated := make(chan struct{}) - go func() { - err := sender.OpenProtectedStream(ctx, receiver.ID(), t.Name(), func(stream network.Stream) error { - close(streamCreated) - <-ctx.Done() - return nil - - }) - require.NoError(t, err, "error creating stream") - }() - <-streamCreated - - t.Logf("created stream %d", i) - outStreamCnt := p2putils.CountStream(sender.Host(), receiver.ID(), p2putils.Protocol(defaultProtocolID), p2putils.Direction(network.DirOutbound)) - inStreamCnt := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Protocol(defaultProtocolID), p2putils.Direction(network.DirInbound)) - t.Logf("outbound stream count %d", outStreamCnt) - t.Logf("inbound stream count %d", inStreamCnt) - require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { - t.Logf("transient scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - return nil - })) - - require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope 
network.ProtocolScope) error { - t.Logf("protocol scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - return nil - })) - - require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { - t.Logf("system scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - return nil - })) - - } - - require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { - t.Logf("transient scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - return nil - })) - - require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error { - t.Logf("protocol scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - return nil - })) - - require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { - t.Logf("system scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - return nil - })) - - // require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { - // t.Logf("system scope") - // t.Logf("inbound stream count %d", scope.Stat().NumStreamsInbound) - // return nil - // })) - - // unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time") - // - // totalStreams := 0 - // for i, sender := range senders { - // actualNumOfStreams := p2putils.CountStream(sender.Host(), receiver.ID(), defaultProtocolID, network.DirOutbound) - // t.Logf("sender %d has %d streams", i, actualNumOfStreams) - // assert.Equalf(t, - // int64(maxInboundStreamPerPeer-1), - // int64(actualNumOfStreams), - // "expected to create %d number of streams got %d", - // int64(maxInboundStreamPerPeer-1), - // actualNumOfStreams) - // totalStreams += actualNumOfStreams - // } - // - // // when system-wide limit is not enforced, the total number of streams created by all senders should be greater than the system-wide limit. - // require.Greaterf(t, - // totalStreams, - // l.ToPartialLimitConfig().Stream.StreamsInbound, - // "expected to create more than %d number of streams got %d", - // l.ToPartialLimitConfig().Stream.StreamsInbound, - // totalStreams) - // - // require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error { - // t.Logf("protocol scope for %s", defaultProtocolID) - // t.Logf("inbound stream count %d", scope.Stat().NumStreamsInbound) - // return nil - // })) - // - // require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { - // t.Logf("system scope") - // t.Logf("inbound stream count %d", scope.Stat().NumStreamsInbound) - // return nil - // })) +func TestCreateStream_InboundStreamResourceLimit(t *testing.T) { + t.Run("loose-hierarchical-structure", func(t *testing.T) { + // loose hierarchical structure refers to case where maxInboundStreamSystem = maxInboundStreamTransient = maxInboundStreamProtocol > maxInboundStreamPeerProtocol = maxInboundPeerStream. 
+		testCreateStreamInboundStreamResourceLimits(t, &testPeerLimitConfig{
+			nodeCount:                    10, // 10 nodes
+			maxInboundPeerStream:         5,  // each can create 5 streams to the receiver
+			maxInboundStreamProtocol:     50, // which accounts for 50 streams in total from all senders to the receiver using the unicast protocol
+			maxInboundStreamPeerProtocol: 5,  // each peer using the unicast protocol can create 5 streams to the receiver
+			maxInboundStreamTransient:    50, // the total number of transient streams from all senders to the receiver is 50
+			maxInboundStreamSystem:       50, // the total number of streams from all senders to the receiver is 50
+		})
+	})
 }
 
 // TestCreateStream_SystemStreamLimit_NotEnforced is a re-production of a hypothetical bug where the system-wide inbound stream limit of libp2p resource management
@@ -415,15 +137,7 @@ func TestCreateStream_ResourceAllocation(t *testing.T) {
 // - Then each sender creates x - 1 streams concurrently to the receiver.
 // - At the end of the test we ensure that the total number of streams created by all senders is greater than the system-wide limit; which should not be the case if the
 // system-wide limit is being enforced.
-func TestCreateStream_PeerLimit_Enforced(t *testing.T) {
-	nodeCount := 10
-	buff := 0
-	maxStreamPerPeer := 5
-	maxStreamProtocol := nodeCount * maxStreamPerPeer
-	maxStreamPeerProtocol := maxStreamPerPeer * maxStreamProtocol
-	maxTransient := nodeCount
-	maxSystemStream := nodeCount
-
+func testCreateStreamInboundStreamResourceLimits(t *testing.T, cfg *testPeerLimitConfig) {
 	idProvider := mockmodule.NewIdentityProvider(t)
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
@@ -436,8 +150,7 @@ func TestCreateStream_PeerLimit_Enforced(t *testing.T) {
 	require.NoError(t, err)
 	senders, senderIds := p2ptest.NodesFixture(t,
 		sporkID,
-		t.Name(),
-		nodeCount,
+		t.Name(), cfg.nodeCount,
 		idProvider,
 		p2ptest.WithResourceManager(resourceManagerSnd),
 		p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond))
@@ -446,32 +159,32 @@ func TestCreateStream_PeerLimit_Enforced(t *testing.T) {
 	limits := rcmgr.DefaultLimits
 	libp2p.SetDefaultServiceLimits(&limits)
 	l := limits.Scale(0, 0)
-	cfg := rcmgr.PartialLimitConfig{
+	partial := rcmgr.PartialLimitConfig{
 		System: rcmgr.ResourceLimits{
-			StreamsInbound: rcmgr.LimitVal(maxSystemStream),
-			ConnsInbound:   rcmgr.LimitVal(nodeCount),
+			StreamsInbound: rcmgr.LimitVal(cfg.maxInboundStreamSystem),
+			ConnsInbound:   rcmgr.LimitVal(cfg.nodeCount),
 		},
 		Transient: rcmgr.ResourceLimits{
-			ConnsInbound:   rcmgr.LimitVal(nodeCount),
-			StreamsInbound: rcmgr.LimitVal(maxTransient),
+			ConnsInbound:   rcmgr.LimitVal(cfg.nodeCount),
+			StreamsInbound: rcmgr.LimitVal(cfg.maxInboundStreamTransient),
 		},
 		ProtocolDefault: rcmgr.ResourceLimits{
-			StreamsInbound: rcmgr.LimitVal(maxStreamProtocol + buff),
+			StreamsInbound: rcmgr.LimitVal(cfg.maxInboundStreamProtocol),
 		},
 		ProtocolPeerDefault: rcmgr.ResourceLimits{
-			StreamsInbound: rcmgr.LimitVal(maxStreamPeerProtocol + buff),
+			StreamsInbound: rcmgr.LimitVal(cfg.maxInboundStreamPeerProtocol),
 		},
 		PeerDefault: rcmgr.ResourceLimits{
-			StreamsInbound: rcmgr.LimitVal(maxStreamPerPeer + buff),
+			StreamsInbound: rcmgr.LimitVal(cfg.maxInboundPeerStream),
 		},
 		Conn: rcmgr.ResourceLimits{
-			StreamsInbound: rcmgr.LimitVal(maxStreamPerPeer + buff),
+			StreamsInbound: rcmgr.LimitVal(cfg.maxInboundPeerStream),
 		},
 		Stream: rcmgr.ResourceLimits{
-			StreamsInbound: rcmgr.LimitVal(maxStreamPerPeer + buff),
+			StreamsInbound: rcmgr.LimitVal(cfg.maxInboundPeerStream),
 		},
 	}
-	l = cfg.Build(l)
+	l = 
partial.Build(l) resourceManagerRcv, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(l)) require.NoError(t, err) receiver, id2 := p2ptest.NodeFixture(t, @@ -497,11 +210,11 @@ func TestCreateStream_PeerLimit_Enforced(t *testing.T) { var allStreamsCreated sync.WaitGroup defaultProtocolID := protocols.FlowProtocolID(sporkID) - // creates max(maxStreamPerPeer * nodeCount, maxSystemStream) streams from each sender to the receiver; breaks as soon as the system-wide limit is reached. + // creates max(maxInboundPeerStream * nodeCount, maxInboundStreamSystem) streams from each sender to the receiver; breaks as soon as the system-wide limit is reached. totalStreamsCreated := int64(0) for sIndex := range senders { - for i := int64(0); i < int64(maxStreamPerPeer); i++ { - if i >= int64(maxSystemStream) { + for i := int64(0); i < int64(cfg.maxInboundPeerStream); i++ { + if i >= int64(cfg.maxInboundStreamSystem) { // we reached the system-wide limit; no need to create more streams; as stream creation may fail; we re-examine pressure on system-wide limit later. break } @@ -520,7 +233,7 @@ func TestCreateStream_PeerLimit_Enforced(t *testing.T) { require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { // number of in-transient streams must be less than the max transient limit - require.Less(t, int64(scope.Stat().NumStreamsInbound), int64(maxTransient)) + require.Less(t, int64(scope.Stat().NumStreamsInbound), int64(cfg.maxInboundStreamTransient)) // number of in-transient streams must be less than or equal the total number of streams created. require.LessOrEqual(t, int64(scope.Stat().NumStreamsInbound), int64(totalStreamsCreated)) @@ -537,57 +250,72 @@ func TestCreateStream_PeerLimit_Enforced(t *testing.T) { for _, sender := range senders { actualNumOfStreams := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Direction(network.DirInbound)) // t.Logf("sender %d has %d streams", i, actualNumOfStreams) - require.LessOrEqual(t, int64(actualNumOfStreams), int64(maxStreamPerPeer)) + require.LessOrEqual(t, int64(actualNumOfStreams), int64(cfg.maxInboundPeerStream)) totalInboundStreams += actualNumOfStreams } // sanity check; the total number of inbound streams must be less than or equal to the system-wide limit. // TODO: this must be a hard equal check; but falls short; to be shared with libp2p community. // Failing at this line means the system-wide limit is not being enforced. - require.LessOrEqual(t, totalInboundStreams, maxSystemStream) + require.LessOrEqual(t, totalInboundStreams, cfg.maxInboundStreamSystem) - // now the test goes beyond the protocol-peer limit and tries to create one more stream from each sender. - // this should cause receiver to close all streams from the sender and disconnect from the sender. + // now the stress testing with each sender making `maxInboundStreamSystem` concurrent streams to the receiver. for sIndex := range senders { - for i := int64(0); i < int64(100); i++ { + for i := int64(0); i < int64(cfg.maxInboundStreamSystem); i++ { allStreamsCreated.Add(1) go func(sIndex int) { defer allStreamsCreated.Done() sender := senders[sIndex] + // we don't care about the error here; as we are trying to create more streams than the system-wide limit; so we expect some of the stream creations to fail. 
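+				// (each rejected attempt is expected to fail fast rather than block, so the error is deliberately discarded here.)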
_, _ = sender.Host().NewStream(ctx, receiver.ID(), defaultProtocolID) }(sIndex) } } - unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time") + unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create (stress-testing) streams on time") - t.Log("-----") - total := 0 - for i, sender := range senders { + totalInboundStreams = 0 + for _, sender := range senders { actualNumOfStreams := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Direction(network.DirInbound)) - t.Logf("sender %d has %d streams", i, actualNumOfStreams) - // require.Equalf(t, - // int64(0), - // int64(actualNumOfStreams), - // "expected to create %d number of streams got %d", - // int64(0), - // actualNumOfStreams) - total += actualNumOfStreams + // t.Logf("sender %d has %d streams", i, actualNumOfStreams) + require.LessOrEqual(t, actualNumOfStreams, cfg.maxInboundPeerStream) + require.LessOrEqual(t, actualNumOfStreams, cfg.maxInboundStreamPeerProtocol) + totalInboundStreams += actualNumOfStreams } - - require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { - t.Logf("transient scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - return nil - })) - - require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error { - t.Logf("protocol scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - return nil - })) - - require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { - t.Logf("system scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - return nil - })) - - t.Logf("total streams %d", total) + // sanity check; the total number of inbound streams must be less than or equal to the system-wide limit. + // TODO: this must be a hard equal check; but falls short; to be shared with libp2p community. + // Failing at this line means the system-wide limit is not being enforced. 
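+	// (LessOrEqual rather than Equal is an assumption about slack: streams torn down between creation and counting would make an exact match flaky.)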
+ require.LessOrEqual(t, totalInboundStreams, cfg.maxInboundStreamSystem) + require.LessOrEqual(t, totalInboundStreams, cfg.maxInboundStreamTransient) + + // t.Log("-----") + // total := 0 + // for i, sender := range senders { + // actualNumOfStreams := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Direction(network.DirInbound)) + // t.Logf("sender %d has %d streams", i, actualNumOfStreams) + // // require.Equalf(t, + // // int64(0), + // // int64(actualNumOfStreams), + // // "expected to create %d number of streams got %d", + // // int64(0), + // // actualNumOfStreams) + // total += actualNumOfStreams + // } + // + // require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { + // t.Logf("transient scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + // return nil + // })) + // + // require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error { + // t.Logf("protocol scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + // return nil + // })) + // + // require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { + // t.Logf("system scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + // return nil + // })) + // + // t.Logf("total streams %d", total) + // } } From 2aff7d4c514ac4a573698fe52666f0e6c7b1b5ec Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Thu, 19 Oct 2023 11:40:11 -0700 Subject: [PATCH 06/29] adds test for peer limits greater than system limit --- network/p2p/p2pnode/resourceManager_test.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index beaeb23b99a..37177bc3084 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -119,6 +119,17 @@ func TestCreateStream_InboundStreamResourceLimit(t *testing.T) { maxInboundStreamSystem: 50, // the total number of streams from all senders to the receiver is 50 }) }) + t.Run("peer-limit-greater-than-system", func(t *testing.T) { + // the case where peer and protocol-level limits are higher than the system-wide limit. + testCreateStreamInboundStreamResourceLimits(t, &testPeerLimitConfig{ + nodeCount: 10, + maxInboundPeerStream: 500, + maxInboundStreamProtocol: 500, + maxInboundStreamPeerProtocol: 500, + maxInboundStreamTransient: 500, + maxInboundStreamSystem: 5, + }) + }) } // TestCreateStream_SystemStreamLimit_NotEnforced is a re-production of a hypothetical bug where the system-wide inbound stream limit of libp2p resource management @@ -211,10 +222,12 @@ func testCreateStreamInboundStreamResourceLimits(t *testing.T, cfg *testPeerLimi defaultProtocolID := protocols.FlowProtocolID(sporkID) // creates max(maxInboundPeerStream * nodeCount, maxInboundStreamSystem) streams from each sender to the receiver; breaks as soon as the system-wide limit is reached. - totalStreamsCreated := int64(0) + totalStreamsCreated := int64(0) // total number of streams successfully created. + totalStreamAttempted := int64(0) // total number of stream creation attempts. 
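+	// (bounding attempts rather than successes keeps the setup phase at or below the system-wide limit; note the counter advances before each goroutine is launched.)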
for sIndex := range senders { for i := int64(0); i < int64(cfg.maxInboundPeerStream); i++ { - if i >= int64(cfg.maxInboundStreamSystem) { + totalStreamAttempted++ + if totalStreamAttempted >= int64(cfg.maxInboundStreamSystem) { // we reached the system-wide limit; no need to create more streams; as stream creation may fail; we re-examine pressure on system-wide limit later. break } From 185358c68096b088267d7bf0f5ad3f1b69e91af0 Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Thu, 19 Oct 2023 14:12:51 -0700 Subject: [PATCH 07/29] implements all test cases --- network/internal/p2putils/utils.go | 6 +- network/p2p/p2pnode/resourceManager_test.go | 249 ++++++++++++++------ 2 files changed, 183 insertions(+), 72 deletions(-) diff --git a/network/internal/p2putils/utils.go b/network/internal/p2putils/utils.go index f5bc4f01e4c..744dd183350 100644 --- a/network/internal/p2putils/utils.go +++ b/network/internal/p2putils/utils.go @@ -133,11 +133,11 @@ func All() FilterOption { // It returns a slice of network.Stream values that match the filtering criteria. func FilterStream(host host.Host, targetID peer.ID, options ...FilterOption) []network.Stream { var filteredStreams []network.Stream - + const discardTheProtocol = "discard-the-protocol" // default values opts := FilterOptions{ dir: network.DirUnknown, // by default, consider both inbound and outbound streams - protocol: "", // by default, consider streams of all protocol IDs + protocol: discardTheProtocol, // by default, consider streams of all protocol IDs all: false, // by default, return just the first matching stream } @@ -155,7 +155,7 @@ func FilterStream(host host.Host, targetID peer.ID, options ...FilterOption) []n streams := conn.GetStreams() for _, stream := range streams { if (opts.dir == network.DirUnknown || stream.Stat().Direction == opts.dir) && - (opts.protocol == "" || stream.Protocol() == opts.protocol) { + (opts.protocol == discardTheProtocol || stream.Protocol() == opts.protocol) { filteredStreams = append(filteredStreams, stream) if !opts.all { return filteredStreams diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index 37177bc3084..b729b5f6892 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -3,8 +3,8 @@ package p2pnode_test import ( "context" "fmt" + "math" "sync" - "sync/atomic" "testing" "time" @@ -105,31 +105,163 @@ type testPeerLimitConfig struct { // maxInboundStreamSystem is the maximum number of inbound streams at the receiver; it accumulates all streams from all senders across all protocols. maxInboundStreamSystem int + + // unknownProtocol when set to true will cause senders to use an unknown protocol ID when creating streams. + unknownProtocol bool } -func TestCreateStream_InboundStreamResourceLimit(t *testing.T) { - t.Run("loose-hierarchical-structure", func(t *testing.T) { - // loose hierarchical structure refers to case where maxInboundStreamSystem = maxInboundStreamTransient = maxInboundStreamProtocol > maxInboundStreamPeerProtocol = maxInboundPeerStream. 
-		testCreateStreamInboundStreamResourceLimits(t, &testPeerLimitConfig{
-			nodeCount:                    10, // 10 nodes
-			maxInboundPeerStream:         5,  // each can create 5 streams to the receiver
-			maxInboundStreamProtocol:     50, // which accounts for 50 streams in total from all senders to the receiver using the unicast protocol
-			maxInboundStreamPeerProtocol: 5,  // each peer using the unicast protocol can create 5 streams to the receiver
-			maxInboundStreamTransient:    50, // the total number of transient streams from all senders to the receiver is 50
-			maxInboundStreamSystem:       50, // the total number of streams from all senders to the receiver is 50
-		})
-	})
-	t.Run("peer-limit-greater-than-system", func(t *testing.T) {
-		// the case where peer and protocol-level limits are higher than the system-wide limit.
-		testCreateStreamInboundStreamResourceLimits(t, &testPeerLimitConfig{
-			nodeCount:                    10,
-			maxInboundPeerStream:         500,
-			maxInboundStreamProtocol:     500,
-			maxInboundStreamPeerProtocol: 500,
-			maxInboundStreamTransient:    500,
-			maxInboundStreamSystem:       5,
-		})
-	})
+// maxLimit returns the maximum limit across all limits.
+func (t testPeerLimitConfig) maxLimit() int {
+	max := t.maxInboundPeerStream
+	if t.maxInboundStreamProtocol > max {
+		max = t.maxInboundStreamProtocol
+	}
+	if t.maxInboundStreamPeerProtocol > max {
+		max = t.maxInboundStreamPeerProtocol
+	}
+	if t.maxInboundStreamTransient > max {
+		max = t.maxInboundStreamTransient
+	}
+	if t.maxInboundStreamSystem > max {
+		max = t.maxInboundStreamSystem
+	}
+	return max
+}
+
+// baseCreateStreamInboundStreamResourceLimitConfig returns a testPeerLimitConfig with default values.
+func baseCreateStreamInboundStreamResourceLimitConfig() *testPeerLimitConfig {
+	return &testPeerLimitConfig{
+		nodeCount:                    10,
+		maxInboundPeerStream:         100,
+		maxInboundStreamProtocol:     100,
+		maxInboundStreamPeerProtocol: 100,
+		maxInboundStreamTransient:    100,
+		maxInboundStreamSystem:       100,
+	}
+}
+
+func TestCreateStream_DefaultConfig(t *testing.T) {
+	testCreateStreamInboundStreamResourceLimits(t, baseCreateStreamInboundStreamResourceLimitConfig())
+}
+
+func TestCreateStream_MinPeerLimit(t *testing.T) {
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundPeerStream = 1
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_MaxPeerLimit(t *testing.T) {
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundPeerStream = math.MaxInt
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_MinProtocolLimit(t *testing.T) {
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundStreamProtocol = 1
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_MaxProtocolLimit(t *testing.T) {
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundStreamProtocol = math.MaxInt
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_MinPeerProtocolLimit(t *testing.T) {
+	unittest.SkipUnless(t,
+		unittest.TEST_TODO,
+		"max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol")
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundStreamPeerProtocol = 1
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_MaxPeerProtocolLimit(t *testing.T) {
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundStreamPeerProtocol = math.MaxInt
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_MinTransientLimit(t *testing.T) {
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundStreamTransient = 1
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_MaxTransientLimit(t *testing.T) {
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundStreamTransient = math.MaxInt
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_MinSystemLimit(t *testing.T) {
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundStreamSystem = 1
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_MaxSystemLimit(t *testing.T) {
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundStreamSystem = math.MaxInt
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_DefaultConfigWithUnknownProtocol(t *testing.T) {
+	unittest.SkipUnless(t,
+		unittest.TEST_TODO,
+		"limits are not enforced when using an unknown protocol ID")
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.unknownProtocol = true
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_PeerLimitLessThanPeerProtocolLimit(t *testing.T) {
+	// the case where peer-level limit is lower than the peer-protocol-level limit.
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundPeerStream = 5          // each peer can only create 5 streams.
+	base.maxInboundStreamPeerProtocol = 10 // each peer can create 10 streams on a specific protocol (but should still be limited by the peer-level limit).
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_PeerLimitGreaterThanPeerProtocolLimit(t *testing.T) {
+	// the case where peer-level limit is higher than the peer-protocol-level limit.
+	unittest.SkipUnless(t,
+		unittest.TEST_TODO,
+		"max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol")
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundPeerStream = 10        // each peer can create 10 streams.
+	base.maxInboundStreamPeerProtocol = 5 // each peer can create 5 streams on a specific protocol.
+	base.maxInboundStreamProtocol = 100   // overall limit is 100 streams on a specific protocol (across all peers).
+	base.maxInboundStreamTransient = 1000 // overall limit is 1000 transient streams.
+	base.maxInboundStreamSystem = 1000    // overall limit is 1000 system-wide streams.
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_ProtocolLimitLessThanPeerProtocolLimit(t *testing.T) {
+	// the case where protocol-level limit is lower than the peer-protocol-level limit.
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundStreamProtocol = 5      // overall limit is 5 streams on a specific protocol (across all peers).
+	base.maxInboundStreamPeerProtocol = 10 // each peer can create 10 streams on a specific protocol (but should still be limited by the protocol-level limit).
+	testCreateStreamInboundStreamResourceLimits(t, base)
+}
+
+func TestCreateStream_ProtocolLimitGreaterThanPeerProtocolLimit(t *testing.T) {
+	// the case where protocol-level limit is higher than the peer-protocol-level limit.
+	base := baseCreateStreamInboundStreamResourceLimitConfig()
+	base.maxInboundStreamProtocol = 10 // overall limit is 10 streams on a specific protocol (across all peers).
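+	// (the protocol-wide cap of 10 is shared across all peers, so with 10 senders it binds before the per-peer-protocol cap set on the next line.)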
+ base.maxInboundStreamPeerProtocol = 5 // each peer can create 5 streams on a specific protocol. + base.maxInboundStreamTransient = 1000 // overall limit is 1000 transient streams. + base.maxInboundStreamSystem = 1000 // overall limit is 1000 system-wide streams. + testCreateStreamInboundStreamResourceLimits(t, base) +} + +func TestCreateStream_TransientLimitLessThanPeerProtocolLimit(t *testing.T) { + // the case where transient-level limit is lower than the peer-protocol-level limit. + base := baseCreateStreamInboundStreamResourceLimitConfig() + base.maxInboundStreamTransient = 5 // overall limit is 5 transient streams (across all peers). + base.maxInboundStreamPeerProtocol = 10 // each peer can create 10 streams on a specific protocol (but should still be limited by the transient-level limit). + testCreateStreamInboundStreamResourceLimits(t, base) } // TestCreateStream_SystemStreamLimit_NotEnforced is a re-production of a hypothetical bug where the system-wide inbound stream limit of libp2p resource management @@ -219,13 +351,19 @@ func testCreateStreamInboundStreamResourceLimits(t *testing.T, cfg *testPeerLimi p2ptest.LetNodesDiscoverEachOther(t, signalerCtx, nodes, ids) var allStreamsCreated sync.WaitGroup - defaultProtocolID := protocols.FlowProtocolID(sporkID) - // creates max(maxInboundPeerStream * nodeCount, maxInboundStreamSystem) streams from each sender to the receiver; breaks as soon as the system-wide limit is reached. - totalStreamsCreated := int64(0) // total number of streams successfully created. + protocolID := protocols.FlowProtocolID(sporkID) + if cfg.unknownProtocol { + protocolID = protocols.FlowProtocolID(unittest.IdentifierFixture()) + } + + // creates max(maxInboundStreamPeerProtocol * nodeCount, maxInboundStreamSystem) streams from each sender to the receiver; breaks as soon as the system-wide limit is reached. totalStreamAttempted := int64(0) // total number of stream creation attempts. + + streamListMu := sync.Mutex{} // mutex to protect the streamsList. + streamsList := make([]network.Stream, 0) // list of all streams created to avoid garbage collection. for sIndex := range senders { - for i := int64(0); i < int64(cfg.maxInboundPeerStream); i++ { + for i := int64(0); i < int64(cfg.maxInboundStreamPeerProtocol); i++ { totalStreamAttempted++ if totalStreamAttempted >= int64(cfg.maxInboundStreamSystem) { // we reached the system-wide limit; no need to create more streams; as stream creation may fail; we re-examine pressure on system-wide limit later. 
@@ -235,9 +373,15 @@ func testCreateStreamInboundStreamResourceLimits(t *testing.T, cfg *testPeerLimi go func(sIndex int) { defer allStreamsCreated.Done() sender := senders[sIndex] - _, err := sender.Host().NewStream(ctx, receiver.ID(), defaultProtocolID) - require.NoError(t, err, "error creating stream") - atomic.AddInt64(&totalStreamsCreated, 1) + s, err := sender.Host().NewStream(ctx, receiver.ID(), protocolID) + if err != nil { + return + } + + require.NotNil(t, s) + streamListMu.Lock() + streamsList = append(streamsList, s) + streamListMu.Unlock() }(sIndex) } } @@ -245,11 +389,11 @@ func testCreateStreamInboundStreamResourceLimits(t *testing.T, cfg *testPeerLimi unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time") require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { - // number of in-transient streams must be less than the max transient limit - require.Less(t, int64(scope.Stat().NumStreamsInbound), int64(cfg.maxInboundStreamTransient)) + // number of in-transient streams must be less than or equal to the max transient limit + require.LessOrEqual(t, int64(scope.Stat().NumStreamsInbound), int64(cfg.maxInboundStreamTransient)) // number of in-transient streams must be less than or equal the total number of streams created. - require.LessOrEqual(t, int64(scope.Stat().NumStreamsInbound), int64(totalStreamsCreated)) + require.LessOrEqual(t, int64(scope.Stat().NumStreamsInbound), int64(len(streamsList))) // t.Logf("transient scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) return nil })) @@ -279,7 +423,7 @@ func testCreateStreamInboundStreamResourceLimits(t *testing.T, cfg *testPeerLimi defer allStreamsCreated.Done() sender := senders[sIndex] // we don't care about the error here; as we are trying to create more streams than the system-wide limit; so we expect some of the stream creations to fail. - _, _ = sender.Host().NewStream(ctx, receiver.ID(), defaultProtocolID) + _, _ = sender.Host().NewStream(ctx, receiver.ID(), protocolID) }(sIndex) } } @@ -288,8 +432,7 @@ func testCreateStreamInboundStreamResourceLimits(t *testing.T, cfg *testPeerLimi totalInboundStreams = 0 for _, sender := range senders { - actualNumOfStreams := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Direction(network.DirInbound)) - // t.Logf("sender %d has %d streams", i, actualNumOfStreams) + actualNumOfStreams := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Direction(network.DirInbound), p2putils.Protocol("")) require.LessOrEqual(t, actualNumOfStreams, cfg.maxInboundPeerStream) require.LessOrEqual(t, actualNumOfStreams, cfg.maxInboundStreamPeerProtocol) totalInboundStreams += actualNumOfStreams @@ -299,36 +442,4 @@ func testCreateStreamInboundStreamResourceLimits(t *testing.T, cfg *testPeerLimi // Failing at this line means the system-wide limit is not being enforced. 
require.LessOrEqual(t, totalInboundStreams, cfg.maxInboundStreamSystem) require.LessOrEqual(t, totalInboundStreams, cfg.maxInboundStreamTransient) - - // t.Log("-----") - // total := 0 - // for i, sender := range senders { - // actualNumOfStreams := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Direction(network.DirInbound)) - // t.Logf("sender %d has %d streams", i, actualNumOfStreams) - // // require.Equalf(t, - // // int64(0), - // // int64(actualNumOfStreams), - // // "expected to create %d number of streams got %d", - // // int64(0), - // // actualNumOfStreams) - // total += actualNumOfStreams - // } - // - // require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { - // t.Logf("transient scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - // return nil - // })) - // - // require.NoError(t, resourceManagerRcv.ViewProtocol(defaultProtocolID, func(scope network.ProtocolScope) error { - // t.Logf("protocol scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - // return nil - // })) - // - // require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { - // t.Logf("system scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) - // return nil - // })) - // - // t.Logf("total streams %d", total) - // } } From f501b7287a23fb181d29d29f0a46fe1ac062eed7 Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Thu, 19 Oct 2023 15:32:07 -0700 Subject: [PATCH 08/29] cleans up the tests --- network/p2p/p2pnode/resourceManager_test.go | 95 ++++++++------------- 1 file changed, 35 insertions(+), 60 deletions(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index b729b5f6892..d7ca60bee40 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -112,17 +112,20 @@ type testPeerLimitConfig struct { // maxLimit returns the maximum limit across all limits. 
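+// (note: after this patch, limits set to math.MaxInt are treated as unbounded and skipped, so the load level derived from maxLimit stays finite.)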
 func (t testPeerLimitConfig) maxLimit() int {
-	max := t.maxInboundPeerStream
-	if t.maxInboundStreamProtocol > max {
+	max := 0
+	if t.maxInboundPeerStream > max && t.maxInboundPeerStream != math.MaxInt {
+		max = t.maxInboundPeerStream
+	}
+	if t.maxInboundStreamProtocol > max && t.maxInboundStreamProtocol != math.MaxInt {
 		max = t.maxInboundStreamProtocol
 	}
-	if t.maxInboundStreamPeerProtocol > max {
+	if t.maxInboundStreamPeerProtocol > max && t.maxInboundStreamPeerProtocol != math.MaxInt {
 		max = t.maxInboundStreamPeerProtocol
 	}
-	if t.maxInboundStreamTransient > max {
+	if t.maxInboundStreamTransient > max && t.maxInboundStreamTransient != math.MaxInt {
 		max = t.maxInboundStreamTransient
 	}
-	if t.maxInboundStreamSystem > max {
+	if t.maxInboundStreamSystem > max && t.maxInboundStreamSystem != math.MaxInt {
 		max = t.maxInboundStreamSystem
 	}
 	return max
@@ -157,6 +160,9 @@ func TestCreateStream_MaxPeerLimit(t *testing.T) {
 }
 
 func TestCreateStream_MinProtocolLimit(t *testing.T) {
+	unittest.SkipUnless(t,
+		unittest.TEST_TODO,
+		"max inbound stream protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol")
 	base := baseCreateStreamInboundStreamResourceLimitConfig()
 	base.maxInboundStreamProtocol = 1
 	testCreateStreamInboundStreamResourceLimits(t, base)
@@ -202,6 +208,9 @@ func TestCreateStream_MinSystemLimit(t *testing.T) {
 }
 
 func TestCreateStream_MaxSystemLimit(t *testing.T) {
+	unittest.SkipUnless(t,
+		unittest.TEST_TODO,
+		"max inbound stream protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol")
 	base := baseCreateStreamInboundStreamResourceLimitConfig()
 	base.maxInboundStreamSystem = math.MaxInt
 	testCreateStreamInboundStreamResourceLimits(t, base)
@@ -239,6 +248,9 @@ func TestCreateStream_PeerLimitGreaterThanPeerProtocolLimit(t *testing.T) {
 }
 
 func TestCreateStream_ProtocolLimitLessThanPeerProtocolLimit(t *testing.T) {
+	unittest.SkipUnless(t,
+		unittest.TEST_TODO,
+		"max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol")
 	// the case where protocol-level limit is lower than the peer-protocol-level limit.
 	base := baseCreateStreamInboundStreamResourceLimitConfig()
@@ -264,22 +276,13 @@ func TestCreateStream_TransientLimitLessThanPeerProtocolLimit(t *testing.T) {
 	testCreateStreamInboundStreamResourceLimits(t, base)
 }
 
-// TestCreateStream_SystemStreamLimit_NotEnforced is a re-production of a hypothetical bug where the system-wide inbound stream limit of libp2p resource management
-// was not being enforced. The purpose of this test is to share with the libp2p community as well as to evaluate the existence of the bug on
-// future libp2p versions.
-// Test scenario works as follows:
-// - We have 30 senders and 1 receiver.
-// - The senders are running with a resource manager that allows infinite number of streams; so that they can create as many streams as they want.
-// - The receiver is running with a resource manager with base limits and no scaling.
-// - The test reads the peer protocol default limits for inbound streams at receiver; say x; which is the limit for the number of inbound streams from each sender on a
-// specific protocol.
-// - Each sender creates x-1 streams to the receiver on a specific protocol. 
This is done to ensure that the receiver has x-1 streams from each sender; a total of -// 30*(x-1) streams at the receiver. -// - Test first ensures that numerically 30 * (x - 1) > max system-wide inbound stream limit; i.e., the total number of streams created by all senders is greater than -// the system-wide limit. -// - Then each sender creates x - 1 streams concurrently to the receiver. -// - At the end of the test we ensure that the total number of streams created by all senders is greater than the system-wide limit; which should not be the case if the -// system-wide limit is being enforced. +// testCreateStreamInboundStreamResourceLimits tests the inbound stream limits for a given testPeerLimitConfig. It creates +// a number of senders and a single receiver. The receiver will have a resource manager with the given limits. +// The senders will have a resource manager with infinite limits to ensure that they can create as many streams as they want. +// The test will create a number of streams from each sender to the receiver. The test will then check that the limits are +// being enforced correctly. +// The number of streams is determined by the maxLimit() of the testPeerLimitConfig, which is the maximum limit across all limits (peer, protocol, transient, system), excluding +// the math.MaxInt limits. func testCreateStreamInboundStreamResourceLimits(t *testing.T, cfg *testPeerLimitConfig) { idProvider := mockmodule.NewIdentityProvider(t) ctx, cancel := context.WithCancel(context.Background()) @@ -357,24 +360,20 @@ func testCreateStreamInboundStreamResourceLimits(t *testing.T, cfg *testPeerLimi protocolID = protocols.FlowProtocolID(unittest.IdentifierFixture()) } - // creates max(maxInboundStreamPeerProtocol * nodeCount, maxInboundStreamSystem) streams from each sender to the receiver; breaks as soon as the system-wide limit is reached. - totalStreamAttempted := int64(0) // total number of stream creation attempts. + loadLimit := cfg.maxLimit() + require.Greaterf(t, loadLimit, 0, "test limit must be greater than 0; got %d", loadLimit) streamListMu := sync.Mutex{} // mutex to protect the streamsList. streamsList := make([]network.Stream, 0) // list of all streams created to avoid garbage collection. for sIndex := range senders { - for i := int64(0); i < int64(cfg.maxInboundStreamPeerProtocol); i++ { - totalStreamAttempted++ - if totalStreamAttempted >= int64(cfg.maxInboundStreamSystem) { - // we reached the system-wide limit; no need to create more streams; as stream creation may fail; we re-examine pressure on system-wide limit later. - break - } + for i := 0; i < loadLimit; i++ { allStreamsCreated.Add(1) go func(sIndex int) { defer allStreamsCreated.Done() sender := senders[sIndex] s, err := sender.Host().NewStream(ctx, receiver.ID(), protocolID) if err != nil { + // we don't care about the error here; as we are trying to break a limit; so we expect some of the stream creations to fail. 
return } @@ -388,58 +387,34 @@ func testCreateStreamInboundStreamResourceLimits(t *testing.T, cfg *testPeerLimi unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create streams on time") + // transient sanity-check require.NoError(t, resourceManagerRcv.ViewTransient(func(scope network.ResourceScope) error { // number of in-transient streams must be less than or equal to the max transient limit require.LessOrEqual(t, int64(scope.Stat().NumStreamsInbound), int64(cfg.maxInboundStreamTransient)) // number of in-transient streams must be less than or equal the total number of streams created. require.LessOrEqual(t, int64(scope.Stat().NumStreamsInbound), int64(len(streamsList))) - // t.Logf("transient scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) return nil })) + // system-wide limit sanity-check require.NoError(t, resourceManagerRcv.ViewSystem(func(scope network.ResourceScope) error { - t.Logf("system scope; inbound stream count %d; inbound connections; %d", scope.Stat().NumStreamsInbound, scope.Stat().NumConnsInbound) + require.LessOrEqual(t, int64(scope.Stat().NumStreamsInbound), int64(cfg.maxInboundStreamSystem), "system-wide limit is not being enforced") return nil })) totalInboundStreams := 0 for _, sender := range senders { actualNumOfStreams := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Direction(network.DirInbound)) - // t.Logf("sender %d has %d streams", i, actualNumOfStreams) + // number of inbound streams must be less than or equal to the peer-level limit for each sender. require.LessOrEqual(t, int64(actualNumOfStreams), int64(cfg.maxInboundPeerStream)) + require.LessOrEqual(t, int64(actualNumOfStreams), int64(cfg.maxInboundStreamPeerProtocol)) totalInboundStreams += actualNumOfStreams } // sanity check; the total number of inbound streams must be less than or equal to the system-wide limit. // TODO: this must be a hard equal check; but falls short; to be shared with libp2p community. // Failing at this line means the system-wide limit is not being enforced. require.LessOrEqual(t, totalInboundStreams, cfg.maxInboundStreamSystem) - - // now the stress testing with each sender making `maxInboundStreamSystem` concurrent streams to the receiver. - for sIndex := range senders { - for i := int64(0); i < int64(cfg.maxInboundStreamSystem); i++ { - allStreamsCreated.Add(1) - go func(sIndex int) { - defer allStreamsCreated.Done() - sender := senders[sIndex] - // we don't care about the error here; as we are trying to create more streams than the system-wide limit; so we expect some of the stream creations to fail. - _, _ = sender.Host().NewStream(ctx, receiver.ID(), protocolID) - }(sIndex) - } - } - - unittest.RequireReturnsBefore(t, allStreamsCreated.Wait, 2*time.Second, "could not create (stress-testing) streams on time") - - totalInboundStreams = 0 - for _, sender := range senders { - actualNumOfStreams := p2putils.CountStream(receiver.Host(), sender.ID(), p2putils.Direction(network.DirInbound), p2putils.Protocol("")) - require.LessOrEqual(t, actualNumOfStreams, cfg.maxInboundPeerStream) - require.LessOrEqual(t, actualNumOfStreams, cfg.maxInboundStreamPeerProtocol) - totalInboundStreams += actualNumOfStreams - } - // sanity check; the total number of inbound streams must be less than or equal to the system-wide limit. - // TODO: this must be a hard equal check; but falls short; to be shared with libp2p community. 
- // Failing at this line means the system-wide limit is not being enforced. - require.LessOrEqual(t, totalInboundStreams, cfg.maxInboundStreamSystem) - require.LessOrEqual(t, totalInboundStreams, cfg.maxInboundStreamTransient) + // sanity check; the total number of inbound streams must be less than or equal to the protocol-level limit. + require.LessOrEqual(t, totalInboundStreams, cfg.maxInboundStreamProtocol) } From 113e50aa79c9aeb13b2ec30de8e506ff653f068d Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Thu, 19 Oct 2023 16:04:48 -0700 Subject: [PATCH 09/29] wires config --- config/default-config.yml | 13 +++ network/netconf/flags.go | 131 +++++++++++++++++++++++-------- network/p2p/p2pconf/gossipsub.go | 26 +++++- 3 files changed, 136 insertions(+), 34 deletions(-) diff --git a/config/default-config.yml b/config/default-config.yml index ba771885a1b..1809474485f 100644 --- a/config/default-config.yml +++ b/config/default-config.yml @@ -58,6 +58,19 @@ network-config: # Without this limit peers can end up in a state where there exists n number of connections per peer which # can lead to resource exhaustion of the libp2p node. libp2p-peer-base-limits-conns-inbound: 1 + # maximum number of inbound system-wide limit for streams, across all peers and protocols + # Note that streams are ephemeral and are created and destroyed intermittently. + libp2p-inbound-stream-limit-system: 15_000 + # maximum number of inbound transient limit for streams, across all streams that are not yet fully opened and associated with a protocol. + # Note that streams are ephemeral and are created and destroyed intermittently. + libp2p-inbound-stream-limit-transient: 15_000 + # maximum number of inbound limit for streams for each protocol across all peers; this is a per-protocol limit. We expect at least + # three protocols per node; gossipsub, unicast, and dht. Note that streams are ephemeral and are created and destroyed intermittently. + libp2p-inbound-stream-limit-protocol: 5000 + # maximum number of inbound streams from each peer across all protocols. + libp2p-inbound-stream-limit-peer: 1000 + # maximum number of inbound streams from each peer for each protocol. + libp2p-inbound-stream-limit-protocol-peer: 500 # Connection manager config # HighWatermark and LowWatermark govern the number of connections are maintained by the ConnManager. 
# When the peer count exceeds the HighWatermark, as many peers will be pruned (and diff --git a/network/netconf/flags.go b/network/netconf/flags.go index 5dad90c65bd..9c53ca6c348 100644 --- a/network/netconf/flags.go +++ b/network/netconf/flags.go @@ -33,9 +33,15 @@ const ( bandwidthRateLimit = "unicast-bandwidth-rate-limit" bandwidthBurstLimit = "unicast-bandwidth-burst-limit" // resource manager config - memoryLimitRatio = "libp2p-memory-limit-ratio" - fileDescriptorsRatio = "libp2p-file-descriptors-ratio" - peerBaseLimitConnsInbound = "libp2p-peer-base-limits-conns-inbound" + memoryLimitRatio = "libp2p-memory-limit-ratio" + fileDescriptorsRatio = "libp2p-file-descriptors-ratio" + peerBaseLimitConnsInbound = "libp2p-peer-base-limits-conns-inbound" + inboundStreamLimitSystem = "libp2p-inbound-stream-limit-system" + inboundStreamLimitPeer = "libp2p-inbound-stream-limit-peer" + inboundStreamLimitProtocol = "libp2p-inbound-stream-limit-protocol" + inboundStreamLimitProtocolPeer = "libp2p-inbound-stream-limit-protocol-peer" + inboundStreamLimitTransient = "libp2p-inbound-stream-limit-transient" + // connection manager highWatermark = "libp2p-high-watermark" lowWatermark = "libp2p-low-watermark" @@ -101,6 +107,11 @@ func AllFlagNames() []string { memoryLimitRatio, fileDescriptorsRatio, peerBaseLimitConnsInbound, + inboundStreamLimitSystem, + inboundStreamLimitPeer, + inboundStreamLimitProtocol, + inboundStreamLimitProtocolPeer, + inboundStreamLimitTransient, highWatermark, lowWatermark, gracePeriod, @@ -148,28 +159,52 @@ func InitializeNetworkFlags(flags *pflag.FlagSet, config *Config) { flags.Duration(peerUpdateInterval, config.PeerUpdateInterval, "how often to refresh the peer connections for the node") flags.Duration(unicastMessageTimeout, config.UnicastMessageTimeout, "how long a unicast transmission can take to complete") // unicast manager options - flags.Duration(unicastCreateStreamRetryDelay, config.UnicastConfig.CreateStreamBackoffDelay, "initial backoff delay between failing to establish a connection with another node and retrying, "+ - "this delay increases exponentially with the number of subsequent failures to establish a connection.") - flags.Duration(unicastDialBackoffDelay, config.UnicastConfig.DialInProgressBackoffDelay, "initial backoff delay between failing to establish a connection with another node and retrying, "+ - "this delay increases exponentially with the number of subsequent failures to establish a connection.") - flags.Duration(unicastDialInProgressBackoffDelay, config.UnicastConfig.DialInProgressBackoffDelay, "initial backoff delay for concurrent stream creations to a remote peer when there is no exising connection and a dial is in progress. 
"+ - "this delay increases exponentially with the number of subsequent failure attempts") - flags.Uint64(unicastStreamZeroRetryResetThreshold, config.UnicastConfig.StreamZeroRetryResetThreshold, "reset stream creation retry budget from zero to the maximum after consecutive successful streams reach this threshold.") - flags.Duration(unicastDialZeroRetryResetThreshold, config.UnicastConfig.DialZeroRetryResetThreshold, "reset dial retry budget if the last successful dial is longer than this threshold.") + flags.Duration(unicastCreateStreamRetryDelay, + config.UnicastConfig.CreateStreamBackoffDelay, + "initial backoff delay between failing to establish a connection with another node and retrying, "+ + "this delay increases exponentially with the number of subsequent failures to establish a connection.") + flags.Duration(unicastDialBackoffDelay, + config.UnicastConfig.DialInProgressBackoffDelay, + "initial backoff delay between failing to establish a connection with another node and retrying, "+ + "this delay increases exponentially with the number of subsequent failures to establish a connection.") + flags.Duration(unicastDialInProgressBackoffDelay, + config.UnicastConfig.DialInProgressBackoffDelay, + "initial backoff delay for concurrent stream creations to a remote peer when there is no exising connection and a dial is in progress. "+ + "this delay increases exponentially with the number of subsequent failure attempts") + flags.Uint64(unicastStreamZeroRetryResetThreshold, + config.UnicastConfig.StreamZeroRetryResetThreshold, + "reset stream creation retry budget from zero to the maximum after consecutive successful streams reach this threshold.") + flags.Duration(unicastDialZeroRetryResetThreshold, + config.UnicastConfig.DialZeroRetryResetThreshold, + "reset dial retry budget if the last successful dial is longer than this threshold.") flags.Uint64(unicastMaxDialRetryAttemptTimes, config.UnicastConfig.MaxDialRetryAttemptTimes, "maximum attempts to establish a unicast connection.") flags.Uint64(unicastMaxStreamCreationRetryAttemptTimes, config.UnicastConfig.MaxStreamCreationRetryAttemptTimes, "max attempts to create a unicast stream.") - flags.Uint32(unicastDialConfigCacheSize, config.UnicastConfig.DialConfigCacheSize, "cache size of the dial config cache, recommended to be big enough to accommodate the entire nodes in the network.") + flags.Uint32(unicastDialConfigCacheSize, + config.UnicastConfig.DialConfigCacheSize, + "cache size of the dial config cache, recommended to be big enough to accommodate the entire nodes in the network.") // unicast stream handler rate limits flags.Int(messageRateLimit, config.UnicastConfig.UnicastRateLimitersConfig.MessageRateLimit, "maximum number of unicast messages that a peer can send per second") - flags.Int(bandwidthRateLimit, config.UnicastConfig.UnicastRateLimitersConfig.BandwidthRateLimit, "bandwidth size in bytes a peer is allowed to send via unicast streams per second") + flags.Int(bandwidthRateLimit, + config.UnicastConfig.UnicastRateLimitersConfig.BandwidthRateLimit, + "bandwidth size in bytes a peer is allowed to send via unicast streams per second") flags.Int(bandwidthBurstLimit, config.UnicastConfig.UnicastRateLimitersConfig.BandwidthBurstLimit, "bandwidth size in bytes a peer is allowed to send at one time") - flags.Duration(lockoutDuration, config.UnicastConfig.UnicastRateLimitersConfig.LockoutDuration, "the number of seconds a peer will be forced to wait before being allowed to successful reconnect to the node after being rate limited") + 
flags.Duration(lockoutDuration, + config.UnicastConfig.UnicastRateLimitersConfig.LockoutDuration, + "the number of seconds a peer will be forced to wait before being allowed to successful reconnect to the node after being rate limited") flags.Bool(dryRun, config.UnicastConfig.UnicastRateLimitersConfig.DryRun, "disable peer disconnects and connections gating when rate limiting peers") // resource manager cli flags flags.Float64(fileDescriptorsRatio, config.ResourceManagerConfig.FileDescriptorsRatio, "ratio of available file descriptors to be used by libp2p (in (0,1])") flags.Float64(memoryLimitRatio, config.ResourceManagerConfig.MemoryLimitRatio, "ratio of available memory to be used by libp2p (in (0,1])") flags.Int(peerBaseLimitConnsInbound, config.ResourceManagerConfig.PeerBaseLimitConnsInbound, "the maximum amount of allowed inbound connections per peer") + flags.Int(inboundStreamLimitSystem, config.ResourceManagerConfig.InboundStream.System, "the system-wide limit on the number of inbound streams") + flags.Int(inboundStreamLimitPeer, config.ResourceManagerConfig.InboundStream.Peer, "the limit on the number of inbound streams per peer (over all protocols)") + flags.Int(inboundStreamLimitProtocol, config.ResourceManagerConfig.InboundStream.Protocol, "the limit on the number of inbound streams per protocol (over all peers)") + flags.Int(inboundStreamLimitProtocolPeer, config.ResourceManagerConfig.InboundStream.ProtocolPeer, "the limit on the number of inbound streams per protocol per peer") + flags.Int(inboundStreamLimitTransient, + config.ResourceManagerConfig.InboundStream.Transient, + "the transient limit on the number of inbound streams (applied to streams that are not associated with a peer or protocol yet)") + // connection manager flags.Int(lowWatermark, config.ConnectionManagerConfig.LowWatermark, "low watermarking for libp2p connection manager") flags.Int(highWatermark, config.ConnectionManagerConfig.HighWatermark, "high watermarking for libp2p connection manager") @@ -182,30 +217,64 @@ func InitializeNetworkFlags(flags *pflag.FlagSet, config *Config) { flags.Uint32(rpcSentTrackerQueueCacheSize, config.GossipSubConfig.RPCSentTrackerQueueCacheSize, "cache size of the rpc sent tracker worker queue.") flags.Int(rpcSentTrackerNumOfWorkers, config.GossipSubConfig.RpcSentTrackerNumOfWorkers, "number of workers for the rpc sent tracker worker pool.") // gossipsub RPC control message validation limits used for validation configuration and rate limiting - flags.Int(validationInspectorNumberOfWorkers, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.NumberOfWorkers, "number of gossupsub RPC control message validation inspector component workers") - flags.Uint32(validationInspectorInspectMessageQueueCacheSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.CacheSize, "cache size for gossipsub RPC validation inspector events worker pool queue.") - flags.Uint32(validationInspectorClusterPrefixedTopicsReceivedCacheSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.ClusterPrefixedControlMsgsReceivedCacheSize, "cache size for gossipsub RPC validation inspector cluster prefix received tracker.") - flags.Float64(validationInspectorClusterPrefixedTopicsReceivedCacheDecay, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.ClusterPrefixedControlMsgsReceivedCacheDecay, "the decay value used to decay cluster prefix received topics 
received cached counters.") - flags.Float64(validationInspectorClusterPrefixHardThreshold, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.ClusterPrefixHardThreshold, "the maximum number of cluster-prefixed control messages allowed to be processed when the active cluster id is unset or a mismatch is detected, exceeding this threshold will result in node penalization by gossipsub.") + flags.Int(validationInspectorNumberOfWorkers, + config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.NumberOfWorkers, + "number of gossupsub RPC control message validation inspector component workers") + flags.Uint32(validationInspectorInspectMessageQueueCacheSize, + config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.CacheSize, + "cache size for gossipsub RPC validation inspector events worker pool queue.") + flags.Uint32(validationInspectorClusterPrefixedTopicsReceivedCacheSize, + config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.ClusterPrefixedControlMsgsReceivedCacheSize, + "cache size for gossipsub RPC validation inspector cluster prefix received tracker.") + flags.Float64(validationInspectorClusterPrefixedTopicsReceivedCacheDecay, + config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.ClusterPrefixedControlMsgsReceivedCacheDecay, + "the decay value used to decay cluster prefix received topics received cached counters.") + flags.Float64(validationInspectorClusterPrefixHardThreshold, + config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.ClusterPrefixHardThreshold, + "the maximum number of cluster-prefixed control messages allowed to be processed when the active cluster id is unset or a mismatch is detected, exceeding this threshold will result in node penalization by gossipsub.") // gossipsub RPC control message metrics observer inspector configuration - flags.Int(metricsInspectorNumberOfWorkers, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCMetricsInspectorConfigs.NumberOfWorkers, "cache size for gossipsub RPC metrics inspector events worker pool queue.") - flags.Uint32(metricsInspectorCacheSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCMetricsInspectorConfigs.CacheSize, "cache size for gossipsub RPC metrics inspector events worker pool.") + flags.Int(metricsInspectorNumberOfWorkers, + config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCMetricsInspectorConfigs.NumberOfWorkers, + "cache size for gossipsub RPC metrics inspector events worker pool queue.") + flags.Uint32(metricsInspectorCacheSize, + config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCMetricsInspectorConfigs.CacheSize, + "cache size for gossipsub RPC metrics inspector events worker pool.") // networking event notifications - flags.Uint32(gossipSubRPCInspectorNotificationCacheSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCInspectorNotificationCacheSize, "cache size for notification events from gossipsub rpc inspector") + flags.Uint32(gossipSubRPCInspectorNotificationCacheSize, + config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCInspectorNotificationCacheSize, + "cache size for notification events from gossipsub rpc inspector") // application layer spam prevention (alsp) protocol flags.Bool(alspDisabled, config.AlspConfig.DisablePenalty, "disable the penalty mechanism of the alsp protocol. 
default value (recommended) is false") flags.Uint32(alspSpamRecordCacheSize, config.AlspConfig.SpamRecordCacheSize, "size of spam record cache, recommended to be 10x the number of authorized nodes") flags.Uint32(alspSpamRecordQueueSize, config.AlspConfig.SpamReportQueueSize, "size of spam report queue, recommended to be 100x the number of authorized nodes") - flags.Duration(alspHearBeatInterval, config.AlspConfig.HearBeatInterval, "interval between two consecutive heartbeat events at alsp, recommended to leave it as default unless you know what you are doing.") + flags.Duration(alspHearBeatInterval, + config.AlspConfig.HearBeatInterval, + "interval between two consecutive heartbeat events at alsp, recommended to leave it as default unless you know what you are doing.") - flags.Int(ihaveMaxSampleSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IHaveRPCInspectionConfig.MaxSampleSize, "max number of ihaves to sample when performing validation") - flags.Int(ihaveMaxMessageIDSampleSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IHaveRPCInspectionConfig.MaxMessageIDSampleSize, "max number of message ids to sample when performing validation per ihave") - flags.Int(controlMessageMaxSampleSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.GraftPruneMessageMaxSampleSize, "max number of control messages to sample when performing validation on GRAFT and PRUNE message types") - flags.Uint(iwantMaxSampleSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IWantRPCInspectionConfig.MaxSampleSize, "max number of iwants to sample when performing validation") - flags.Int(iwantMaxMessageIDSampleSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IWantRPCInspectionConfig.MaxMessageIDSampleSize, "max number of message ids to sample when performing validation per iwant") - flags.Float64(iwantCacheMissThreshold, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IWantRPCInspectionConfig.CacheMissThreshold, "max number of iwants to sample when performing validation") - flags.Int(iwantCacheMissCheckSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IWantRPCInspectionConfig.CacheMissCheckSize, "the iWants size at which message id cache misses will be checked") - flags.Float64(iwantDuplicateMsgIDThreshold, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IWantRPCInspectionConfig.DuplicateMsgIDThreshold, "max allowed duplicate message IDs in a single iWant control message") + flags.Int(ihaveMaxSampleSize, + config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IHaveRPCInspectionConfig.MaxSampleSize, + "max number of ihaves to sample when performing validation") + flags.Int(ihaveMaxMessageIDSampleSize, + config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IHaveRPCInspectionConfig.MaxMessageIDSampleSize, + "max number of message ids to sample when performing validation per ihave") + flags.Int(controlMessageMaxSampleSize, + config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.GraftPruneMessageMaxSampleSize, + "max number of control messages to sample when performing validation on GRAFT and PRUNE message types") + flags.Uint(iwantMaxSampleSize, + 
config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IWantRPCInspectionConfig.MaxSampleSize,
+ "max number of iwants to sample when performing validation")
+ flags.Int(iwantMaxMessageIDSampleSize,
+ config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IWantRPCInspectionConfig.MaxMessageIDSampleSize,
+ "max number of message ids to sample when performing validation per iwant")
+ flags.Float64(iwantCacheMissThreshold,
+ config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IWantRPCInspectionConfig.CacheMissThreshold,
+ "the threshold of iwant message id cache misses at which an rpc's iwant messages are considered invalid")
+ flags.Int(iwantCacheMissCheckSize,
+ config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IWantRPCInspectionConfig.CacheMissCheckSize,
+ "the iWants size at which message id cache misses will be checked")
+ flags.Float64(iwantDuplicateMsgIDThreshold,
+ config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IWantRPCInspectionConfig.DuplicateMsgIDThreshold,
+ "max allowed duplicate message IDs in a single iWant control message")
 }

// SetAliases this func sets an aliases for each CLI flag defined for network config overrides to it's corresponding
diff --git a/network/p2p/p2pconf/gossipsub.go b/network/p2p/p2pconf/gossipsub.go
index 683dff67fdc..62cde38ff73 100644
--- a/network/p2p/p2pconf/gossipsub.go
+++ b/network/p2p/p2pconf/gossipsub.go
@@ -8,9 +8,29 @@ import (
 // The resource manager is used to limit the number of open connections and streams (as well as any other resources
 // used by libp2p) for each peer.
 type ResourceManagerConfig struct {
- MemoryLimitRatio float64 `mapstructure:"libp2p-memory-limit-ratio"` // maximum allowed fraction of memory to be allocated by the libp2p resources in (0,1]
- FileDescriptorsRatio float64 `mapstructure:"libp2p-file-descriptors-ratio"` // maximum allowed fraction of file descriptors to be allocated by the libp2p resources in (0,1]
- PeerBaseLimitConnsInbound int `mapstructure:"libp2p-peer-base-limits-conns-inbound"` // the maximum amount of allowed inbound connections per peer
+ InboundStream InboundStreamLimit `mapstructure:",squash"`
+ MemoryLimitRatio float64 `mapstructure:"libp2p-memory-limit-ratio"` // maximum allowed fraction of memory to be allocated by the libp2p resources in (0,1]
+ FileDescriptorsRatio float64 `mapstructure:"libp2p-file-descriptors-ratio"` // maximum allowed fraction of file descriptors to be allocated by the libp2p resources in (0,1]
+ PeerBaseLimitConnsInbound int `mapstructure:"libp2p-peer-base-limits-conns-inbound"` // the maximum amount of allowed inbound connections per peer
+}
+
+// InboundStreamLimit is the configuration for the inbound stream limits; it caps the number of inbound
+// streams that can be opened by the node at each resource scope.
+type InboundStreamLimit struct {
+ // System is the system-wide limit on the number of inbound streams.
+ System int `validate:"gt=0" mapstructure:"libp2p-inbound-stream-limit-system"`
+
+ // Transient is the transient limit on the number of inbound streams (applied to streams that are not associated with a peer or protocol yet)
+ Transient int `validate:"gt=0" mapstructure:"libp2p-inbound-stream-limit-transient"`
+
+ // Protocol is the limit on the number of inbound streams per protocol (over all peers).
+ Protocol int `validate:"gt=0" mapstructure:"libp2p-inbound-stream-limit-protocol"`
+
+ // Peer is the limit on the number of inbound streams per peer (over all protocols).
+ Peer int `validate:"gt=0" mapstructure:"libp2p-inbound-stream-limit-peer"`
+
+ // ProtocolPeer is the limit on the number of inbound streams per protocol per peer.
+ ProtocolPeer int `validate:"gt=0" mapstructure:"libp2p-inbound-stream-limit-protocol-peer"`
 }

 // GossipSubConfig is the configuration for the GossipSub pubsub implementation.

From 4745ab05793e4f04aefa60b405bd204eb40be9d6 Mon Sep 17 00:00:00 2001
From: Yahya Hassanzadeh
Date: Thu, 19 Oct 2023 16:19:25 -0700
Subject: [PATCH 10/29] adds utility function for applying inbound limits

---
 network/p2p/p2pbuilder/utils.go | 61 +++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/network/p2p/p2pbuilder/utils.go b/network/p2p/p2pbuilder/utils.go
index 2e05bbd5b84..232ea571d7d 100644
--- a/network/p2p/p2pbuilder/utils.go
+++ b/network/p2p/p2pbuilder/utils.go
@@ -10,6 +10,7 @@ import (
 "github.com/onflow/flow-go/module"
 "github.com/onflow/flow-go/network/p2p"
+ "github.com/onflow/flow-go/network/p2p/p2pconf"
 "github.com/onflow/flow-go/network/p2p/p2plogging"
 )
@@ -123,3 +124,63 @@ func (l *limitConfigLogger) logPeerProtocolLimits(p map[protocol.ID]rcmgr.Resour
 lg.Info().Msg("protocol peer limits set")
 }
+
+// ApplyInboundStreamLimits applies the inbound stream limits to the concrete limit config. The concrete limit config is assumed to come from scaling the
+// base limit config by the scaling factor. The inbound stream limits override the concrete limit config wherever the concrete value exceeds the
+// configured limit.
+// The inbound limits are assumed to come from the config file.
+// Args:
+//
+// logger: the logger to log the applied limits.
+// concrete: the concrete limit config.
+// limit: the inbound stream limits.
+//
+// Returns:
+//
+// a copy of the concrete limit config with the inbound stream limits applied and overridden.
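// Editor's aside, a hedged illustration with hypothetical numbers (not part of the patch): if
// scaling yields System.StreamsInbound == 20_000 while the config file sets
// libp2p-inbound-stream-limit-system: 15_000, the returned config caps the system scope at
// 15_000; a scaled value already below the file limit is left untouched, so the override is
// effectively a minimum of the two:
//
//	scaled := limits.Scale(mem, fd)                                        // e.g. 20_000 inbound system streams
//	applied := ApplyInboundStreamLimits(logger, scaled, cfg.InboundStream) // file limit: 15_000
//	_ = applied.ToPartialLimitConfig().System.StreamsInbound               // 15_000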
+func ApplyInboundStreamLimits(logger zerolog.Logger, concrete rcmgr.ConcreteLimitConfig, limit p2pconf.InboundStreamLimit) rcmgr.ConcreteLimitConfig {
+ c := concrete.ToPartialLimitConfig()
+
+ partial := rcmgr.PartialLimitConfig{}
+ lg := logger.With().Logger()
+
+ if int(c.System.StreamsInbound) > limit.System {
+ lg = lg.With().Int("concrete_system_inbound_streams", int(c.System.StreamsInbound)).Int("partial_system_inbound_streams", limit.System).Logger()
+ partial.System.StreamsInbound = rcmgr.LimitVal(limit.System)
+ }
+
+ if int(c.Transient.StreamsInbound) > limit.Transient {
+ lg = lg.With().Int("concrete_transient_inbound_streams", int(c.Transient.StreamsInbound)).Int("partial_transient_inbound_streams", limit.Transient).Logger()
+ partial.Transient.StreamsInbound = rcmgr.LimitVal(limit.Transient)
+ }
+
+ if int(c.ProtocolDefault.StreamsInbound) > limit.Protocol {
+ lg = lg.With().Int("concrete_protocol_default_inbound_streams", int(c.ProtocolDefault.StreamsInbound)).Int("partial_protocol_default_inbound_streams",
+ limit.Protocol).Logger()
+ partial.ProtocolDefault.StreamsInbound = rcmgr.LimitVal(limit.Protocol)
+ }
+
+ if int(c.PeerDefault.StreamsInbound) > limit.Peer {
+ lg = lg.With().Int("concrete_peer_default_inbound_streams", int(c.PeerDefault.StreamsInbound)).Int("partial_peer_default_inbound_streams", limit.Peer).Logger()
+ partial.PeerDefault.StreamsInbound = rcmgr.LimitVal(limit.Peer)
+ }
+
+ if int(c.ProtocolPeerDefault.StreamsInbound) > limit.ProtocolPeer {
+ lg = lg.With().Int("concrete_protocol_peer_default_inbound_streams",
+ int(c.ProtocolPeerDefault.StreamsInbound)).Int("partial_protocol_peer_default_inbound_streams", limit.ProtocolPeer).Logger()
+ partial.ProtocolPeerDefault.StreamsInbound = rcmgr.LimitVal(limit.ProtocolPeer)
+ }
+
+ if int(c.Stream.StreamsInbound) > limit.Peer {
+ lg = lg.With().Int("concrete_stream_inbound_streams", int(c.Stream.StreamsInbound)).Int("partial_stream_inbound_streams", limit.Peer).Logger()
+ partial.Stream.StreamsInbound = rcmgr.LimitVal(limit.Peer)
+ }
+
+ if int(c.Conn.StreamsInbound) > limit.Peer {
+ lg = lg.With().Int("concrete_conn_inbound_streams", int(c.Conn.StreamsInbound)).Int("partial_conn_inbound_streams", limit.Peer).Logger()
+ partial.Conn.StreamsInbound = rcmgr.LimitVal(limit.Peer)
+ }
+
+ lg.Info().Msg("inbound stream limits applied")
+ return partial.Build(concrete)
+}

From dae85dabd16d273d2e7ad2e818042e1ccf6b4509 Mon Sep 17 00:00:00 2001
From: Yahya Hassanzadeh
Date: Thu, 19 Oct 2023 16:48:08 -0700
Subject: [PATCH 11/29] adds tests

---
 network/p2p/p2pbuilder/libp2pNodeBuilder.go |  7 ++-
 network/p2p/p2pbuilder/libp2pscaler_test.go | 67 +++++++++++++++++++++
 2 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/network/p2p/p2pbuilder/libp2pNodeBuilder.go b/network/p2p/p2pbuilder/libp2pNodeBuilder.go
index 5bbcf4f6ed3..ddac941a2b5 100644
--- a/network/p2p/p2pbuilder/libp2pNodeBuilder.go
+++ b/network/p2p/p2pbuilder/libp2pNodeBuilder.go
@@ -223,8 +223,9 @@ func (builder *LibP2PNodeBuilder) Build() (p2p.LibP2PNode, error) {
 return nil, fmt.Errorf("could not get allowed file descriptors: %w", err)
 }
 limits.PeerBaseLimit.ConnsInbound = builder.resourceManagerCfg.PeerBaseLimitConnsInbound
- l := limits.Scale(mem, fd)
- mgr, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(l), rcmgr.WithMetrics(builder.metricsConfig.Metrics))
+ scaledLimits := limits.Scale(mem, fd)
+ appliedLimits := ApplyInboundStreamLimits(builder.logger, scaledLimits, builder.resourceManagerCfg.InboundStream)
+ mgr,
err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(appliedLimits), rcmgr.WithMetrics(builder.metricsConfig.Metrics)) if err != nil { return nil, fmt.Errorf("could not create libp2p resource manager: %w", err) } @@ -233,7 +234,7 @@ func (builder *LibP2PNodeBuilder) Build() (p2p.LibP2PNode, error) { Int64("allowed_memory", mem). Int("allowed_file_descriptors", fd). Msg("allowed memory and file descriptors are fetched from the system") - NewLimitConfigLogger(builder.logger).LogResourceManagerLimits(l) + NewLimitConfigLogger(builder.logger).LogResourceManagerLimits(appliedLimits) opts = append(opts, libp2p.ResourceManager(mgr)) builder.logger.Info().Msg("libp2p resource manager is set to default with metrics") diff --git a/network/p2p/p2pbuilder/libp2pscaler_test.go b/network/p2p/p2pbuilder/libp2pscaler_test.go index 094f8a4e700..a952193a715 100644 --- a/network/p2p/p2pbuilder/libp2pscaler_test.go +++ b/network/p2p/p2pbuilder/libp2pscaler_test.go @@ -3,8 +3,13 @@ package p2pbuilder import ( "testing" + "github.com/libp2p/go-libp2p" + rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager" "github.com/pbnjay/memory" "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/config" + "github.com/onflow/flow-go/utils/unittest" ) func TestAllowedMemoryScale(t *testing.T) { @@ -102,3 +107,65 @@ func TestAllowedFileDescriptorsScale(t *testing.T) { require.NoError(t, err) require.Equal(t, fd/10000, s) } + +// TestApplyInboundStreamLimits tests that the inbound stream limits are applied correctly, i.e., the limits from the config file +// are applied to the concrete limit config when the concrete limit config is greater than the limits from the config file. +func TestApplyInboundStreamLimits(t *testing.T) { + cfg, err := config.DefaultConfig() + require.NoError(t, err) + + mem, err := AllowedMemory(cfg.NetworkConfig.ResourceManagerConfig.MemoryLimitRatio) + require.NoError(t, err) + + fd, err := AllowedFileDescriptors(cfg.NetworkConfig.FileDescriptorsRatio) + require.NoError(t, err) + limits := rcmgr.DefaultLimits + libp2p.SetDefaultServiceLimits(&limits) + scaled := limits.Scale(mem, fd) + + concrete := rcmgr.PartialLimitConfig{ + System: rcmgr.ResourceLimits{ + // intentionally sets to 1 to test that it is not overridden. + StreamsInbound: 1, + }, + Transient: rcmgr.ResourceLimits{ + // sets it higher than the default to test that it is overridden. + StreamsInbound: rcmgr.LimitVal(cfg.NetworkConfig.ResourceManagerConfig.InboundStream.Transient + 1), + }, + ProtocolDefault: rcmgr.ResourceLimits{ + // sets it higher than the default to test that it is overridden. + StreamsInbound: rcmgr.LimitVal(cfg.NetworkConfig.ResourceManagerConfig.InboundStream.Protocol + 1), + }, + ProtocolPeerDefault: rcmgr.ResourceLimits{ + StreamsInbound: 1, // intentionally sets to 1 to test that it is not overridden. + }, + PeerDefault: rcmgr.ResourceLimits{ + StreamsInbound: rcmgr.LimitVal(cfg.NetworkConfig.ResourceManagerConfig.InboundStream.Peer + 1), + }, + Conn: rcmgr.ResourceLimits{ + StreamsInbound: 1, // intentionally sets to 1 to test that it is not overridden. + }, + Stream: rcmgr.ResourceLimits{ + StreamsInbound: 1, // intentionally sets to 1 to test that it is not overridden. + }, + }.Build(scaled) + + // apply inbound stream limits from the config file. + applied := ApplyInboundStreamLimits(unittest.Logger(), concrete, cfg.NetworkConfig.ResourceManagerConfig.InboundStream) + + // check that the applied limits are overridden. + // transient limit should be overridden. 
+ require.Equal(t, int(cfg.NetworkConfig.ResourceManagerConfig.InboundStream.Transient), int(applied.ToPartialLimitConfig().Transient.StreamsInbound)) + // protocol default limit should be overridden. + require.Equal(t, int(cfg.NetworkConfig.ResourceManagerConfig.InboundStream.Protocol), int(applied.ToPartialLimitConfig().ProtocolDefault.StreamsInbound)) + // peer default limit should be overridden. + require.Equal(t, int(cfg.NetworkConfig.ResourceManagerConfig.InboundStream.Peer), int(applied.ToPartialLimitConfig().PeerDefault.StreamsInbound)) + // protocol peer default limit should not be overridden. + require.Equal(t, int(1), int(applied.ToPartialLimitConfig().ProtocolPeerDefault.StreamsInbound)) + // conn limit should not be overridden. + require.Equal(t, int(1), int(applied.ToPartialLimitConfig().Conn.StreamsInbound)) + // stream limit should not be overridden. + require.Equal(t, int(1), int(applied.ToPartialLimitConfig().Stream.StreamsInbound)) + // system limit should not be overridden. + require.Equal(t, int(1), int(applied.ToPartialLimitConfig().System.StreamsInbound)) +} From a99bcb9ac79f5fc32822d14ba24d425b7fef1e8f Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Thu, 19 Oct 2023 17:41:27 -0700 Subject: [PATCH 12/29] fixes tests --- network/p2p/p2pbuilder/libp2pNodeBuilder.go | 7 +++--- network/p2p/p2pbuilder/libp2pscaler_test.go | 10 +++++++- network/p2p/p2pbuilder/utils.go | 28 +++++++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/network/p2p/p2pbuilder/libp2pNodeBuilder.go b/network/p2p/p2pbuilder/libp2pNodeBuilder.go index ddac941a2b5..3c768b44ad6 100644 --- a/network/p2p/p2pbuilder/libp2pNodeBuilder.go +++ b/network/p2p/p2pbuilder/libp2pNodeBuilder.go @@ -222,9 +222,10 @@ func (builder *LibP2PNodeBuilder) Build() (p2p.LibP2PNode, error) { if err != nil { return nil, fmt.Errorf("could not get allowed file descriptors: %w", err) } - limits.PeerBaseLimit.ConnsInbound = builder.resourceManagerCfg.PeerBaseLimitConnsInbound - scaledLimits := limits.Scale(mem, fd) - appliedLimits := ApplyInboundStreamLimits(builder.logger, scaledLimits, builder.resourceManagerCfg.InboundStream) + + // scales the default limits by the allowed memory and file descriptors and applies the inbound connection and stream limits. + appliedLimits := ApplyInboundConnectionLimits(builder.logger, limits.Scale(mem, fd), builder.resourceManagerCfg.PeerBaseLimitConnsInbound) + appliedLimits = ApplyInboundStreamLimits(builder.logger, appliedLimits, builder.resourceManagerCfg.InboundStream) mgr, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(appliedLimits), rcmgr.WithMetrics(builder.metricsConfig.Metrics)) if err != nil { return nil, fmt.Errorf("could not create libp2p resource manager: %w", err) diff --git a/network/p2p/p2pbuilder/libp2pscaler_test.go b/network/p2p/p2pbuilder/libp2pscaler_test.go index a952193a715..176b3a6bb3c 100644 --- a/network/p2p/p2pbuilder/libp2pscaler_test.go +++ b/network/p2p/p2pbuilder/libp2pscaler_test.go @@ -110,7 +110,7 @@ func TestAllowedFileDescriptorsScale(t *testing.T) { // TestApplyInboundStreamLimits tests that the inbound stream limits are applied correctly, i.e., the limits from the config file // are applied to the concrete limit config when the concrete limit config is greater than the limits from the config file. 
-func TestApplyInboundStreamLimits(t *testing.T) {
+func TestApplyInboundStreamAndConnectionLimits(t *testing.T) {
 cfg, err := config.DefaultConfig()
 require.NoError(t, err)
@@ -135,6 +135,8 @@ func TestApplyInboundStreamLimits(t *testing.T) {
 ProtocolDefault: rcmgr.ResourceLimits{
 // sets it higher than the default to test that it is overridden.
 StreamsInbound: rcmgr.LimitVal(cfg.NetworkConfig.ResourceManagerConfig.InboundStream.Protocol + 1),
+ // intentionally sets it lower than the default to test that it is not overridden.
+ ConnsInbound: rcmgr.LimitVal(cfg.NetworkConfig.ResourceManagerConfig.PeerBaseLimitConnsInbound - 1),
 },
 ProtocolPeerDefault: rcmgr.ResourceLimits{
 StreamsInbound: 1, // intentionally sets to 1 to test that it is not overridden.
@@ -153,6 +155,9 @@ func TestApplyInboundStreamLimits(t *testing.T) {
 // apply inbound stream limits from the config file.
 applied := ApplyInboundStreamLimits(unittest.Logger(), concrete, cfg.NetworkConfig.ResourceManagerConfig.InboundStream)
+ // then apply the peer base limit for inbound connections from the config file.
+ applied = ApplyInboundConnectionLimits(unittest.Logger(), applied, cfg.NetworkConfig.ResourceManagerConfig.PeerBaseLimitConnsInbound)
+
 // check that the applied limits are overridden.
 // transient limit should be overridden.
 require.Equal(t, int(cfg.NetworkConfig.ResourceManagerConfig.InboundStream.Transient), int(applied.ToPartialLimitConfig().Transient.StreamsInbound))
@@ -168,4 +173,7 @@ func TestApplyInboundStreamLimits(t *testing.T) {
 require.Equal(t, int(1), int(applied.ToPartialLimitConfig().Stream.StreamsInbound))
 // system limit should not be overridden.
 require.Equal(t, int(1), int(applied.ToPartialLimitConfig().System.StreamsInbound))
+
+ // check that the applied peer base limit connections are overridden.
+ require.Equal(t, int(cfg.NetworkConfig.ResourceManagerConfig.PeerBaseLimitConnsInbound), int(applied.ToPartialLimitConfig().PeerDefault.ConnsInbound))
 }
diff --git a/network/p2p/p2pbuilder/utils.go b/network/p2p/p2pbuilder/utils.go
index 232ea571d7d..5af43ba9e81 100644
--- a/network/p2p/p2pbuilder/utils.go
+++ b/network/p2p/p2pbuilder/utils.go
@@ -184,3 +184,31 @@ func ApplyInboundStreamLimits(logger zerolog.Logger, concrete rcmgr.ConcreteLimi
 lg.Info().Msg("inbound stream limits applied")
 return partial.Build(concrete)
 }
+
+// ApplyInboundConnectionLimits applies the inbound connection limits to the concrete limit config. The concrete limit config is assumed to come from scaling the
+// base limit config by the scaling factor. The inbound connection limits override the concrete limit config wherever the concrete value exceeds the
+// configured limit.
+// The inbound limits are assumed to come from the config file.
+// Args:
+//
+// logger: the logger to log the applied limits.
+// concrete: the concrete limit config.
+// peerLimit: the inbound connection limit from each remote peer.
+//
+// Returns:
+//
+// a copy of the concrete limit config with the inbound connection limits applied and overridden.
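// Editor's aside, a hedged illustration with hypothetical numbers (not part of the patch): with a
// scaled PeerDefault.ConnsInbound of 8 and libp2p-peer-base-limits-conns-inbound: 1 in the config
// file, the applied config reports PeerDefault.ConnsInbound == 1; a scaled value at or below the
// file limit would pass through unchanged:
//
//	applied := ApplyInboundConnectionLimits(logger, scaled, 1) // scaled PeerDefault.ConnsInbound == 8
//	_ = applied.ToPartialLimitConfig().PeerDefault.ConnsInbound // 1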
+func ApplyInboundConnectionLimits(logger zerolog.Logger, concrete rcmgr.ConcreteLimitConfig, peerLimit int) rcmgr.ConcreteLimitConfig { + c := concrete.ToPartialLimitConfig() + + partial := rcmgr.PartialLimitConfig{} + lg := logger.With().Logger() + + if int(c.PeerDefault.ConnsInbound) > peerLimit { + lg = lg.With().Int("concrete_peer_inbound_conns", int(c.PeerDefault.ConnsInbound)).Int("partial_peer_inbound_conns", peerLimit).Logger() + partial.PeerDefault.ConnsInbound = rcmgr.LimitVal(peerLimit) + } + + lg.Info().Msg("inbound connection limits applied") + return partial.Build(concrete) +} From ac06fd009466ddf2fb2d7d9db4f458588ac96b32 Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Fri, 20 Oct 2023 09:47:57 -0700 Subject: [PATCH 13/29] fixes lint --- network/p2p/p2pnode/libp2pNode_test.go | 106 ------------------------- 1 file changed, 106 deletions(-) diff --git a/network/p2p/p2pnode/libp2pNode_test.go b/network/p2p/p2pnode/libp2pNode_test.go index b4eaf4f6496..9ec66082a0e 100644 --- a/network/p2p/p2pnode/libp2pNode_test.go +++ b/network/p2p/p2pnode/libp2pNode_test.go @@ -285,112 +285,6 @@ func TestCreateStream_SinglePairwiseConnection(t *testing.T) { close(streamChan) } -// TestCreateStream_SinglePeerDial ensures that the unicast manager only attempts to dial a peer once, retries dialing a peer the expected max amount of times when an -// error is encountered and retries creating the stream the expected max amount of times when unicast.ErrDialInProgress is encountered. -func TestCreateStream_SinglePeerDial(t *testing.T) { - createStreamRetries := atomic.NewInt64(0) - dialPeerRetries := atomic.NewInt64(0) - hook := zerolog.HookFunc(func(e *zerolog.Event, level zerolog.Level, message string) { - if level == zerolog.WarnLevel { - switch { - case strings.Contains(message, "retrying create stream, dial to peer in progress"): - createStreamRetries.Inc() - case strings.Contains(message, "retrying peer dialing"): - dialPeerRetries.Inc() - } - } - }) - logger := zerolog.New(os.Stdout).Level(zerolog.InfoLevel).Hook(hook) - idProvider := mockmodule.NewIdentityProvider(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) - - sporkID := unittest.IdentifierFixture() - - // mock metrics we expected only a single call to CreateStream to initiate the dialing to the peer, which will result in 3 failed attempts - // the next call to CreateStream will encounter a DialInProgress error which will result in 3 failed attempts - m := mockmodule.NewNetworkMetrics(t) - m.On("OnPeerDialFailure", mock.Anything, 3).Once() - m.On("OnStreamCreationFailure", mock.Anything, mock.Anything).Twice().Run(func(args mock.Arguments) { - attempts := args.Get(1).(int) - // We expect OnCreateStream to be called twice: once in each separate call to CreateStream. The first call that initializes - // the peer dialing should not attempt to retry CreateStream because all peer dialing attempts will be made which will not - // return the DialInProgress err that kicks off the CreateStream retries so we expect attempts to be 1 in this case. In the - // second call to CreateStream we expect all 3 attempts to be made as we wait for the DialInProgress to complete, in this case - // we expect attempts to be 3. Thus we only expect this method to be called twice with either 1 or 3 attempts. 
- require.False(t, attempts != 1 && attempts != 3, fmt.Sprintf("expected either 1 or 3 attempts got %d", attempts)) - }) - - sender, id1 := p2ptest.NodeFixture( - t, - sporkID, - t.Name(), - idProvider, - p2ptest.WithConnectionGater(p2ptest.NewConnectionGater(idProvider, func(pid peer.ID) error { - // avoid connection gating outbound messages on sender - return nil - })), - // add very small delay so that when the sender attempts to create multiple streams - // the func fails fast before the first routine can finish the peer dialing retries - // this prevents us from making another call to dial peer - p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond), - p2ptest.WithLogger(logger), - p2ptest.WithMetricsCollector(m)) - - receiver, id2 := p2ptest.NodeFixture( - t, - sporkID, - t.Name(), - idProvider, - p2ptest.WithConnectionGater(p2ptest.NewConnectionGater(idProvider, func(pid peer.ID) error { - // connection gate all incoming connections forcing the senders unicast manager to perform retries - return fmt.Errorf("gate keep") - })), - p2ptest.WithCreateStreamRetryDelay(10*time.Millisecond), - p2ptest.WithLogger(logger)) - - idProvider.On("ByPeerID", sender.ID()).Return(&id1, true).Maybe() - idProvider.On("ByPeerID", receiver.ID()).Return(&id2, true).Maybe() - - p2ptest.StartNodes(t, signalerCtx, []p2p.LibP2PNode{sender, receiver}) - defer p2ptest.StopNodes(t, []p2p.LibP2PNode{sender, receiver}, cancel) - - var wg sync.WaitGroup - wg.Add(2) - // attempt to create two concurrent streams - go func() { - defer wg.Done() - err := sender.OpenProtectedStream(ctx, receiver.ID(), t.Name(), func(stream network.Stream) error { - return nil - }) - require.Error(t, err) - }() - go func() { - defer wg.Done() - err := sender.OpenProtectedStream(ctx, receiver.ID(), t.Name(), func(stream network.Stream) error { - return nil - }) - require.Error(t, err) - }() - - unittest.RequireReturnsBefore(t, wg.Wait, 3*time.Second, "cannot create streams on time") - - // we expect a single routine to start attempting to dial thus the number of retries - // before failure should be at most p2pnode.MaxConnectAttempt - expectedNumOfDialRetries := int64(p2pnode.MaxConnectAttempt) - // we expect the second routine to retry creating a stream p2pnode.MaxConnectAttempt when dialing is in progress - expectedCreateStreamRetries := int64(p2pnode.MaxConnectAttempt) - require.Equal(t, - expectedNumOfDialRetries, - dialPeerRetries.Load(), - fmt.Sprintf("expected %d dial peer retries got %d", expectedNumOfDialRetries, dialPeerRetries.Load())) - require.Equal(t, - expectedCreateStreamRetries, - createStreamRetries.Load(), - fmt.Sprintf("expected %d dial peer retries got %d", expectedCreateStreamRetries, createStreamRetries.Load())) -} - // createStreams will attempt to create n number of streams concurrently between each combination of node pairs. 
func createConcurrentStreams(t *testing.T, ctx context.Context, nodes []p2p.LibP2PNode, ids flow.IdentityList, n int, streams chan network.Stream, done chan struct{}) { defer close(done) From f0efa0b0c81a84af7ef5594f274568e924a25bd9 Mon Sep 17 00:00:00 2001 From: "Yahya Hassanzadeh, Ph.D" Date: Mon, 23 Oct 2023 12:32:21 -0700 Subject: [PATCH 14/29] Update network/p2p/p2pnode/resourceManager_test.go Co-authored-by: Khalil Claybon --- network/p2p/p2pnode/resourceManager_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index d7ca60bee40..89aeb73bba2 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -210,7 +210,7 @@ func TestCreateStream_MinSystemLimit(t *testing.T) { func TestCreateStream_MaxSystemLimit(t *testing.T) { unittest.SkipUnless(t, unittest.TEST_TODO, - "max inbound stream protocol is not preserved; can be partially due to count steam not counting inbound streams on a protocol") + "max inbound stream protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol") base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundStreamSystem = math.MaxInt testCreateStreamInboundStreamResourceLimits(t, base) From 10f394d64625420d6645b6d0f21dc0045924d614 Mon Sep 17 00:00:00 2001 From: "Yahya Hassanzadeh, Ph.D" Date: Mon, 23 Oct 2023 12:33:04 -0700 Subject: [PATCH 15/29] Update network/p2p/p2pnode/resourceManager_test.go Co-authored-by: Khalil Claybon --- network/p2p/p2pnode/resourceManager_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index 89aeb73bba2..2a04e0aef8d 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -177,7 +177,7 @@ func TestCreateStream_MaxProtocolLimit(t *testing.T) { func TestCreateStream_MinPeerProtocolLimit(t *testing.T) { unittest.SkipUnless(t, unittest.TEST_TODO, - "max inbound stream peer protocol is not preserved; can be partially due to count steam not counting inbound streams on a protocol") + "max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol") base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundStreamPeerProtocol = 1 testCreateStreamInboundStreamResourceLimits(t, base) From ce9fb463b90bacac76667619b61f8d39a4e08482 Mon Sep 17 00:00:00 2001 From: "Yahya Hassanzadeh, Ph.D" Date: Mon, 23 Oct 2023 12:33:12 -0700 Subject: [PATCH 16/29] Update network/p2p/p2pnode/resourceManager_test.go Co-authored-by: Khalil Claybon --- network/p2p/p2pnode/resourceManager_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index 2a04e0aef8d..1d89998de12 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -162,7 +162,7 @@ func TestCreateStream_MaxPeerLimit(t *testing.T) { func TestCreateStream_MinProtocolLimit(t *testing.T) { unittest.SkipUnless(t, unittest.TEST_TODO, - "max inbound stream protocol is not preserved; can be partially due to count steam not counting inbound streams on a protocol") + "max inbound stream protocol is not preserved; can be partially due to count stream not counting inbound streams on a 
protocol") base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundStreamProtocol = 1 testCreateStreamInboundStreamResourceLimits(t, base) From 5c0ebd4af50c874151262e534954bee627762cdf Mon Sep 17 00:00:00 2001 From: "Yahya Hassanzadeh, Ph.D" Date: Mon, 23 Oct 2023 12:33:20 -0700 Subject: [PATCH 17/29] Update network/p2p/p2pnode/resourceManager_test.go Co-authored-by: Khalil Claybon --- network/p2p/p2pnode/resourceManager_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index 1d89998de12..1c5263b448d 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -250,7 +250,7 @@ func TestCreateStream_PeerLimitGreaterThanPeerProtocolLimit(t *testing.T) { func TestCreateStream_ProtocolLimitLessThanPeerProtocolLimit(t *testing.T) { unittest.SkipUnless(t, unittest.TEST_TODO, - "max inbound stream peer protocol is not preserved; can be partially due to count steam not counting inbound streams on a protocol") + "max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol") // the case where protocol-level limit is lower than the peer-protocol-level limit. base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundStreamProtocol = 5 // each peer can create 5 streams on a specific protocol. From 2d046d8666ac61e998a4aa592bc7367e7e7183ff Mon Sep 17 00:00:00 2001 From: "Yahya Hassanzadeh, Ph.D" Date: Mon, 23 Oct 2023 12:33:34 -0700 Subject: [PATCH 18/29] Update network/p2p/p2pnode/resourceManager_test.go Co-authored-by: Khalil Claybon --- network/p2p/p2pnode/resourceManager_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index 1c5263b448d..c521f23e367 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -237,7 +237,7 @@ func TestCreateStream_PeerLimitGreaterThanPeerProtocolLimit(t *testing.T) { // the case where peer-level limit is higher than the peer-protocol-level limit. unittest.SkipUnless(t, unittest.TEST_TODO, - "max inbound stream peer protocol is not preserved; can be partially due to count steam not counting inbound streams on a protocol") + "max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol") base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundPeerStream = 10 // each peer can create 10 streams. base.maxInboundStreamPeerProtocol = 5 // each peer can create 5 streams on a specific protocol. From 7936f72b37b85956d8cce02bb061ad7e5585b06c Mon Sep 17 00:00:00 2001 From: "Yahya Hassanzadeh, Ph.D" Date: Mon, 23 Oct 2023 12:35:38 -0700 Subject: [PATCH 19/29] Update network/internal/p2putils/utils.go Co-authored-by: Khalil Claybon --- network/internal/p2putils/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/network/internal/p2putils/utils.go b/network/internal/p2putils/utils.go index 744dd183350..6ca15b7b5f0 100644 --- a/network/internal/p2putils/utils.go +++ b/network/internal/p2putils/utils.go @@ -133,7 +133,7 @@ func All() FilterOption { // It returns a slice of network.Stream values that match the filtering criteria. 
func FilterStream(host host.Host, targetID peer.ID, options ...FilterOption) []network.Stream { var filteredStreams []network.Stream - const discardTheProtocol = "discard-the-protocol" + const allProtocols = "*" // default values opts := FilterOptions{ dir: network.DirUnknown, // by default, consider both inbound and outbound streams From 0314cde84b79dd082c984f705259fdf925707f8e Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Mon, 23 Oct 2023 12:36:31 -0700 Subject: [PATCH 20/29] fixes build issue --- network/internal/p2putils/utils.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/network/internal/p2putils/utils.go b/network/internal/p2putils/utils.go index 6ca15b7b5f0..0ec8b8aba11 100644 --- a/network/internal/p2putils/utils.go +++ b/network/internal/p2putils/utils.go @@ -137,7 +137,7 @@ func FilterStream(host host.Host, targetID peer.ID, options ...FilterOption) []n // default values opts := FilterOptions{ dir: network.DirUnknown, // by default, consider both inbound and outbound streams - protocol: discardTheProtocol, // by default, consider streams of all protocol IDs + protocol: allProtocols, // by default, consider streams of all protocol IDs all: false, // by default, return just the first matching stream } @@ -155,7 +155,7 @@ func FilterStream(host host.Host, targetID peer.ID, options ...FilterOption) []n streams := conn.GetStreams() for _, stream := range streams { if (opts.dir == network.DirUnknown || stream.Stat().Direction == opts.dir) && - (opts.protocol == discardTheProtocol || stream.Protocol() == opts.protocol) { + (opts.protocol == allProtocols || stream.Protocol() == opts.protocol) { filteredStreams = append(filteredStreams, stream) if !opts.all { return filteredStreams From a328d8a5ffaf3f7039a72f3660cccd8f607c7c8f Mon Sep 17 00:00:00 2001 From: "Yahya Hassanzadeh, Ph.D" Date: Mon, 23 Oct 2023 12:58:17 -0700 Subject: [PATCH 21/29] Update config/default-config.yml Co-authored-by: Peter Argue <89119817+peterargue@users.noreply.github.com> --- config/default-config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/default-config.yml b/config/default-config.yml index 1809474485f..2b901b0b3db 100644 --- a/config/default-config.yml +++ b/config/default-config.yml @@ -58,13 +58,13 @@ network-config: # Without this limit peers can end up in a state where there exists n number of connections per peer which # can lead to resource exhaustion of the libp2p node. libp2p-peer-base-limits-conns-inbound: 1 - # maximum number of inbound system-wide limit for streams, across all peers and protocols + # maximum number of inbound system-wide streams, across all peers and protocols # Note that streams are ephemeral and are created and destroyed intermittently. libp2p-inbound-stream-limit-system: 15_000 - # maximum number of inbound transient limit for streams, across all streams that are not yet fully opened and associated with a protocol. + # maximum number of inbound transient streams, across all streams that are not yet fully opened and associated with a protocol. # Note that streams are ephemeral and are created and destroyed intermittently. libp2p-inbound-stream-limit-transient: 15_000 - # maximum number of inbound limit for streams for each protocol across all peers; this is a per-protocol limit. We expect at least + # maximum number of inbound streams for each protocol across all peers; this is a per-protocol limit. We expect at least # three protocols per node; gossipsub, unicast, and dht. 
Note that streams are ephemeral and are created and destroyed intermittently. libp2p-inbound-stream-limit-protocol: 5000 # maximum number of inbound streams from each peer across all protocols. From 3bbaf2470a3825ea4e1ee72f7bc468c4870b3867 Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Mon, 23 Oct 2023 14:08:34 -0700 Subject: [PATCH 22/29] un-exports allowed memory --- network/p2p/p2pbuilder/libp2pNodeBuilder.go | 4 +-- network/p2p/p2pbuilder/libp2pscaler.go | 4 +-- network/p2p/p2pbuilder/libp2pscaler_test.go | 40 ++++++++++----------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/network/p2p/p2pbuilder/libp2pNodeBuilder.go b/network/p2p/p2pbuilder/libp2pNodeBuilder.go index 3c768b44ad6..07dd4110567 100644 --- a/network/p2p/p2pbuilder/libp2pNodeBuilder.go +++ b/network/p2p/p2pbuilder/libp2pNodeBuilder.go @@ -214,11 +214,11 @@ func (builder *LibP2PNodeBuilder) Build() (p2p.LibP2PNode, error) { limits := rcmgr.DefaultLimits libp2p.SetDefaultServiceLimits(&limits) - mem, err := AllowedMemory(builder.resourceManagerCfg.MemoryLimitRatio) + mem, err := allowedMemory(builder.resourceManagerCfg.MemoryLimitRatio) if err != nil { return nil, fmt.Errorf("could not get allowed memory: %w", err) } - fd, err := AllowedFileDescriptors(builder.resourceManagerCfg.FileDescriptorsRatio) + fd, err := allowedFileDescriptors(builder.resourceManagerCfg.FileDescriptorsRatio) if err != nil { return nil, fmt.Errorf("could not get allowed file descriptors: %w", err) } diff --git a/network/p2p/p2pbuilder/libp2pscaler.go b/network/p2p/p2pbuilder/libp2pscaler.go index d6bdaf2ef5a..8612a53d34d 100644 --- a/network/p2p/p2pbuilder/libp2pscaler.go +++ b/network/p2p/p2pbuilder/libp2pscaler.go @@ -16,14 +16,14 @@ func getNumFDs() (int, error) { return int(l.Cur), nil } -func AllowedMemory(scaleFactor float64) (int64, error) { +func allowedMemory(scaleFactor float64) (int64, error) { if scaleFactor <= 0 || scaleFactor > 1 { return 0, fmt.Errorf("memory scale factor must be greater than 0 and less than or equal to 1: %f", scaleFactor) } return int64(math.Floor(float64(memory.TotalMemory()) * scaleFactor)), nil } -func AllowedFileDescriptors(scaleFactor float64) (int, error) { +func allowedFileDescriptors(scaleFactor float64) (int, error) { if scaleFactor <= 0 || scaleFactor > 1 { return 0, fmt.Errorf("fd scale factor must be greater than 0 and less than or equal to 1: %f", scaleFactor) } diff --git a/network/p2p/p2pbuilder/libp2pscaler_test.go b/network/p2p/p2pbuilder/libp2pscaler_test.go index 176b3a6bb3c..dd5cfe48088 100644 --- a/network/p2p/p2pbuilder/libp2pscaler_test.go +++ b/network/p2p/p2pbuilder/libp2pscaler_test.go @@ -17,44 +17,44 @@ func TestAllowedMemoryScale(t *testing.T) { require.True(t, m > 0) // scaling with factor of 1 should return the total memory. - s, err := AllowedMemory(1) + s, err := allowedMemory(1) require.NoError(t, err) require.Equal(t, int64(m), s) // scaling with factor of 0 should return an error. - _, err = AllowedMemory(0) + _, err = allowedMemory(0) require.Error(t, err) // scaling with factor of -1 should return an error. - _, err = AllowedMemory(-1) + _, err = allowedMemory(-1) require.Error(t, err) // scaling with factor of 2 should return an error. - _, err = AllowedMemory(2) + _, err = allowedMemory(2) require.Error(t, err) // scaling with factor of 0.5 should return half the total memory.
- s, err = AllowedMemory(0.5) + s, err = allowedMemory(0.5) require.NoError(t, err) require.Equal(t, int64(m/2), s) // scaling with factor of 0.1 should return 10% of the total memory. - s, err = AllowedMemory(0.1) + s, err = allowedMemory(0.1) require.NoError(t, err) require.Equal(t, int64(m/10), s) // scaling with factor of 0.01 should return 1% of the total memory. - s, err = AllowedMemory(0.01) + s, err = allowedMemory(0.01) require.NoError(t, err) require.Equal(t, int64(m/100), s) // scaling with factor of 0.001 should return 0.1% of the total memory. - s, err = AllowedMemory(0.001) + s, err = allowedMemory(0.001) require.NoError(t, err) require.Equal(t, int64(m/1000), s) // scaling with factor of 0.0001 should return 0.01% of the total memory. - s, err = AllowedMemory(0.0001) + s, err = allowedMemory(0.0001) require.NoError(t, err) require.Equal(t, int64(m/10000), s) } @@ -66,44 +66,44 @@ func TestAllowedFileDescriptorsScale(t *testing.T) { require.True(t, fd > 0) // scaling with factor of 1 should return the total file descriptors. - s, err := AllowedFileDescriptors(1) + s, err := allowedFileDescriptors(1) require.NoError(t, err) require.Equal(t, fd, s) // scaling with factor of 0 should return an error. - _, err = AllowedFileDescriptors(0) + _, err = allowedFileDescriptors(0) require.Error(t, err) // scaling with factor of -1 should return an error. - _, err = AllowedFileDescriptors(-1) + _, err = allowedFileDescriptors(-1) require.Error(t, err) // scaling with factor of 2 should return an error. - _, err = AllowedFileDescriptors(2) + _, err = allowedFileDescriptors(2) require.Error(t, err) // scaling with factor of 0.5 should return half the total file descriptors. - s, err = AllowedFileDescriptors(0.5) + s, err = allowedFileDescriptors(0.5) require.NoError(t, err) require.Equal(t, fd/2, s) // scaling with factor of 0.1 should return 10% of the total file descriptors. - s, err = AllowedFileDescriptors(0.1) + s, err = allowedFileDescriptors(0.1) require.NoError(t, err) require.Equal(t, fd/10, s) // scaling with factor of 0.01 should return 1% of the total file descriptors. - s, err = AllowedFileDescriptors(0.01) + s, err = allowedFileDescriptors(0.01) require.NoError(t, err) require.Equal(t, fd/100, s) // scaling with factor of 0.001 should return 0.1% of the total file descriptors. - s, err = AllowedFileDescriptors(0.001) + s, err = allowedFileDescriptors(0.001) require.NoError(t, err) require.Equal(t, fd/1000, s) // scaling with factor of 0.0001 should return 0.01% of the total file descriptors. 
- s, err = AllowedFileDescriptors(0.0001) + s, err = allowedFileDescriptors(0.0001) require.NoError(t, err) require.Equal(t, fd/10000, s) } @@ -114,10 +114,10 @@ func TestApplyInboundStreamAndConnectionLimits(t *testing.T) { cfg, err := config.DefaultConfig() require.NoError(t, err) - mem, err := AllowedMemory(cfg.NetworkConfig.ResourceManagerConfig.MemoryLimitRatio) + mem, err := allowedMemory(cfg.NetworkConfig.ResourceManagerConfig.MemoryLimitRatio) require.NoError(t, err) - fd, err := AllowedFileDescriptors(cfg.NetworkConfig.FileDescriptorsRatio) + fd, err := allowedFileDescriptors(cfg.NetworkConfig.FileDescriptorsRatio) require.NoError(t, err) limits := rcmgr.DefaultLimits libp2p.SetDefaultServiceLimits(&limits) From df61bacbd012f3f9efa375e726fb0836cf02b954 Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Mon, 23 Oct 2023 14:10:19 -0700 Subject: [PATCH 23/29] un-exports newLimitConfigLogger --- network/p2p/p2pbuilder/libp2pNodeBuilder.go | 2 +- network/p2p/p2pbuilder/utils.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/network/p2p/p2pbuilder/libp2pNodeBuilder.go b/network/p2p/p2pbuilder/libp2pNodeBuilder.go index 07dd4110567..0340069a5de 100644 --- a/network/p2p/p2pbuilder/libp2pNodeBuilder.go +++ b/network/p2p/p2pbuilder/libp2pNodeBuilder.go @@ -235,7 +235,7 @@ func (builder *LibP2PNodeBuilder) Build() (p2p.LibP2PNode, error) { Int64("allowed_memory", mem). Int("allowed_file_descriptors", fd). Msg("allowed memory and file descriptors are fetched from the system") - NewLimitConfigLogger(builder.logger).LogResourceManagerLimits(appliedLimits) + newLimitConfigLogger(builder.logger).LogResourceManagerLimits(appliedLimits) opts = append(opts, libp2p.ResourceManager(mgr)) builder.logger.Info().Msg("libp2p resource manager is set to default with metrics") diff --git a/network/p2p/p2pbuilder/utils.go b/network/p2p/p2pbuilder/utils.go index 5af43ba9e81..3f54c886fd5 100644 --- a/network/p2p/p2pbuilder/utils.go +++ b/network/p2p/p2pbuilder/utils.go @@ -33,8 +33,8 @@ type limitConfigLogger struct { logger zerolog.Logger } -// NewLimitConfigLogger creates a new limitConfigLogger. -func NewLimitConfigLogger(logger zerolog.Logger) *limitConfigLogger { +// newLimitConfigLogger creates a new limitConfigLogger. +func newLimitConfigLogger(logger zerolog.Logger) *limitConfigLogger { return &limitConfigLogger{logger: logger} } From dd83fe6a55a89ee813a23484e8f7d528c3e56354 Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Mon, 23 Oct 2023 14:12:59 -0700 Subject: [PATCH 24/29] formats default values greater than 9999 --- config/default-config.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/config/default-config.yml b/config/default-config.yml index 2b901b0b3db..a5a9ba34f84 100644 --- a/config/default-config.yml +++ b/config/default-config.yml @@ -6,7 +6,7 @@ network-config: networking-connection-pruning: true # Preferred unicasts protocols list of unicast protocols in preferred order preferred-unicast-protocols: [ ] - received-message-cache-size: 10e4 + received-message-cache-size: 10_000 peerupdate-interval: 10m unicast-message-timeout: 5s # Unicast create stream retry delay is initial delay used in the exponential backoff for create stream retries @@ -47,7 +47,7 @@ network-config: unicast-dial-in-progress-backoff-delay: 1s # The size of the dial config cache used to keep track of the dial config for each remote peer. The dial config is used to keep track of the dial retry budget for each remote peer. 
# Recommended to set it to the maximum number of remote peers in the network. - unicast-dial-config-cache-size: 10000 + unicast-dial-config-cache-size: 10_000 # Resource manager config # Maximum allowed fraction of memory to be allocated by the libp2p resources in (0,1] libp2p-memory-limit-ratio: 0.5 # flow default @@ -111,7 +111,7 @@ network-config: # Gossipsub rpc inspectors configs # The size of the queue for notifications about invalid RPC messages - gossipsub-rpc-inspector-notification-cache-size: 10000 + gossipsub-rpc-inspector-notification-cache-size: 10_000 # RPC control message validation inspector configs # Rpc validation inspector number of pool workers gossipsub-rpc-validation-inspector-workers: 5 @@ -154,8 +154,8 @@ network-config: # The size of the queue used by worker pool for the control message metrics inspector gossipsub-rpc-metrics-inspector-cache-size: 100 # Application layer spam prevention - alsp-spam-record-cache-size: 10e3 - alsp-spam-report-queue-size: 10e4 + alsp-spam-record-cache-size: 1000 + alsp-spam-report-queue-size: 10_000 alsp-disable-penalty: false alsp-heart-beat-interval: 1s From 9ce45a9b67e0a256ff2d812646a3314fb81ca302 Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Mon, 23 Oct 2023 16:32:21 -0700 Subject: [PATCH 25/29] bumps up the startup and shutdown timeouts --- network/p2p/test/fixtures.go | 38 +++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/network/p2p/test/fixtures.go b/network/p2p/test/fixtures.go index 65fe7558b8c..9e9b8e4c8e8 100644 --- a/network/p2p/test/fixtures.go +++ b/network/p2p/test/fixtures.go @@ -50,13 +50,13 @@ const ( // the test is run in parallel with other tests. Hence, no further increase of the timeout is // expected to be necessary. Any failure to start a node within this timeout is likely to be // caused by a bug in the code. - libp2pNodeStartupTimeout = 5 * time.Second + libp2pNodeStartupTimeout = 10 * time.Second // libp2pNodeShutdownTimeout is the timeout for stopping a libp2p node in tests. Note that the // timeout has been selected to be large enough to allow for the node to shut down on a CI even when // the test is run in parallel with other tests. Hence, no further increase of the timeout is // expected to be necessary. Any failure to stop a node within this timeout is likely to be // caused by a bug in the code. - libp2pNodeShutdownTimeout = 5 * time.Second + libp2pNodeShutdownTimeout = 10 * time.Second ) // NetworkingKeyFixtures is a test helper that generates an ECDSA flow key pair. 
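An aside before the remaining fixture hunks: patches 22 and 23 un-export the resource-manager helpers, so their wiring is worth seeing end to end. The following is a minimal, self-contained sketch, not the builder code from this series: the two helpers are simplified stand-ins mirroring the validation in libp2pscaler.go, the 0.5 ratios are the defaults from config/default-config.yml, and rcmgr.DefaultLimits, Scale, NewFixedLimiter, and NewResourceManager are assumed from go-libp2p's resource-manager package (Unix-like systems only, due to the rlimit call).

package main

import (
	"fmt"
	"math"

	"github.com/libp2p/go-libp2p"
	rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager"
	"github.com/pbnjay/memory"
	"golang.org/x/sys/unix"
)

// allowedMemory mirrors the validation in libp2pscaler.go: the ratio must be
// in (0, 1], and the allowance is that fraction of total system memory.
func allowedMemory(ratio float64) (int64, error) {
	if ratio <= 0 || ratio > 1 {
		return 0, fmt.Errorf("memory ratio must be in (0, 1]: %f", ratio)
	}
	return int64(math.Floor(float64(memory.TotalMemory()) * ratio)), nil
}

// allowedFileDescriptors applies the same (0, 1] rule to the process fd limit.
func allowedFileDescriptors(ratio float64) (int, error) {
	if ratio <= 0 || ratio > 1 {
		return 0, fmt.Errorf("fd ratio must be in (0, 1]: %f", ratio)
	}
	var l unix.Rlimit
	if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &l); err != nil {
		return 0, fmt.Errorf("could not read RLIMIT_NOFILE: %w", err)
	}
	return int(float64(l.Cur) * ratio), nil
}

func main() {
	limits := rcmgr.DefaultLimits           // go-libp2p's scaling defaults
	libp2p.SetDefaultServiceLimits(&limits) // fold in default service limits

	mem, err := allowedMemory(0.5) // libp2p-memory-limit-ratio default
	if err != nil {
		panic(err)
	}
	fd, err := allowedFileDescriptors(0.5) // libp2p-file-descriptors-ratio default
	if err != nil {
		panic(err)
	}

	// Scale the defaults to the allowed budget and install a fixed limiter.
	mgr, err := rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(limits.Scale(mem, fd)))
	if err != nil {
		panic(err)
	}
	node, err := libp2p.New(libp2p.ResourceManager(mgr))
	if err != nil {
		panic(err)
	}
	defer node.Close()
}

Because allowedMemory and allowedFileDescriptors are now unexported, this composition lives entirely inside the builder; callers only tune the two ratios.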
@@ -122,7 +122,9 @@ func NodeFixture( opt(parameters) } - identity := unittest.IdentityFixture(unittest.WithNetworkingKey(parameters.Key.PublicKey()), unittest.WithAddress(parameters.Address), unittest.WithRole(parameters.Role)) + identity := unittest.IdentityFixture(unittest.WithNetworkingKey(parameters.Key.PublicKey()), + unittest.WithAddress(parameters.Address), + unittest.WithRole(parameters.Role)) logger = parameters.Logger.With().Hex("node_id", logging.ID(identity.NodeID)).Logger() @@ -663,7 +665,11 @@ func EnsurePubsubMessageExchange( for i := 0; i < count; i++ { // creates a unique message to be published by the node payload := messageFactory() - outgoingMessageScope, err := message.NewOutgoingScope(flow.IdentifierList{unittest.IdentifierFixture()}, topic, payload, unittest.NetworkCodec().Encode, message.ProtocolTypePubSub) + outgoingMessageScope, err := message.NewOutgoingScope(flow.IdentifierList{unittest.IdentifierFixture()}, + topic, + payload, + unittest.NetworkCodec().Encode, + message.ProtocolTypePubSub) require.NoError(t, err) require.NoError(t, node.Publish(ctx, outgoingMessageScope)) @@ -689,7 +695,14 @@ func EnsurePubsubMessageExchange( // - count: the number of messages to exchange from `sender` to `receiver`. // - messageFactory: a function that creates a unique message to be published by the node. func EnsurePubsubMessageExchangeFromNode( - t *testing.T, ctx context.Context, sender p2p.LibP2PNode, receiverNode p2p.LibP2PNode, receiverIdentifier flow.Identifier, topic channels.Topic, count int, messageFactory func() interface{}, + t *testing.T, + ctx context.Context, + sender p2p.LibP2PNode, + receiverNode p2p.LibP2PNode, + receiverIdentifier flow.Identifier, + topic channels.Topic, + count int, + messageFactory func() interface{}, ) { _, err := sender.Subscribe(topic, validator.TopicValidator(unittest.Logger(), unittest.AllowAllPeerFilter())) require.NoError(t, err) @@ -703,7 +716,11 @@ func EnsurePubsubMessageExchangeFromNode( for i := 0; i < count; i++ { // creates a unique message to be published by the node payload := messageFactory() - outgoingMessageScope, err := message.NewOutgoingScope(flow.IdentifierList{receiverIdentifier}, topic, payload, unittest.NetworkCodec().Encode, message.ProtocolTypePubSub) + outgoingMessageScope, err := message.NewOutgoingScope(flow.IdentifierList{receiverIdentifier}, + topic, + payload, + unittest.NetworkCodec().Encode, + message.ProtocolTypePubSub) require.NoError(t, err) require.NoError(t, sender.Publish(ctx, outgoingMessageScope)) @@ -746,7 +763,14 @@ func EnsureNotConnectedBetweenGroups(t *testing.T, ctx context.Context, groupA [ // - count: the number of messages to exchange from each node. // - messageFactory: a function that creates a unique message to be published by the node. 
func EnsureNoPubsubMessageExchange( - t *testing.T, ctx context.Context, from []p2p.LibP2PNode, to []p2p.LibP2PNode, toIdentifiers flow.IdentifierList, topic channels.Topic, count int, messageFactory func() interface{}, + t *testing.T, + ctx context.Context, + from []p2p.LibP2PNode, + to []p2p.LibP2PNode, + toIdentifiers flow.IdentifierList, + topic channels.Topic, + count int, + messageFactory func() interface{}, ) { subs := make([]p2p.Subscription, len(to)) tv := validator.TopicValidator(unittest.Logger(), unittest.AllowAllPeerFilter()) From b067caa0bd0ade077e185764398711f3eb06a04b Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Mon, 23 Oct 2023 18:24:20 -0700 Subject: [PATCH 26/29] shortens the skip message --- network/p2p/p2pnode/resourceManager_test.go | 29 +++++++++------------ 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index c521f23e367..00d8c44cee5 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -160,9 +160,8 @@ func TestCreateStream_MaxPeerLimit(t *testing.T) { } func TestCreateStream_MinProtocolLimit(t *testing.T) { - unittest.SkipUnless(t, - unittest.TEST_TODO, - "max inbound stream protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol") + // max inbound protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol + unittest.SkipUnless(t, unittest.TEST_TODO, "broken test") base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundStreamProtocol = 1 testCreateStreamInboundStreamResourceLimits(t, base) @@ -175,9 +174,8 @@ func TestCreateStream_MaxProtocolLimit(t *testing.T) { } func TestCreateStream_MinPeerProtocolLimit(t *testing.T) { - unittest.SkipUnless(t, - unittest.TEST_TODO, - "max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol") + // max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol + unittest.SkipUnless(t, unittest.TEST_TODO, "broken test") base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundStreamPeerProtocol = 1 testCreateStreamInboundStreamResourceLimits(t, base) @@ -208,18 +206,16 @@ func TestCreateStream_MinSystemLimit(t *testing.T) { } func TestCreateStream_MaxSystemLimit(t *testing.T) { - unittest.SkipUnless(t, - unittest.TEST_TODO, - "max inbound stream protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol") + // max inbound stream protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol + unittest.SkipUnless(t, unittest.TEST_TODO, "broken test") base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundStreamSystem = math.MaxInt testCreateStreamInboundStreamResourceLimits(t, base) } func TestCreateStream_DefaultConfigWithUnknownProtocol(t *testing.T) { - unittest.SkipUnless(t, - unittest.TEST_TODO, - "limits are not enforced when using an unknown protocol ID") + // limits are not enforced when using an unknown protocol ID + unittest.SkipUnless(t, unittest.TEST_TODO, "broken test") base := baseCreateStreamInboundStreamResourceLimitConfig() base.unknownProtocol = true testCreateStreamInboundStreamResourceLimits(t, base) @@ -235,9 +231,8 @@ func 
TestCreateStream_PeerLimitLessThanPeerProtocolLimit(t *testing.T) { func TestCreateStream_PeerLimitGreaterThanPeerProtocolLimit(t *testing.T) { // the case where peer-level limit is higher than the peer-protocol-level limit. - unittest.SkipUnless(t, - unittest.TEST_TODO, - "max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol") + // max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol + unittest.SkipUnless(t, unittest.TEST_TODO, "broken test") base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundPeerStream = 10 // each peer can create 10 streams. base.maxInboundStreamPeerProtocol = 5 // each peer can create 5 streams on a specific protocol. @@ -248,9 +243,9 @@ func TestCreateStream_PeerLimitGreaterThanPeerProtocolLimit(t *testing.T) { } func TestCreateStream_ProtocolLimitLessThanPeerProtocolLimit(t *testing.T) { + // max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol unittest.SkipUnless(t, - unittest.TEST_TODO, - "max inbound stream peer protocol is not preserved; can be partially due to count stream not counting inbound streams on a protocol") + unittest.TEST_TODO, "broken test") // the case where protocol-level limit is lower than the peer-protocol-level limit. base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundStreamProtocol = 5 // each peer can create 5 streams on a specific protocol. From 951672fb092410f0c8975df6b1bfed69cae2e30d Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Mon, 23 Oct 2023 19:02:19 -0700 Subject: [PATCH 27/29] quarantines TestCreateStream_MaxTransientLimit --- network/p2p/p2pnode/resourceManager_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index 00d8c44cee5..743ce015c94 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -154,6 +154,7 @@ func TestCreateStream_MinPeerLimit(t *testing.T) { } func TestCreateStream_MaxPeerLimit(t *testing.T) { + base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundPeerStream = math.MaxInt testCreateStreamInboundStreamResourceLimits(t, base) @@ -194,6 +195,7 @@ func TestCreateStream_MinTransientLimit(t *testing.T) { } func TestCreateStream_MaxTransientLimit(t *testing.T) { + unittest.SkipUnless(t, unittest.TEST_TODO, "fails on CI constantly") base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundStreamTransient = math.MaxInt testCreateStreamInboundStreamResourceLimits(t, base) From 488e9e0a629cc50f0bd62a19103fb6cf5875bd21 Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Mon, 23 Oct 2023 21:22:24 -0700 Subject: [PATCH 28/29] reduces test load --- network/p2p/p2pnode/resourceManager_test.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index 743ce015c94..863d6bb2146 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -135,11 +135,11 @@ func (t testPeerLimitConfig) maxLimit() int { func baseCreateStreamInboundStreamResourceLimitConfig() *testPeerLimitConfig { return &testPeerLimitConfig{ nodeCount: 10, - maxInboundPeerStream: 100, - maxInboundStreamProtocol: 100, - 
maxInboundStreamPeerProtocol: 100, - maxInboundStreamTransient: 100, - maxInboundStreamSystem: 100, + maxInboundPeerStream: 20, + maxInboundStreamProtocol: 20, + maxInboundStreamPeerProtocol: 20, + maxInboundStreamTransient: 20, + maxInboundStreamSystem: 20, } } @@ -195,7 +195,6 @@ func TestCreateStream_MinTransientLimit(t *testing.T) { } func TestCreateStream_MaxTransientLimit(t *testing.T) { - unittest.SkipUnless(t, unittest.TEST_TODO, "fails on CI constantly") base := baseCreateStreamInboundStreamResourceLimitConfig() base.maxInboundStreamTransient = math.MaxInt testCreateStreamInboundStreamResourceLimits(t, base) From 4f4edbc09be5743c84d9e404255b68bc5556a887 Mon Sep 17 00:00:00 2001 From: Yahya Hassanzadeh Date: Tue, 24 Oct 2023 06:24:39 -0700 Subject: [PATCH 29/29] further reduces test load --- network/p2p/p2pnode/resourceManager_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/network/p2p/p2pnode/resourceManager_test.go b/network/p2p/p2pnode/resourceManager_test.go index 863d6bb2146..bcef3cb8f7f 100644 --- a/network/p2p/p2pnode/resourceManager_test.go +++ b/network/p2p/p2pnode/resourceManager_test.go @@ -134,12 +134,12 @@ func (t testPeerLimitConfig) maxLimit() int { // baseCreateStreamInboundStreamResourceLimitConfig returns a testPeerLimitConfig with default values. func baseCreateStreamInboundStreamResourceLimitConfig() *testPeerLimitConfig { return &testPeerLimitConfig{ - nodeCount: 10, - maxInboundPeerStream: 20, - maxInboundStreamProtocol: 20, - maxInboundStreamPeerProtocol: 20, - maxInboundStreamTransient: 20, - maxInboundStreamSystem: 20, + nodeCount: 5, + maxInboundPeerStream: 10, + maxInboundStreamProtocol: 10, + maxInboundStreamPeerProtocol: 10, + maxInboundStreamTransient: 10, + maxInboundStreamSystem: 10, } }
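Taken together, the last two patches halve the test's limit knobs twice. For readers mapping those knobs onto libp2p's limit hierarchy, the following is a minimal sketch: the correspondence between the testPeerLimitConfig fields and the rcmgr scopes is an informal reading of the config comments in patch 21, PartialLimitConfig and ResourceLimits are assumed from go-libp2p's resource-manager package, and the values are the reduced ones from patch 29.

package main

import (
	"github.com/libp2p/go-libp2p"
	rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager"
)

func main() {
	// Scaled defaults from go-libp2p, with default service limits folded in.
	defaults := rcmgr.DefaultLimits
	libp2p.SetDefaultServiceLimits(&defaults)

	// Flattened inbound-stream limits, in the spirit of testPeerLimitConfig.
	partial := rcmgr.PartialLimitConfig{
		System:              rcmgr.ResourceLimits{StreamsInbound: 10}, // maxInboundStreamSystem
		Transient:           rcmgr.ResourceLimits{StreamsInbound: 10}, // maxInboundStreamTransient
		ProtocolDefault:     rcmgr.ResourceLimits{StreamsInbound: 10}, // maxInboundStreamProtocol
		PeerDefault:         rcmgr.ResourceLimits{StreamsInbound: 10}, // maxInboundPeerStream
		ProtocolPeerDefault: rcmgr.ResourceLimits{StreamsInbound: 10}, // maxInboundStreamPeerProtocol
	}

	// Overlay the partial limits on the auto-scaled defaults and build a
	// fixed limiter for the host.
	limiter := rcmgr.NewFixedLimiter(partial.Build(defaults.AutoScale()))
	_ = limiter
}

Scopes left at their zero value fall back to the scaled defaults, which is why the sketch only sets the inbound-stream fields these tests exercise.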