Skip to content

Commit f76e4f9

Browse files
authored
Hotfix #120 (#136)
* cluster: fix deadlock in cluster synchronisation (#120) For an impressively thorough breakdown of the problem, see: #120 (comment) Huge thanks to @dvic and @KJTsanaktsidis for the report and fix. * readme: credit @dvic and @KJTsanaktsidis
1 parent baa28fc commit f76e4f9

File tree

4 files changed

+55
-1
lines changed

4 files changed

+55
-1
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ Further PR's (with tests) are welcome, but please maintain backwards compatibili
3636
* Gracefully recover from a temporarily unreachable server ([details](https://github.com/globalsign/mgo/pull/69))
3737
* Use JSON tags when no explicit BSON tags are set ([details](https://github.com/globalsign/mgo/pull/91))
3838
* Support [$changeStream](https://docs.mongodb.com/manual/changeStreams/) tailing on 3.6+ ([details](https://github.com/globalsign/mgo/pull/97))
39+
* Fix deadlock in cluster synchronisation ([details](https://github.com/globalsign/mgo/issues/120))
3940

4041
---
4142

@@ -46,11 +47,13 @@ Further PR's (with tests) are welcome, but please maintain backwards compatibili
4647
* @carter2000
4748
* @cezarsa
4849
* @drichelson
50+
* @dvic
4951
* @eaglerayp
5052
* @feliixx
5153
* @fmpwizard
5254
* @idy
5355
* @jameinel
56+
* @KJTsanaktsidis
5457
* @gazoon
5558
* @mapete94
5659
* @peterdeka

cluster.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ func (cluster *mongoCluster) isMaster(socket *mongoSocket, result *isMasterResul
179179
})
180180
})
181181

182-
err := session.Run(cmd, result)
182+
err := session.runOnSocket(socket, cmd, result)
183183
session.Close()
184184
return err
185185
}

cluster_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1964,6 +1964,41 @@ func (s *S) TestConnectCloseConcurrency(c *C) {
19641964
wg.Wait()
19651965
}
19661966

1967+
// TestNoDeadlockOnClose dials a local TCP listener that accepts every
// connection and immediately closes it. The test makes no assertion on the
// dial result; it passes as long as DialWithTimeout returns instead of hanging.
// It is skipped when the fast flag is set.
func (s *S) TestNoDeadlockOnClose(c *C) {
1968+
if *fast {
1969+
// Unfortunately I seem to need quite a high dial timeout to get this to work
1970+
// on my machine.
1971+
c.Skip("-fast")
1972+
}
1973+
1974+
var shouldStop int32
1975+
atomic.StoreInt32(&shouldStop, 0)
1976+
1977+
listener, err := net.Listen("tcp4", "127.0.0.1:")
1978+
// NOTE(review): listener is used unconditionally below; if Check does not
// abort the test on failure, a failed Listen would lead to a nil listener
// being dereferenced — confirm whether Assert is the better fit here.
c.Check(err, Equals, nil)
1979+
1980+
go func() {
1981+
for atomic.LoadInt32(&shouldStop) == 0 {
1982+
sock, err := listener.Accept()
1983+
if err != nil {
1984+
// Accept failed, most likely because the listener was closed; loop
// around so the stop flag is checked again.
continue
1986+
}
1987+
sock.Close()
1988+
}
1989+
}()
1990+
defer func() {
1991+
// Set the stop flag before closing the listener so the accept loop exits
// once Accept starts returning errors.
atomic.StoreInt32(&shouldStop, 1)
1992+
listener.Close()
1993+
}()
1994+
1995+
session, err := mgo.DialWithTimeout(listener.Addr().String(), 10*time.Second)
1996+
// If execution reaches here, the deadlock did not happen and all is OK
1997+
if session != nil {
1998+
session.Close()
1999+
}
2000+
}
2001+
19672002
func (s *S) TestSelectServers(c *C) {
19682003
if !s.versionAtLeast(2, 2) {
19692004
c.Skip("read preferences introduced in 2.2")

session.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,15 @@ func (db *Database) Run(cmd interface{}, result interface{}) error {
806806
return db.run(socket, cmd, result)
807807
}
808808

809+
// runOnSocket does the same as Run, but guarantees that cmd is run on the
810+
// provided socket instance; if the socket is unhealthy, its error is returned.
811+
// socket.Acquire is called before the command and socket.Release on return.
812+
func (db *Database) runOnSocket(socket *mongoSocket, cmd interface{}, result interface{}) error {
813+
socket.Acquire()
814+
defer socket.Release()
815+
return db.run(socket, cmd, result)
816+
}
817+
809818
// Credential holds details to authenticate with a MongoDB server.
810819
type Credential struct {
811820
// Username and Password hold the basic details for authentication.
@@ -2270,6 +2279,13 @@ func (s *Session) Run(cmd interface{}, result interface{}) error {
22702279
return s.DB("admin").Run(cmd, result)
22712280
}
22722281

2282+
// runOnSocket does the same as Run, but guarantees that cmd is run on the
2283+
// provided socket instance; if the socket is unhealthy, its error is returned.
2284+
// It delegates to runOnSocket on the session's "admin" database.
2285+
func (s *Session) runOnSocket(socket *mongoSocket, cmd interface{}, result interface{}) error {
2286+
return s.DB("admin").runOnSocket(socket, cmd, result)
2287+
}
2288+
22732289
// SelectServers restricts communication to servers configured with the
22742290
// given tags. For example, the following statement restricts servers
22752291
// used for reading operations to those with both tag "disk" set to

0 commit comments

Comments
 (0)