From dd774b1b9bae85a15497120256dda3330800a6b9 Mon Sep 17 00:00:00 2001 From: ozraru Date: Sun, 19 Jan 2025 00:19:01 +0900 Subject: [PATCH 1/3] Voice overhaul ## Summary - Use context to manage goroutines and connections - Eliminate busy wait using sync.Cond - Use Voice Gateway Version 8 - Change to new encryption mode - Resume Voice Connection - Delete reconnect (excludes resume) - It is very rare and it should be handled by an application - Deliver unrecoverable error to the application - Edit some examples to work with these changes ## Breaking changes - misc - Some functions require a context.Context argument - Session - Renamed ChannelVoiceJoinManual to VoiceStateUpdate - Remove ShouldReconnectVoiceOnSessionError - Because the reconnecting feature is deleted - VoiceConnection - Removed Ready - Use Status instead - Removed Debug - already deprecated - Removed ChangeChannel - Use Session.VoiceStateUpdate - Removed Close - Kill is the alternative, but why not use Disconnect? - Packet - Removed Type - Separated to Flags and PayloadType --- discord.go | 36 +- events.go | 6 +- examples/airhorn/main.go | 9 +- examples/voice_receive/main.go | 12 +- structs.go | 3 - voice.go | 1090 +++++++++++++++++--------------- wsapi.go | 104 ++- 7 files changed, 657 insertions(+), 603 deletions(-) diff --git a/discord.go b/discord.go index 4498c0758..a4f4bbd11 100644 --- a/discord.go +++ b/discord.go @@ -26,28 +26,30 @@ const VERSION = "0.28.1" // New creates a new Discord session with provided token. // If the token is for a bot, it must be prefixed with "Bot " -// e.g. "Bot ..." +// +// e.g. "Bot ..." +// // Or if it is an OAuth2 token, it must be prefixed with "Bearer " -// e.g. "Bearer ..." +// +// e.g. "Bearer ..." func New(token string) (s *Session, err error) { // Create an empty Session interface. 
s = &Session{ - State: NewState(), - Ratelimiter: NewRatelimiter(), - StateEnabled: true, - Compress: true, - ShouldReconnectOnError: true, - ShouldReconnectVoiceOnSessionError: true, - ShouldRetryOnRateLimit: true, - ShardID: 0, - ShardCount: 1, - MaxRestRetries: 3, - Client: &http.Client{Timeout: (20 * time.Second)}, - Dialer: websocket.DefaultDialer, - UserAgent: "DiscordBot (https://github.com/bwmarrin/discordgo, v" + VERSION + ")", - sequence: new(int64), - LastHeartbeatAck: time.Now().UTC(), + State: NewState(), + Ratelimiter: NewRatelimiter(), + StateEnabled: true, + Compress: true, + ShouldReconnectOnError: true, + ShouldRetryOnRateLimit: true, + ShardID: 0, + ShardCount: 1, + MaxRestRetries: 3, + Client: &http.Client{Timeout: (20 * time.Second)}, + Dialer: websocket.DefaultDialer, + UserAgent: "DiscordBot (https://github.com/bwmarrin/discordgo, v" + VERSION + ")", + sequence: new(int64), + LastHeartbeatAck: time.Now().UTC(), } // Initialize the Identify Package with defaults diff --git a/events.go b/events.go index 6a410fe35..02915a341 100644 --- a/events.go +++ b/events.go @@ -338,9 +338,9 @@ type UserUpdate struct { // VoiceServerUpdate is the data for a VoiceServerUpdate event. type VoiceServerUpdate struct { - Token string `json:"token"` - GuildID string `json:"guild_id"` - Endpoint string `json:"endpoint"` + Token string `json:"token"` + GuildID string `json:"guild_id"` + Endpoint *string `json:"endpoint"` } // VoiceStateUpdate is the data for a VoiceStateUpdate event. diff --git a/examples/airhorn/main.go b/examples/airhorn/main.go index 646f523f8..332f20b03 100644 --- a/examples/airhorn/main.go +++ b/examples/airhorn/main.go @@ -1,6 +1,7 @@ package main import ( + "context" "encoding/binary" "flag" "fmt" @@ -186,7 +187,9 @@ func loadSound() error { func playSound(s *discordgo.Session, guildID, channelID string) (err error) { // Join the provided voice channel. 
- vc, err := s.ChannelVoiceJoin(guildID, channelID, false, true) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + vc, err := s.ChannelVoiceJoin(ctx, guildID, channelID, false, false) + cancel() if err != nil { return err } @@ -209,7 +212,9 @@ func playSound(s *discordgo.Session, guildID, channelID string) (err error) { time.Sleep(250 * time.Millisecond) // Disconnect from the provided voice channel. - vc.Disconnect() + ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second) + vc.Disconnect(ctx) + cancel() return nil } diff --git a/examples/voice_receive/main.go b/examples/voice_receive/main.go index e5a925294..fbea9f38c 100644 --- a/examples/voice_receive/main.go +++ b/examples/voice_receive/main.go @@ -1,6 +1,7 @@ package main import ( + "context" "flag" "fmt" "time" @@ -30,7 +31,7 @@ func createPionRTPPacket(p *discordgo.Packet) *rtp.Packet { Header: rtp.Header{ Version: 2, // Taken from Discord voice docs - PayloadType: 0x78, + PayloadType: p.PayloadType, SequenceNumber: p.Sequence, Timestamp: p.Timestamp, SSRC: p.SSRC, @@ -39,7 +40,7 @@ func createPionRTPPacket(p *discordgo.Packet) *rtp.Packet { } } -func handleVoice(c chan *discordgo.Packet) { +func handleVoice(c <-chan *discordgo.Packet) { files := make(map[uint32]media.Writer) for p := range c { file, ok := files[p.SSRC] @@ -83,7 +84,7 @@ func main() { return } - v, err := s.ChannelVoiceJoin(GuildID, ChannelID, true, false) + v, err := s.ChannelVoiceJoin(context.Background(), GuildID, ChannelID, true, false) if err != nil { fmt.Println("failed to join voice channel:", err) return @@ -91,9 +92,10 @@ func main() { go func() { time.Sleep(10 * time.Second) - close(v.OpusRecv) - v.Close() + v.Disconnect(context.Background()) }() handleVoice(v.OpusRecv) + + fmt.Println("exiting...") } diff --git a/structs.go b/structs.go index 23dc4790f..0cf92b30f 100644 --- a/structs.go +++ b/structs.go @@ -42,9 +42,6 @@ type Session struct { // Should the session reconnect the 
websocket on errors. ShouldReconnectOnError bool - // Should voice connections reconnect on a session reconnect. - ShouldReconnectVoiceOnSessionError bool - // Should the session retry requests when rate limited. ShouldRetryOnRateLimit bool diff --git a/voice.go b/voice.go index e9c89bee1..edf14a59a 100644 --- a/voice.go +++ b/voice.go @@ -10,63 +10,85 @@ package discordgo import ( + "context" + "crypto/aes" + "crypto/cipher" "encoding/binary" "encoding/json" + "errors" "fmt" "net" "strconv" - "strings" "sync" + "sync/atomic" "time" "github.com/gorilla/websocket" - "golang.org/x/crypto/nacl/secretbox" + "golang.org/x/crypto/chacha20poly1305" ) // ------------------------------------------------------------------------------------------------ // Code related to both VoiceConnection Websocket and UDP connections. // ------------------------------------------------------------------------------------------------ +type VoiceConnectionStatus int + +const ( + VoiceConnectionStatusInvalid VoiceConnectionStatus = iota // status not specified, bug? + VoiceConnectionStatusNew // initiating connection + VoiceConnectionStatusConnecting // connecting websocket and udp + VoiceConnectionStatusReady // ready to send/receive audio + VoiceConnectionStatusDead // already dead(error or disconnected normally) +) + // A VoiceConnection struct holds all the data and functions related to a Discord Voice Connection. 
type VoiceConnection struct { - sync.RWMutex - - Debug bool // If true, print extra logging -- DEPRECATED - LogLevel int - Ready bool // If true, voice is ready to send/receive audio - UserID string - GuildID string - ChannelID string - deaf bool - mute bool - speaking bool - reconnecting bool // If true, voice connection is trying to reconnect - - OpusSend chan []byte // Chan for sending opus audio - OpusRecv chan *Packet // Chan for receiving opus audio - - wsConn *websocket.Conn - wsMutex sync.Mutex - udpConn *net.UDPConn - session *Session + Cond *sync.Cond - sessionID string - token string - endpoint string + // Status of this connection. Please don't change. + // New -> Connecting <-> Ready + // any -> Dead + Status VoiceConnectionStatus - // Used to send a close signal to goroutines - close chan struct{} + // Closed if this VoiceConection status become Dead + Dead <-chan struct{} + dead chan struct{} - // Used to allow blocking until connected - connected chan bool + // contains unrecoverable error + // if not nil, Status should be Dead + Err error - // Used to pass the sessionid from onVoiceStateUpdate - // sessionRecv chan string UNUSED ATM + LogLevel int + GuildID string + + deaf bool + mute bool + speaking bool + + OpusSend chan []byte // Chan for sending opus audio, automatically closed after dead, DON'T CLOSE YOURSELF + OpusRecv chan *Packet // Chan for receiving opus audio, automatically closed after dead, DON'T CLOSE YOURSELF + + // can be nil, use only for send message + // mostly this is available connection or nil, but rarely closed connection + wsConn *websocket.Conn + + // calling this may close websocket and all related connection. 
+ wsCancel context.CancelFunc + + udpConn *net.UDPConn + + session *Session + + sessionID string - op4 voiceOP4 op2 voiceOP2 + op4 voiceOP4 + + cipher cipher.AEAD voiceSpeakingUpdateHandlers []VoiceSpeakingUpdateHandler + + seqAck atomic.Int32 // for heartbeat and resume } // VoiceSpeakingUpdateHandler type provides a function definition for the @@ -91,17 +113,15 @@ func (v *VoiceConnection) Speaking(b bool) (err error) { Data voiceSpeakingData `json:"d"` } + v.Cond.L.Lock() + defer v.Cond.L.Unlock() if v.wsConn == nil { return fmt.Errorf("no VoiceConnection websocket") } - data := voiceSpeakingOp{5, voiceSpeakingData{b, 0}} - v.wsMutex.Lock() err = v.wsConn.WriteJSON(data) - v.wsMutex.Unlock() - v.Lock() - defer v.Unlock() + v.Cond.Broadcast() if err != nil { v.speaking = false v.log(LogError, "Speaking() write json error, %s", err) @@ -113,111 +133,56 @@ func (v *VoiceConnection) Speaking(b bool) (err error) { return } -// ChangeChannel sends Discord a request to change channels within a Guild -// !!! NOTE !!! This function may be removed in favour of just using ChannelVoiceJoin -func (v *VoiceConnection) ChangeChannel(channelID string, mute, deaf bool) (err error) { +// Disconnect requests disconnect from this voice channel and wait for disconencted +func (v *VoiceConnection) Disconnect(ctx context.Context) error { v.log(LogInformational, "called") - data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, &channelID, mute, deaf}} - v.session.wsMutex.Lock() - err = v.session.wsConn.WriteJSON(data) - v.session.wsMutex.Unlock() + err := v.session.VoiceStateUpdate(v.GuildID, "", true, true) if err != nil { - return + return err } - v.ChannelID = channelID - v.deaf = deaf - v.mute = mute - v.speaking = false - return + return v.waitUntilStatus(ctx, VoiceConnectionStatusDead) } -// Disconnect disconnects from this voice channel and closes the websocket -// and udp connections to Discord. 
-func (v *VoiceConnection) Disconnect() (err error) { - - // Send a OP4 with a nil channel to disconnect - v.Lock() - if v.sessionID != "" { - data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, nil, true, true}} - v.session.wsMutex.Lock() - err = v.session.wsConn.WriteJSON(data) - v.session.wsMutex.Unlock() - v.sessionID = "" - } - v.Unlock() - - // Close websocket and udp connections - v.Close() +// Stop all goroutines related to this voice conection, remove self from Session, and set status to dead. +// NOTE: unlock before calling this +func (v *VoiceConnection) Kill() { - v.log(LogInformational, "Deleting VoiceConnection %s", v.GuildID) + v.log(LogInformational, "called") v.session.Lock() - delete(v.session.VoiceConnections, v.GuildID) + if v.session.VoiceConnections[v.GuildID] == v { + delete(v.session.VoiceConnections, v.GuildID) + } v.session.Unlock() - - return -} - -// Close closes the voice ws and udp connections -func (v *VoiceConnection) Close() { - - v.log(LogInformational, "called") - - v.Lock() - defer v.Unlock() - - v.Ready = false - v.speaking = false - - if v.close != nil { - v.log(LogInformational, "closing v.close") - close(v.close) - v.close = nil + v.Cond.L.Lock() + defer v.Cond.L.Unlock() + if v.wsCancel != nil { + v.wsCancel() } - - if v.udpConn != nil { - v.log(LogInformational, "closing udp") - err := v.udpConn.Close() - if err != nil { - v.log(LogError, "error closing udp connection, %s", err) - } - v.udpConn = nil + if v.Status != VoiceConnectionStatusDead { + v.Status = VoiceConnectionStatusDead + v.Cond.Broadcast() + close(v.dead) + go func() { + time.Sleep(100 * time.Millisecond) // safe + close(v.OpusRecv) + close(v.OpusSend) + }() } - if v.wsConn != nil { - v.log(LogInformational, "sending close frame") - - // To cleanly close a connection, a client should send a close - // frame and wait for the server to close the connection. 
- v.wsMutex.Lock() - err := v.wsConn.WriteMessage(websocket.CloseMessage, websocket.FormatCloseMessage(websocket.CloseNormalClosure, "")) - v.wsMutex.Unlock() - if err != nil { - v.log(LogError, "error closing websocket, %s", err) - } - - // TODO: Wait for Discord to actually close the connection. - time.Sleep(1 * time.Second) - - v.log(LogInformational, "closing websocket") - err = v.wsConn.Close() - if err != nil { - v.log(LogError, "error closing websocket, %s", err) - } - - v.wsConn = nil - } + v.log(LogInformational, "done") } // AddHandler adds a Handler for VoiceSpeakingUpdate events. func (v *VoiceConnection) AddHandler(h VoiceSpeakingUpdateHandler) { - v.Lock() - defer v.Unlock() + v.Cond.L.Lock() + defer v.Cond.L.Unlock() v.voiceSpeakingUpdateHandlers = append(v.voiceSpeakingUpdateHandlers, h) + v.Cond.Broadcast() } // VoiceSpeakingUpdate is a struct for a VoiceSpeakingUpdate event. @@ -231,314 +196,492 @@ type VoiceSpeakingUpdate struct { // Unexported Internal Functions Below. 
// ------------------------------------------------------------------------------------------------ -// A voiceOP4 stores the data for the voice operation 4 websocket event -// which provides us with the NaCl SecretBox encryption key -type voiceOP4 struct { - SecretKey [32]byte `json:"secret_key"` - Mode string `json:"mode"` +// unrecoverable error handling +// VoiceConnection should be unlocked before calling this +func (v *VoiceConnection) failure(err error) { + v.log(LogError, "voice unrecoverable error, %v", err.Error()) + v.log(LogDebug, "voice struct: %#v\n", v) + v.Cond.L.Lock() + if v.Err == nil { + v.Status = VoiceConnectionStatusDead + v.Err = err + v.Cond.Broadcast() + } + v.Cond.L.Unlock() + // cleanup + v.Kill() + v.Disconnect(context.Background()) +} + +// voiceWebsocketMessage is basic message struct of voice websocket +type voiceWebsocketMessage struct { + Operation int `json:"op"` + RawData json.RawMessage `json:"d"` + Sequence *int32 `json:"seq"` } // A voiceOP2 stores the data for the voice operation 2 websocket event // which is sort of like the voice READY packet type voiceOP2 struct { - SSRC uint32 `json:"ssrc"` - Port int `json:"port"` - Modes []string `json:"modes"` - HeartbeatInterval time.Duration `json:"heartbeat_interval"` - IP string `json:"ip"` + SSRC uint32 `json:"ssrc"` + Port int `json:"port"` + Modes []string `json:"modes"` + IP string `json:"ip"` +} + +// A voiceOP4 stores the data for the voice operation 4 websocket event +// which provides us with the NaCl SecretBox encryption key +type voiceOP4 struct { + SecretKey []byte `json:"secret_key"` + Mode string `json:"mode"` +} + +// A voiceOP8 stores the data for the voice operation 8 websocket event HELLO +type voiceOP8 struct { + HeartbeatInterval int `json:"heartbeat_interval"` } // WaitUntilConnected waits for the Voice Connection to // become ready, if it does not become ready it returns an err -func (v *VoiceConnection) waitUntilConnected() error { - +func (v *VoiceConnection) 
waitUntilStatus(ctx context.Context, status VoiceConnectionStatus) error { v.log(LogInformational, "called") - i := 0 - for { - v.RLock() - ready := v.Ready - v.RUnlock() - if ready { - return nil - } + ch := make(chan error) - if i > 10 { - return fmt.Errorf("timeout waiting for voice") + go func() { + defer close(ch) + v.Cond.L.Lock() + defer v.Cond.L.Unlock() + for v.Status != status && v.Status != VoiceConnectionStatusDead { + select { + case <-ctx.Done(): + return + default: + } + v.Cond.Wait() } + ch <- v.Err + }() - time.Sleep(1 * time.Second) - i++ + select { + case err := <-ch: + return err + case <-ctx.Done(): + return ctx.Err() } + } -// Open opens a voice connection. This should be called -// after VoiceChannelJoin is used and the data VOICE websocket events -// are captured. -func (v *VoiceConnection) open() (err error) { +// onVoiceServerUpdate handles a VOICE_SERVER_UPDATE event of main gateway. +// wait for VOICE_SERVER_UPDATE and open voice websocket connection. +func (v *VoiceConnection) onVoiceServerUpdate(ev *VoiceServerUpdate) (err error) { v.log(LogInformational, "called") - v.Lock() - defer v.Unlock() + v.Cond.L.Lock() + defer v.Cond.L.Unlock() - // Don't open a websocket if one is already open - if v.wsConn != nil { - v.log(LogWarning, "refusing to overwrite non-nil websocket") + // Close a websocket if one is already open + if v.wsCancel != nil { + v.wsCancel() + } + + // If no endpoint, just wait for next event + if ev.Endpoint == nil { return } - // TODO temp? loop to wait for the SessionID - i := 0 - for { - if v.sessionID != "" { - break - } + go v.websocket(context.TODO(), *ev.Endpoint, ev.Token) - if i > 20 { // only loop for up to 1 second total - return fmt.Errorf("did not receive voice Session ID in time") - } - // Release the lock, so sessionID can be populated upon receiving a VoiceStateUpdate event. 
- v.Unlock() - time.Sleep(50 * time.Millisecond) - i++ - v.Lock() - } + return +} - // Connect to VoiceConnection Websocket - vg := "wss://" + strings.TrimSuffix(v.endpoint, ":80") - v.log(LogInformational, "connecting to voice endpoint %s", vg) - v.wsConn, _, err = v.session.Dialer.Dial(vg, nil) - if err != nil { - v.log(LogWarning, "error connecting to voice endpoint %s, %s", vg, err) - v.log(LogDebug, "voice struct: %#v\n", v) - return - } +var ErrVoiceNoSessionID = errors.New("did not receive voice Session ID in time") +var ErrVoiceReconnectionLimit = errors.New("reconnection limit reached") +var ErrVoiceUnknownEncryptionMode = errors.New("unknown encryption mode") - type voiceHandshakeData struct { - ServerID string `json:"server_id"` - UserID string `json:"user_id"` - SessionID string `json:"session_id"` - Token string `json:"token"` - } - type voiceHandshakeOp struct { - Op int `json:"op"` // Always 0 - Data voiceHandshakeData `json:"d"` +// websocket open the voice websocket, handle reconnect, and listens on it for messages and passes them to the voice event handler. +// This is automatically called by the Open func. 
+func (v *VoiceConnection) websocket(ctx context.Context, endpoint string, token string) { + + v.log(LogInformational, "called") + + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + v.Cond.L.Lock() + // Close a websocket if one is already open + if v.wsCancel != nil { + v.wsCancel() } - data := voiceHandshakeOp{0, voiceHandshakeData{v.GuildID, v.UserID, v.sessionID, v.token}} + v.wsCancel = cancel + v.Cond.L.Unlock() + + sessionIdDone := make(chan struct{}) + go func() { + v.Cond.L.Lock() + defer v.Cond.L.Unlock() + for v.sessionID == "" { + v.Cond.Wait() + } + close(sessionIdDone) + }() + timeout := time.NewTimer(1 * time.Second) - v.wsMutex.Lock() - err = v.wsConn.WriteJSON(data) - v.wsMutex.Unlock() - if err != nil { - v.log(LogWarning, "error sending init packet, %s", err) + select { + case <-sessionIdDone: + case <-timeout.C: + v.failure(ErrVoiceNoSessionID) return } - v.close = make(chan struct{}) - go v.wsListen(v.wsConn, v.close) + // avoid resource leak before Go 1.23 + if !timeout.Stop() { + <-timeout.C + } - // add loop/check for Ready bool here? - // then return false if not ready? - // but then wsListen will also err. + v.seqAck.Store(-1) - return -} + for i := 0; i < 100; i++ { + select { + case <-ctx.Done(): + return + default: + } -// wsListen listens on the voice websocket for messages and passes them -// to the voice event handler. 
This is automatically called by the Open func -func (v *VoiceConnection) wsListen(wsConn *websocket.Conn, close <-chan struct{}) { + ctx, cancel := context.WithCancel(ctx) + defer cancel() // this cancel() is not needed actually, but do it to suppress lint warning - v.log(LogInformational, "called") + v.Cond.L.Lock() + v.Status = VoiceConnectionStatusConnecting + v.Cond.Broadcast() + v.Cond.L.Unlock() - for { - _, message, err := v.wsConn.ReadMessage() + vg := "wss://" + endpoint + "?v=8" + v.log(LogInformational, "connecting to voice endpoint %s", vg) + wsConn, _, err := v.session.Dialer.Dial(vg, nil) if err != nil { - // 4014 indicates a manual disconnection by someone in the guild; - // we shouldn't reconnect. - if websocket.IsCloseError(err, 4014) { - v.log(LogInformational, "received 4014 manual disconnection") + err = fmt.Errorf("error connecting to voice endpoint %s, %w", vg, err) + v.failure(err) + return + } + go func() { + <-ctx.Done() + // don't do graceful closing of websocket because it's not needed + err := wsConn.Close() + v.log(LogDebug, "closed voice websocket due to context done: %v", err) + }() + + v.Cond.L.Lock() + v.wsConn = wsConn + v.Cond.L.Unlock() + + if i == 0 { + type voiceHandshakeData struct { + ServerID string `json:"server_id"` + UserID string `json:"user_id"` + SessionID string `json:"session_id"` + Token string `json:"token"` + MaxDAVEProtocolVersion int `json:"max_dave_protocol_version"` + } + type voiceHandshakeOp struct { + Op int `json:"op"` // Always 0 + Data voiceHandshakeData `json:"d"` + } + data := voiceHandshakeOp{0, voiceHandshakeData{ + ServerID: v.GuildID, + UserID: v.session.State.User.ID, + SessionID: v.sessionID, + Token: token, + MaxDAVEProtocolVersion: 0, // TODO: implement DAVE + }} + + err = wsConn.WriteJSON(data) + if err != nil { + err = fmt.Errorf("error sending identify packet, %w", err) + v.failure(err) + return + } + } else { + type voiceResumeData struct { + ServerID string `json:"server_id"` + SessionID 
string `json:"session_id"` + Token string `json:"token"` + SeqAck int32 `json:"seq_ack"` + } + type voiceResumeOp struct { + Op int `json:"op"` // Always 7 + Data voiceResumeData `json:"d"` + } + data := voiceResumeOp{7, voiceResumeData{ + ServerID: v.GuildID, + SessionID: v.sessionID, + Token: token, + SeqAck: v.seqAck.Load(), + }} + + v.log(LogInformational, "resuming voice websocket") + v.log(LogDebug, "resume packet, %#v", data) + + err = wsConn.WriteJSON(data) + if err != nil { + err = fmt.Errorf("error sending resume packet, %w", err) + v.failure(err) + return + } + + // reopen UDP connection because WebSocket broken likely meaning UDP broken too. + err = v.udpOpen(ctx) + if err != nil { + err = fmt.Errorf("failed to resume UDP connection, %w", err) + v.failure(err) + return + } + } + for { + select { + case <-ctx.Done(): + return + default: + } + + messageType, message, err := wsConn.ReadMessage() + if err != nil { // Abandon the voice WS connection - v.Lock() - v.wsConn = nil - v.Unlock() - - // Wait for VOICE_SERVER_UPDATE. - // When the bot is moved by the user to another voice channel, - // VOICE_SERVER_UPDATE is received after the code 4014. - for i := 0; i < 5; i++ { // TODO: temp, wait for VoiceServerUpdate. - <-time.After(1 * time.Second) - - v.RLock() - reconnected := v.wsConn != nil - v.RUnlock() - if !reconnected { - continue - } - v.log(LogInformational, "successfully reconnected after 4014 manual disconnection") - return + v.Cond.L.Lock() + if v.wsConn == wsConn { + v.wsConn = nil } + v.Cond.Broadcast() + v.Cond.L.Unlock() - // When VOICE_SERVER_UPDATE is not received, disconnect as usual. - v.log(LogInformational, "disconnect due to 4014 manual disconnection") + select { + case <-ctx.Done(): + return + default: + } - v.session.Lock() - delete(v.session.VoiceConnections, v.GuildID) - v.session.Unlock() + // 4014 indicates a manual disconnection by someone in the guild; + // we shouldn't reconnect. 
+ if websocket.IsCloseError(err, 4014) { + v.log(LogInformational, "received 4014 manual disconnection") - v.Close() + return + } - return - } + // 4015 indicates that voice server crashed so we should reconnect. + // Other code is our bad, should never happen, we stop reconnecting to avoid loop. + if websocket.IsUnexpectedCloseError(err, 4015) { + err := fmt.Errorf("voice websocket closed, %w", err) + v.failure(err) + return + } - // Detect if we have been closed manually. If a Close() has already - // happened, the websocket we are listening on will be different to the - // current session. - v.RLock() - sameConnection := v.wsConn == wsConn - v.RUnlock() - if sameConnection { + v.log(LogInformational, "voice socket disconnected, reconnecting, %v", err) - v.log(LogError, "voice endpoint %s websocket closed unexpectantly, %s", v.endpoint, err) + // close goroutine related to websocket + cancel() - // Start reconnect goroutine then exit. - go v.reconnect() + // reconnect + break } - return - } - // Pass received message to voice event handler - select { - case <-close: - return - default: - go v.onEvent(message) + // Pass received message to voice event handler + go v.onEvent(ctx, messageType == websocket.BinaryMessage, message) } } + + v.failure(ErrVoiceReconnectionLimit) } // wsEvent handles any voice websocket events. This is only called by the // wsListen() function. 
-func (v *VoiceConnection) onEvent(message []byte) { - - v.log(LogDebug, "received: %s", string(message)) +func (v *VoiceConnection) onEvent(ctx context.Context, binary bool, message []byte) { - var e Event - if err := json.Unmarshal(message, &e); err != nil { - v.log(LogError, "unmarshall error, %s", err) - return + if binary { + v.log(LogDebug, "received binary: %x", message) + } else { + v.log(LogDebug, "received string: %s", string(message)) } - switch e.Operation { - - case 2: // READY + if binary { + // TODO: implement DAVE + } else { - if err := json.Unmarshal(e.RawData, &v.op2); err != nil { - v.log(LogError, "OP2 unmarshall error, %s, %s", err, string(e.RawData)) + var e voiceWebsocketMessage + if err := json.Unmarshal(message, &e); err != nil { + v.log(LogError, "unmarshall error, %s", err) return } - // Start the voice websocket heartbeat to keep the connection alive - go v.wsHeartbeat(v.wsConn, v.close, v.op2.HeartbeatInterval) - // TODO monitor a chan/bool to verify this was successful + if e.Sequence != nil { + v.seqAck.Store(*e.Sequence) + } + + switch e.Operation { + + case 2: // READY + op2 := voiceOP2{} + + if err := json.Unmarshal(e.RawData, &op2); err != nil { + err := fmt.Errorf("OP2 unmarshal error, %s, %s", err, string(e.RawData)) + v.failure(err) + return + } + + v.Cond.L.Lock() + v.op2 = op2 + v.Cond.Broadcast() + v.Cond.L.Unlock() + + // Start the UDP connection + err := v.udpOpen(ctx) + if err != nil { + err := fmt.Errorf("error opening udp connection, %s", err) + v.failure(err) + return + } - // Start the UDP connection - err := v.udpOpen() - if err != nil { - v.log(LogError, "error opening udp connection, %s", err) return - } - // Start the opusSender. - // TODO: Should we allow 48000/960 values to be user defined? 
- if v.OpusSend == nil { - v.OpusSend = make(chan []byte, 2) - } - go v.opusSender(v.udpConn, v.close, v.OpusSend, 48000, 960) + case 4: // udp encryption secret key + op4 := voiceOP4{} + if err := json.Unmarshal(e.RawData, &op4); err != nil { + err := fmt.Errorf("OP4 unmarshal error, %s, %s", err, string(e.RawData)) + v.failure(err) + return + } - // Start the opusReceiver - if !v.deaf { - if v.OpusRecv == nil { - v.OpusRecv = make(chan *Packet, 2) + v.Cond.L.Lock() + v.op4 = op4 + switch op4.Mode { + case "aead_aes256_gcm_rtpsize": + block, err := aes.NewCipher(op4.SecretKey) + if err != nil { + v.Cond.L.Unlock() + v.failure(err) + return + } + v.cipher, err = cipher.NewGCM(block) + if err != nil { + v.Cond.L.Unlock() + v.failure(err) + return + } + case "aead_xchacha20_poly1305_rtpsize": + var err error + v.cipher, err = chacha20poly1305.NewX(op4.SecretKey) + if err != nil { + v.Cond.L.Unlock() + v.failure(err) + return + } + default: + err := fmt.Errorf("%w: %s", ErrVoiceUnknownEncryptionMode, op4.Mode) + v.Cond.L.Unlock() + v.failure(err) + return } - go v.opusReceiver(v.udpConn, v.close, v.OpusRecv) - } + // Start the opusSender. + // TODO: Should we allow 48000/960 values to be user defined? + if v.OpusSend == nil { + v.OpusSend = make(chan []byte, 2) + } + go v.opusSender(ctx, 48000, 960) - return + // Start the opusReceiver + if !v.deaf { + if v.OpusRecv == nil { + v.OpusRecv = make(chan *Packet, 2) + } - case 3: // HEARTBEAT response - // add code to use this to track latency? 
- return + go v.opusReceiver(ctx) + } - case 4: // udp encryption secret key - v.Lock() - defer v.Unlock() + v.Status = VoiceConnectionStatusReady - v.op4 = voiceOP4{} - if err := json.Unmarshal(e.RawData, &v.op4); err != nil { - v.log(LogError, "OP4 unmarshall error, %s, %s", err, string(e.RawData)) + v.Cond.Broadcast() + v.Cond.L.Unlock() return - } - return - case 5: - if len(v.voiceSpeakingUpdateHandlers) == 0 { + case 5: + if len(v.voiceSpeakingUpdateHandlers) == 0 { + return + } + + voiceSpeakingUpdate := &VoiceSpeakingUpdate{} + if err := json.Unmarshal(e.RawData, voiceSpeakingUpdate); err != nil { + v.log(LogError, "OP5 unmarshall error, %s, %s", err, string(e.RawData)) + return + } + + for _, h := range v.voiceSpeakingUpdateHandlers { + h(v, voiceSpeakingUpdate) + } + + case 6: // HEARTBEAT response + // add code to use this to track latency? + v.log(LogDebug, "recieved heartbeat ACK") return - } - voiceSpeakingUpdate := &VoiceSpeakingUpdate{} - if err := json.Unmarshal(e.RawData, voiceSpeakingUpdate); err != nil { - v.log(LogError, "OP5 unmarshall error, %s, %s", err, string(e.RawData)) + case 8: // HELLO + + op8 := voiceOP8{} + + if err := json.Unmarshal(e.RawData, &op8); err != nil { + v.log(LogError, "OP6 unmarshall error, %s, %s", err, string(e.RawData)) + return + } + // Start the voice websocket heartbeat to keep the connection alive + go v.wsHeartbeat(ctx, v.wsConn, op8.HeartbeatInterval) + + case 9: // resumed + v.log(LogInformational, "resumed voice websocket") return - } - for _, h := range v.voiceSpeakingUpdateHandlers { - h(v, voiceSpeakingUpdate) + default: + v.log(LogDebug, "unknown voice operation, %d, %s", e.Operation, string(e.RawData)) } - - default: - v.log(LogDebug, "unknown voice operation, %d, %s", e.Operation, string(e.RawData)) } - return } type voiceHeartbeatOp struct { - Op int `json:"op"` // Always 3 - Data int `json:"d"` + Op int `json:"op"` // Always 3 + Data voiceHeartbeatData `json:"d"` +} + +type voiceHeartbeatData struct { 
+ T int64 `json:"t"` + SeqAck int32 `json:"seq_ack"` } -// NOTE :: When a guild voice server changes how do we shut this down -// properly, so a new connection can be setup without fuss? -// // wsHeartbeat sends regular heartbeats to voice Discord so it knows the client // is still connected. If you do not send these heartbeats Discord will // disconnect the websocket connection after a few seconds. -func (v *VoiceConnection) wsHeartbeat(wsConn *websocket.Conn, close <-chan struct{}, i time.Duration) { +func (v *VoiceConnection) wsHeartbeat(ctx context.Context, wsConn *websocket.Conn, interval int) { - if close == nil || wsConn == nil { + if wsConn == nil { return } var err error - ticker := time.NewTicker(i * time.Millisecond) + ticker := time.NewTicker(time.Duration(interval) * time.Millisecond) defer ticker.Stop() for { v.log(LogDebug, "sending heartbeat packet") - v.wsMutex.Lock() - err = wsConn.WriteJSON(voiceHeartbeatOp{3, int(time.Now().Unix())}) - v.wsMutex.Unlock() + err = wsConn.WriteJSON(voiceHeartbeatOp{3, voiceHeartbeatData{time.Now().Unix(), v.seqAck.Load()}}) if err != nil { - v.log(LogError, "error sending heartbeat to voice endpoint %s, %s", v.endpoint, err) + v.log(LogError, "error sending heartbeat to voice endpoint, %s", err) return } select { case <-ticker.C: // continue loop and send heartbeat - case <-close: + case <-ctx.Done(): return } } @@ -551,7 +694,7 @@ func (v *VoiceConnection) wsHeartbeat(wsConn *websocket.Conn, close <-chan struc type voiceUDPData struct { Address string `json:"address"` // Public IP of machine running this code Port uint16 `json:"port"` // UDP Port of machine running this code - Mode string `json:"mode"` // always "xsalsa20_poly1305" + Mode string `json:"mode"` // Encryption mode } type voiceUDPD struct { @@ -568,26 +711,9 @@ type voiceUDPOp struct { // initial required handshake. This connection is left open in the session // and can be used to send or receive audio. 
This should only be called // from voice.wsEvent OP2 -func (v *VoiceConnection) udpOpen() (err error) { - - v.Lock() - defer v.Unlock() +func (v *VoiceConnection) udpOpen(ctx context.Context) (err error) { - if v.wsConn == nil { - return fmt.Errorf("nil voice websocket") - } - - if v.udpConn != nil { - return fmt.Errorf("udp connection already open") - } - - if v.close == nil { - return fmt.Errorf("nil close channel") - } - - if v.endpoint == "" { - return fmt.Errorf("empty endpoint") - } + v.Cond.L.Lock() host := v.op2.IP + ":" + strconv.Itoa(v.op2.Port) addr, err := net.ResolveUDPAddr("udp", host) @@ -597,12 +723,24 @@ func (v *VoiceConnection) udpOpen() (err error) { } v.log(LogInformational, "connecting to udp addr %s", addr.String()) - v.udpConn, err = net.DialUDP("udp", nil, addr) + udpConn, err := net.DialUDP("udp", nil, addr) if err != nil { v.log(LogWarning, "error connecting to udp addr %s, %s", addr.String(), err) return } + v.udpConn = udpConn + + v.Cond.Broadcast() + v.Cond.L.Unlock() + + // close if context done + go func() { + <-ctx.Done() + err := udpConn.Close() + v.log(LogDebug, "closed voice UDP due to context done, %v", err) + }() + // Create a 74 byte array to store the packet data sb := make([]byte, 74) binary.BigEndian.PutUint16(sb, 1) // Packet type (0x1 is request, 0x2 is response) @@ -636,28 +774,43 @@ func (v *VoiceConnection) udpOpen() (err error) { var ip string for i := 8; i < len(rb)-2; i++ { if rb[i] == 0 { - break + ip = string(rb[8:i]) } - ip += string(rb[i]) } // Grab port from position 72 and 73 port := binary.BigEndian.Uint16(rb[len(rb)-2:]) + encryptionMode := "" +encryptionModeLoop: + for _, mode := range v.op2.Modes { + switch mode { + case "aead_aes256_gcm_rtpsize": + encryptionMode = mode + break encryptionModeLoop // prefer + case "aead_xchacha20_poly1305_rtpsize": + encryptionMode = mode + } + } + // Take the data from above and send it back to Discord to finalize // the UDP connection handshake. 
- data := voiceUDPOp{1, voiceUDPD{"udp", voiceUDPData{ip, port, "xsalsa20_poly1305"}}} + data := voiceUDPOp{1, voiceUDPD{"udp", voiceUDPData{ip, port, encryptionMode}}} - v.wsMutex.Lock() - err = v.wsConn.WriteJSON(data) - v.wsMutex.Unlock() + v.Cond.L.Lock() + wsConn := v.wsConn + v.Cond.L.Unlock() + if wsConn == nil { + return + } + err = wsConn.WriteJSON(data) if err != nil { - v.log(LogWarning, "udp write error, %#v, %s", data, err) + v.log(LogWarning, "udpop write error, %#v, %s", data, err) return } // start udpKeepAlive - go v.udpKeepAlive(v.udpConn, v.close, 5*time.Second) + go v.udpKeepAlive(ctx, v.udpConn, 5*time.Second) // TODO: find a way to check that it fired off okay return @@ -665,12 +818,7 @@ func (v *VoiceConnection) udpOpen() (err error) { // udpKeepAlive sends a udp packet to keep the udp connection open // This is still a bit of a "proof of concept" -func (v *VoiceConnection) udpKeepAlive(udpConn *net.UDPConn, close <-chan struct{}, i time.Duration) { - - if udpConn == nil || close == nil { - return - } - +func (v *VoiceConnection) udpKeepAlive(ctx context.Context, udpConn *net.UDPConn, i time.Duration) { var err error var sequence uint64 @@ -692,7 +840,7 @@ func (v *VoiceConnection) udpKeepAlive(udpConn *net.UDPConn, close <-chan struct select { case <-ticker.C: // continue loop and send keepalive - case <-close: + case <-ctx.Done(): return } } @@ -700,29 +848,21 @@ func (v *VoiceConnection) udpKeepAlive(udpConn *net.UDPConn, close <-chan struct // opusSender will listen on the given channel and send any // pre-encoded opus audio to Discord. Supposedly. 
-func (v *VoiceConnection) opusSender(udpConn *net.UDPConn, close <-chan struct{}, opus <-chan []byte, rate, size int) { +func (v *VoiceConnection) opusSender(ctx context.Context, rate, size int) { - if udpConn == nil || close == nil { - return - } + v.log(LogInformational, "called") - // VoiceConnection is now ready to receive audio packets - // TODO: this needs reviewed as I think there must be a better way. - v.Lock() - v.Ready = true - v.Unlock() - defer func() { - v.Lock() - v.Ready = false - v.Unlock() - }() + v.Cond.L.Lock() + udpConn := v.udpConn + v.Cond.L.Unlock() var sequence uint16 var timestamp uint32 var recvbuf []byte var ok bool udpHeader := make([]byte, 12) - var nonce [24]byte + + var nonce = make([]byte, v.cipher.NonceSize()) // build the parts that don't change in the udpHeader udpHeader[0] = 0x80 @@ -732,22 +872,22 @@ func (v *VoiceConnection) opusSender(udpConn *net.UDPConn, close <-chan struct{} // start a send loop that loops until buf chan is closed ticker := time.NewTicker(time.Millisecond * time.Duration(size/(rate/1000))) defer ticker.Stop() - for { + for i := uint32(0); true; i++ { // Get data from chan. If chan is closed, return. 
select { - case <-close: + case <-ctx.Done(): return - case recvbuf, ok = <-opus: + case recvbuf, ok = <-v.OpusSend: if !ok { return } // else, continue loop } - v.RLock() + v.Cond.L.Lock() speaking := v.speaking - v.RUnlock() + v.Cond.L.Unlock() if !speaking { err := v.Speaking(true) if err != nil { @@ -760,15 +900,18 @@ func (v *VoiceConnection) opusSender(udpConn *net.UDPConn, close <-chan struct{} binary.BigEndian.PutUint32(udpHeader[4:], timestamp) // encrypt the opus data - copy(nonce[:], udpHeader) - v.RLock() - sendbuf := secretbox.Seal(udpHeader, recvbuf, &nonce, &v.op4.SecretKey) - v.RUnlock() + binary.LittleEndian.PutUint32(nonce, i) + sendbuf := make([]byte, len(udpHeader), len(udpHeader)+len(nonce)+len(recvbuf)+v.cipher.Overhead()) + copy(sendbuf, udpHeader) + v.Cond.L.Lock() + sendbuf = v.cipher.Seal(sendbuf, nonce, recvbuf, udpHeader) + v.Cond.L.Unlock() + sendbuf = append(sendbuf, nonce[:4]...) // block here until we're exactly at the right time :) // Then send rtp audio packet to Discord over UDP select { - case <-close: + case <-ctx.Done(): return case <-ticker.C: // continue @@ -776,68 +919,60 @@ func (v *VoiceConnection) opusSender(udpConn *net.UDPConn, close <-chan struct{} _, err := udpConn.Write(sendbuf) if err != nil { - v.log(LogError, "udp write error, %s", err) - v.log(LogDebug, "voice struct: %#v\n", v) + err := fmt.Errorf("udp write error, %w", err) + v.failure(err) return } - if (sequence) == 0xFFFF { - sequence = 0 - } else { - sequence++ - } - - if (timestamp + uint32(size)) >= 0xFFFFFFFF { - timestamp = 0 - } else { - timestamp += uint32(size) - } + // don't care overflow because it is already defined in Go spec + // https://go.dev/ref/spec#Integer_overflow + sequence++ + timestamp += uint32(size) } } // A Packet contains the headers and content of a received voice packet. 
type Packet struct { - SSRC uint32 - Sequence uint16 - Timestamp uint32 - Type []byte - Opus []byte - PCM []int16 + Flags byte // first byte of RTP header + PayloadType byte // second byte of RTP header + Sequence uint16 + Timestamp uint32 + SSRC uint32 + CSRC []uint32 + Extension []byte // RTP header extension with extension header, can be nil + Opus []byte } // opusReceiver listens on the UDP socket for incoming packets // and sends them across the given channel // NOTE :: This function may change names later. -func (v *VoiceConnection) opusReceiver(udpConn *net.UDPConn, close <-chan struct{}, c chan *Packet) { +func (v *VoiceConnection) opusReceiver(ctx context.Context) { - if udpConn == nil || close == nil { - return - } + v.log(LogInformational, "called") + + v.Cond.L.Lock() + udpConn := v.udpConn + ch := v.OpusRecv + v.Cond.L.Unlock() recvbuf := make([]byte, 1024) - var nonce [24]byte + var nonce = make([]byte, v.cipher.NonceSize()) for { rlen, err := udpConn.Read(recvbuf) if err != nil { - // Detect if we have been closed manually. If a Close() has already - // happened, the udp connection we are listening on will be different - // to the current session. 
- v.RLock() - sameConnection := v.udpConn == udpConn - v.RUnlock() - if sameConnection { - - v.log(LogError, "udp read error, %s, %s", v.endpoint, err) - v.log(LogDebug, "voice struct: %#v\n", v) - - go v.reconnect() + select { + case <-ctx.Done(): + return + default: + err := fmt.Errorf("udp read error, %w", err) + v.failure(err) + return } - return } select { - case <-close: + case <-ctx.Done(): return default: // continue loop @@ -850,101 +985,46 @@ func (v *VoiceConnection) opusReceiver(udpConn *net.UDPConn, close <-chan struct // build a audio packet struct p := Packet{} - p.Type = recvbuf[0:2] + p.Flags = recvbuf[0] + p.PayloadType = recvbuf[1] + extentionExist := (p.Flags & 0x10) != 0 // RFC 3550 5.1 + csrcCount := (p.Flags & 0x0f) // RFC 3550 5.1 p.Sequence = binary.BigEndian.Uint16(recvbuf[2:4]) p.Timestamp = binary.BigEndian.Uint32(recvbuf[4:8]) p.SSRC = binary.BigEndian.Uint32(recvbuf[8:12]) + p.CSRC = make([]uint32, csrcCount) + for i := range p.CSRC { + p.CSRC[i] = binary.BigEndian.Uint32(recvbuf[12+4*i : 12+4*(i+1)]) + } + plainLength := 12 + 4*int(csrcCount) + if extentionExist { + plainLength += 4 + } + // decrypt opus data - copy(nonce[:], recvbuf[0:12]) + copy(nonce, recvbuf[rlen-4:rlen]) - if opus, ok := secretbox.Open(nil, recvbuf[12:rlen], &nonce, &v.op4.SecretKey); ok { - p.Opus = opus - } else { + v.Cond.L.Lock() + p.Opus, err = v.cipher.Open(recvbuf[plainLength:plainLength], nonce, recvbuf[plainLength:rlen-4], recvbuf[:plainLength]) + v.Cond.L.Unlock() + if err != nil { + v.log(LogInformational, "failed to open udp packet, %v", err) continue } - // extension bit set, and not a RTCP packet - if ((recvbuf[0] & 0x10) == 0x10) && ((recvbuf[1] & 0x80) == 0) { - // get extended header length - extlen := binary.BigEndian.Uint16(p.Opus[2:4]) - // 4 bytes (ext header header) + 4*extlen (ext header data) - shift := int(4 + 4*extlen) - if len(p.Opus) > shift { - p.Opus = p.Opus[shift:] - } + if extentionExist { + extensionBegin := 12 + 4*int(csrcCount) 
+ extensionLength := binary.BigEndian.Uint16(recvbuf[extensionBegin+2 : extensionBegin+4]) + p.Extension = recvbuf[extensionBegin : extensionBegin+4+int(extensionLength)*4] + p.Opus = p.Opus[int(extensionLength)*4:] } - if c != nil { + if ch != nil { select { - case c <- &p: - case <-close: + case ch <- &p: + case <-ctx.Done(): return } } } } - -// Reconnect will close down a voice connection then immediately try to -// reconnect to that session. -// NOTE : This func is messy and a WIP while I find what works. -// It will be cleaned up once a proven stable option is flushed out. -// aka: this is ugly shit code, please don't judge too harshly. -func (v *VoiceConnection) reconnect() { - - v.log(LogInformational, "called") - - v.Lock() - if v.reconnecting { - v.log(LogInformational, "already reconnecting to channel %s, exiting", v.ChannelID) - v.Unlock() - return - } - v.reconnecting = true - v.Unlock() - - defer func() { - v.Lock() - v.reconnecting = false - v.Unlock() - }() - - // Close any currently open connections - v.Close() - - wait := time.Duration(1) - for { - - <-time.After(wait * time.Second) - wait *= 2 - if wait > 600 { - wait = 600 - } - - if v.session.DataReady == false || v.session.wsConn == nil { - v.log(LogInformational, "cannot reconnect to channel %s with unready session", v.ChannelID) - continue - } - - v.log(LogInformational, "trying to reconnect to channel %s", v.ChannelID) - - _, err := v.session.ChannelVoiceJoin(v.GuildID, v.ChannelID, v.mute, v.deaf) - if err == nil { - v.log(LogInformational, "successfully reconnected to channel %s", v.ChannelID) - return - } - - v.log(LogInformational, "error reconnecting to channel %s, %s", v.ChannelID, err) - - // if the reconnect above didn't work lets just send a disconnect - // packet to reset things. 
- // Send a OP4 with a nil channel to disconnect - data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, nil, true, true}} - v.session.wsMutex.Lock() - err = v.session.wsConn.WriteJSON(data) - v.session.wsMutex.Unlock() - if err != nil { - v.log(LogError, "error sending disconnect packet, %s", err) - } - - } -} diff --git a/wsapi.go b/wsapi.go index d101c542a..2978cd035 100644 --- a/wsapi.go +++ b/wsapi.go @@ -13,11 +13,13 @@ package discordgo import ( "bytes" "compress/zlib" + "context" "encoding/json" "errors" "fmt" "io" "net/http" + "sync" "sync/atomic" "time" @@ -697,16 +699,16 @@ type voiceChannelJoinOp struct { // ChannelVoiceJoin joins the session user to a voice channel. // -// gID : Guild ID of the channel to join. -// cID : Channel ID of the channel to join. -// mute : If true, you will be set to muted upon joining. -// deaf : If true, you will be set to deafened upon joining. -func (s *Session) ChannelVoiceJoin(gID, cID string, mute, deaf bool) (voice *VoiceConnection, err error) { +// gID : Guild ID of the channel to join. +// cID : Channel ID of the channel to join. +// mute : If true, you will be set to muted upon joining. +// deaf : If true, you will be set to deafened upon joining. 
+func (s *Session) ChannelVoiceJoin(ctx context.Context, gID, cID string, mute, deaf bool) (voice *VoiceConnection, err error) { s.log(LogInformational, "called") s.RLock() - voice, _ = s.VoiceConnections[gID] + voice = s.VoiceConnections[gID] s.RUnlock() if voice == nil { @@ -716,39 +718,35 @@ func (s *Session) ChannelVoiceJoin(gID, cID string, mute, deaf bool) (voice *Voi s.Unlock() } - voice.Lock() + voice.Cond = sync.NewCond(&sync.Mutex{}) + voice.Cond.L.Lock() + voice.Status = VoiceConnectionStatusNew + voice.dead = make(chan struct{}) + voice.Dead = voice.dead voice.GuildID = gID - voice.ChannelID = cID - voice.deaf = deaf - voice.mute = mute voice.session = s - voice.Unlock() + voice.LogLevel = s.LogLevel + voice.Cond.L.Unlock() - err = s.ChannelVoiceJoinManual(gID, cID, mute, deaf) + err = s.VoiceStateUpdate(gID, cID, mute, deaf) if err != nil { return } - // doesn't exactly work perfect yet.. TODO - err = voice.waitUntilConnected() - if err != nil { - s.log(LogWarning, "error waiting for voice to connect, %s", err) - voice.Close() - return - } + err = voice.waitUntilStatus(ctx, VoiceConnectionStatusReady) return } -// ChannelVoiceJoinManual initiates a voice session to a voice channel, but does not complete it. +// VoiceStateUpdate initiates a voice session to a voice channel, but does not complete it. // // This should only be used when the VoiceServerUpdate will be intercepted and used elsewhere. // -// gID : Guild ID of the channel to join. -// cID : Channel ID of the channel to join, leave empty to disconnect. -// mute : If true, you will be set to muted upon joining. -// deaf : If true, you will be set to deafened upon joining. -func (s *Session) ChannelVoiceJoinManual(gID, cID string, mute, deaf bool) (err error) { +// gID : Guild ID of the channel to join. +// cID : Channel ID of the channel to join, leave empty to disconnect. +// mute : If true, you will be set to muted upon joining. +// deaf : If true, you will be set to deafened upon joining. 
+func (s *Session) VoiceStateUpdate(gID, cID string, mute, deaf bool) (err error) { s.log(LogInformational, "called") @@ -770,11 +768,6 @@ func (s *Session) ChannelVoiceJoinManual(gID, cID string, mute, deaf bool) (err // onVoiceStateUpdate handles Voice State Update events on the data websocket. func (s *Session) onVoiceStateUpdate(st *VoiceStateUpdate) { - // If we don't have a connection for the channel, don't bother - if st.ChannelID == "" { - return - } - // Check if we have a voice connection to update s.RLock() voice, exists := s.VoiceConnections[st.GuildID] @@ -789,11 +782,16 @@ func (s *Session) onVoiceStateUpdate(st *VoiceStateUpdate) { } // Store the SessionID for later use. - voice.Lock() - voice.UserID = st.UserID - voice.sessionID = st.SessionID - voice.ChannelID = st.ChannelID - voice.Unlock() + if st.ChannelID == "" { + voice.Kill() + } else { + voice.Cond.L.Lock() + defer voice.Cond.L.Unlock() + voice.sessionID = st.SessionID + voice.mute = st.Mute + voice.deaf = st.Deaf + voice.Cond.Broadcast() + } } // onVoiceServerUpdate handles the Voice Server Update data websocket event. @@ -801,12 +799,12 @@ func (s *Session) onVoiceStateUpdate(st *VoiceStateUpdate) { // This is also fired if the Guild's voice region changes while connected // to a voice channel. In that case, need to re-establish connection to // the new region endpoint. -func (s *Session) onVoiceServerUpdate(st *VoiceServerUpdate) { +func (s *Session) onVoiceServerUpdate(ev *VoiceServerUpdate) { s.log(LogInformational, "called") s.RLock() - voice, exists := s.VoiceConnections[st.GuildID] + voice, exists := s.VoiceConnections[ev.GuildID] s.RUnlock() // If no VoiceConnection exists, just skip this @@ -814,19 +812,8 @@ func (s *Session) onVoiceServerUpdate(st *VoiceServerUpdate) { return } - // If currently connected to voice ws/udp, then disconnect. - // Has no effect if not connected. 
- voice.Close() - - // Store values for later use - voice.Lock() - voice.token = st.Token - voice.endpoint = st.Endpoint - voice.GuildID = st.GuildID - voice.Unlock() - // Open a connection to the voice server - err := voice.open() + err := voice.onVoiceServerUpdate(ev) if err != nil { s.log(LogError, "onVoiceServerUpdate voice.open, %s", err) } @@ -891,25 +878,6 @@ func (s *Session) reconnect() { err = s.Open() if err == nil { s.log(LogInformational, "successfully reconnected to gateway") - - // I'm not sure if this is actually needed. - // if the gw reconnect works properly, voice should stay alive - // However, there seems to be cases where something "weird" - // happens. So we're doing this for now just to improve - // stability in those edge cases. - if s.ShouldReconnectVoiceOnSessionError { - s.RLock() - defer s.RUnlock() - for _, v := range s.VoiceConnections { - - s.log(LogInformational, "reconnecting voice connection to guild %s", v.GuildID) - go v.reconnect() - - // This is here just to prevent violently spamming the - // voice reconnects - time.Sleep(1 * time.Second) - } - } return } From 918af90ced373ff72ae3d729b83a5c18d6444880 Mon Sep 17 00:00:00 2001 From: ozraru Date: Sun, 19 Jan 2025 01:04:59 +0900 Subject: [PATCH 2/3] Remove use sync/atomic for compat of old Go --- voice.go | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/voice.go b/voice.go index edf14a59a..79602fc40 100644 --- a/voice.go +++ b/voice.go @@ -20,7 +20,6 @@ import ( "net" "strconv" "sync" - "sync/atomic" "time" "github.com/gorilla/websocket" @@ -88,7 +87,7 @@ type VoiceConnection struct { voiceSpeakingUpdateHandlers []VoiceSpeakingUpdateHandler - seqAck atomic.Int32 // for heartbeat and resume + seqAck int // for heartbeat and resume } // VoiceSpeakingUpdateHandler type provides a function definition for the @@ -217,7 +216,7 @@ func (v *VoiceConnection) failure(err error) { type voiceWebsocketMessage struct { Operation int `json:"op"` 
RawData json.RawMessage `json:"d"` - Sequence *int32 `json:"seq"` + Sequence *int `json:"seq"` } // A voiceOP2 stores the data for the voice operation 2 websocket event @@ -340,7 +339,9 @@ func (v *VoiceConnection) websocket(ctx context.Context, endpoint string, token <-timeout.C } - v.seqAck.Store(-1) + v.Cond.L.Lock() + v.seqAck = -1 + v.Cond.L.Unlock() for i := 0; i < 100; i++ { select { @@ -407,18 +408,21 @@ func (v *VoiceConnection) websocket(ctx context.Context, endpoint string, token ServerID string `json:"server_id"` SessionID string `json:"session_id"` Token string `json:"token"` - SeqAck int32 `json:"seq_ack"` + SeqAck int `json:"seq_ack"` } type voiceResumeOp struct { Op int `json:"op"` // Always 7 Data voiceResumeData `json:"d"` } + + v.Cond.L.Lock() data := voiceResumeOp{7, voiceResumeData{ ServerID: v.GuildID, SessionID: v.sessionID, Token: token, - SeqAck: v.seqAck.Load(), + SeqAck: v.seqAck, }} + v.Cond.L.Unlock() v.log(LogInformational, "resuming voice websocket") v.log(LogDebug, "resume packet, %#v", data) @@ -516,7 +520,10 @@ func (v *VoiceConnection) onEvent(ctx context.Context, binary bool, message []by } if e.Sequence != nil { - v.seqAck.Store(*e.Sequence) + + v.Cond.L.Lock() + v.seqAck = *e.Sequence + v.Cond.L.Unlock() } switch e.Operation { @@ -655,7 +662,7 @@ type voiceHeartbeatOp struct { type voiceHeartbeatData struct { T int64 `json:"t"` - SeqAck int32 `json:"seq_ack"` + SeqAck int `json:"seq_ack"` } // wsHeartbeat sends regular heartbeats to voice Discord so it knows the client @@ -672,7 +679,10 @@ func (v *VoiceConnection) wsHeartbeat(ctx context.Context, wsConn *websocket.Con defer ticker.Stop() for { v.log(LogDebug, "sending heartbeat packet") - err = wsConn.WriteJSON(voiceHeartbeatOp{3, voiceHeartbeatData{time.Now().Unix(), v.seqAck.Load()}}) + v.Cond.L.Lock() + seqAck := v.seqAck + v.Cond.L.Unlock() + err = wsConn.WriteJSON(voiceHeartbeatOp{3, voiceHeartbeatData{time.Now().Unix(), seqAck}}) if err != nil { v.log(LogError, "error 
sending heartbeat to voice endpoint, %s", err) return From 308a3a2695d0badd0f10899e1f8a045b614ce6ee Mon Sep 17 00:00:00 2001 From: ozraru Date: Sun, 19 Jan 2025 01:17:02 +0900 Subject: [PATCH 3/3] Edited comments to pass lint --- voice.go | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/voice.go b/voice.go index 79602fc40..a455994e6 100644 --- a/voice.go +++ b/voice.go @@ -30,14 +30,22 @@ import ( // Code related to both VoiceConnection Websocket and UDP connections. // ------------------------------------------------------------------------------------------------ +// VoiceConnectionStatus is the status of a VoiceConnection +// New -> Connecting <-> Ready +// any -> Dead type VoiceConnectionStatus int const ( - VoiceConnectionStatusInvalid VoiceConnectionStatus = iota // status not specified, bug? - VoiceConnectionStatusNew // initiating connection - VoiceConnectionStatusConnecting // connecting websocket and udp - VoiceConnectionStatusReady // ready to send/receive audio - VoiceConnectionStatusDead // already dead(error or disconnected normally) + // VoiceConnectionStatusInvalid means the status was not specified, which is probably a bug + VoiceConnectionStatusInvalid VoiceConnectionStatus = iota + // VoiceConnectionStatusNew means the connection is being initiated + VoiceConnectionStatusNew + // VoiceConnectionStatusConnecting means the websocket and udp are connecting (includes reconnecting) + VoiceConnectionStatusConnecting + // VoiceConnectionStatusReady means ready to send/receive audio + VoiceConnectionStatusReady + // VoiceConnectionStatusDead means already dead (error or disconnected normally) + VoiceConnectionStatusDead ) // A VoiceConnection struct holds all the data and functions related to a Discord Voice Connection. @@ -45,8 +53,6 @@ type VoiceConnection struct { Cond *sync.Cond // Status of this connection. Please don't change.
- // New -> Connecting <-> Ready - // any -> Dead Status VoiceConnectionStatus // Closed if this VoiceConection status become Dead @@ -145,7 +151,7 @@ func (v *VoiceConnection) Disconnect(ctx context.Context) error { return v.waitUntilStatus(ctx, VoiceConnectionStatusDead) } -// Stop all goroutines related to this voice conection, remove self from Session, and set status to dead. +// Kill stops all goroutines related to this voice connection, removes it from the Session, and sets its status to dead. // NOTE: unlock before calling this func (v *VoiceConnection) Kill() { @@ -295,8 +301,13 @@ func (v *VoiceConnection) onVoiceServerUpdate(ev *VoiceServerUpdate) (err error) return } +// ErrVoiceNoSessionID means the voice Session ID was not received in time var ErrVoiceNoSessionID = errors.New("did not receive voice Session ID in time") + +// ErrVoiceReconnectionLimit means the hard limit on reconnection attempts was reached var ErrVoiceReconnectionLimit = errors.New("reconnection limit reached") + +// ErrVoiceUnknownEncryptionMode means Discord requested an encryption mode that is not supported var ErrVoiceUnknownEncryptionMode = errors.New("unknown encryption mode") // websocket open the voice websocket, handle reconnect, and listens on it for messages and passes them to the voice event handler. @@ -316,19 +327,19 @@ func (v *VoiceConnection) websocket(ctx context.Context, endpoint string, token v.wsCancel = cancel v.Cond.L.Unlock() - sessionIdDone := make(chan struct{}) + sessionIDDone := make(chan struct{}) go func() { v.Cond.L.Lock() defer v.Cond.L.Unlock() for v.sessionID == "" { v.Cond.Wait() } - close(sessionIdDone) + close(sessionIDDone) }() timeout := time.NewTimer(1 * time.Second) select { - case <-sessionIdDone: + case <-sessionIDDone: case <-timeout.C: v.failure(ErrVoiceNoSessionID) return