diff --git a/chat-android/src/main/java/com/ably/chat/Room.kt b/chat-android/src/main/java/com/ably/chat/Room.kt index 28ac2cd2..2cd61f25 100644 --- a/chat-android/src/main/java/com/ably/chat/Room.kt +++ b/chat-android/src/main/java/com/ably/chat/Room.kt @@ -213,7 +213,7 @@ internal class DefaultRoom( lifecycleManager = RoomLifecycleManager(roomScope, statusLifecycle, roomFeatures, this.logger) - this.logger.debug("Initialized with features: ${roomFeatures.joinToString { it.featureName }}") + this.logger.debug("Initialized with features: ${roomFeatures.map { it.featureName }.joinWithBrackets}") } override fun onStatusChange(listener: RoomLifecycle.Listener): Subscription = diff --git a/chat-android/src/main/java/com/ably/chat/RoomLifecycleManager.kt b/chat-android/src/main/java/com/ably/chat/RoomLifecycleManager.kt index 4da631bc..a8fa775f 100644 --- a/chat-android/src/main/java/com/ably/chat/RoomLifecycleManager.kt +++ b/chat-android/src/main/java/com/ably/chat/RoomLifecycleManager.kt @@ -117,8 +117,12 @@ internal class RoomLifecycleManager( private val roomScope: CoroutineScope, private val statusLifecycle: DefaultRoomLifecycle, private val contributors: List, - private val logger: Logger, + roomLogger: Logger, ) { + private val logger = roomLogger.withContext( + "RoomLifecycleManager", + dynamicContext = mapOf("scope" to { Thread.currentThread().name }), + ) /** * AtomicCoroutineScope makes sure all operations are atomic and run with given priority. @@ -183,63 +187,84 @@ internal class RoomLifecycleManager( * @returns Returns when the room is attached, or the room enters a failed state. * Spec: CHA-RL5 */ - @Suppress("CognitiveComplexMethod", "ThrowsCount") + @Suppress("CognitiveComplexMethod", "ThrowsCount", "LongMethod") private suspend fun doRetry(contributor: ContributesToRoomLifecycle) { + logger.trace("doRetry();") + // Handle the channel wind-down. + logger.debug("doRetry(); winding down channels except for feature: ${contributor.featureName}") // CHA-RL5a - Handle the channel wind-down for other channels var result = kotlin.runCatching { doChannelWindDown(contributor) } while (result.isFailure) { // CHA-RL5c - If in doing the wind down, we've entered failed state, then it's game over anyway if (this.statusLifecycle.status === RoomStatus.Failed) { + logger.error("doRetry(); channels wind down failed, RoomStatus is FAILED", result.exceptionOrNull()) throw result.exceptionOrNull() ?: IllegalStateException("room is in a failed state") } + logger.warn("doRetry(); channels wind down failed, retrying in $retryDurationInMs ms", result.exceptionOrNull()) delay(retryDurationInMs) result = kotlin.runCatching { doChannelWindDown(contributor) } } + logger.debug("doRetry(); channels wind down successfully completed") // A helper that allows us to retry the attach operation val doAttachWithRetry: suspend () -> Unit = { + logger.trace("doRetry.doAttachWithRetry();") coroutineScope { + logger.debug("doRetry.doAttachWithRetry(); attempting to attach all feature channels") statusLifecycle.setStatus(RoomStatus.Attaching) val attachmentResult = doAttach() // CHA-RL5c - If we're in failed, then we should wind down all the channels, eventually - but we're done here if (attachmentResult.status === RoomStatus.Failed) { + logger.error("doRetry.doAttachWithRetry(); failed to attach channels, RoomStatus is Failed", attachmentResult.exception) atomicCoroutineScope.async(LifecycleOperationPrecedence.Internal.priority) { runDownChannelsOnFailedAttach() } + logger.warn("doRetry.doAttachWithRetry(); started winding down all channels internally") return@coroutineScope } // If we're in suspended, then we should wait for the channel to reattach, but wait for it to do so if (attachmentResult.status === RoomStatus.Suspended) { - val failedFeature = attachmentResult.failedFeature - ?: throw lifeCycleException( - "no failed feature in doRetry", - ErrorCode.RoomLifecycleError, - ) + logger.warn( + "doRetry.doAttachWithRetry(); failed to attach channels, RoomStatus is Suspended", + attachmentResult.exception, + ) + val failedFeature = attachmentResult.failedFeature ?: throw lifeCycleException( + "no failed feature in doRetry", + ErrorCode.RoomLifecycleError, + ) + logger.warn("doRetry.doAttachWithRetry(); Retrying attach due to suspended feature: ${failedFeature.featureName}") // No need to catch errors, rather they should propagate to caller method return@coroutineScope doRetry(failedFeature) } + // We attached, huzzah! + logger.debug("doRetry.doAttachWithRetry(); Room ATTACH success, RoomStatus: ${statusLifecycle.status}") } } // If given suspended contributor channel has reattached, then we can retry the attach if (contributor.channel.state == ChannelState.attached) { + logger.debug("doRetry(); feature: ${contributor.featureName} reattached, retrying attach for others") return doAttachWithRetry() } // CHA-RL5d - Otherwise, wait for our suspended contributor channel to re-attach and try again try { + logger.debug("doRetry(); waiting to channel ATTACHED for feature: ${contributor.featureName}") listenToChannelAttachOrFailure(contributor) + logger.debug("doRetry(); waiting success, feature: ${contributor.featureName} ATTACHED, retrying attach for others") delay(retryDurationInMs) // Let other channels get into ATTACHING state - // Attach successful - return doAttachWithRetry() - } catch (ex: AblyException) { + } catch (ex: Throwable) { // CHA-RL5c - Channel attach failed - statusLifecycle.setStatus(RoomStatus.Failed, ex.errorInfo) + logger.error("doRetry(); waiting failed for channel attach, feature: ${contributor.featureName}", ex) + // No need for channels wind down, since it's taken care at the start of doRetry() + statusLifecycle.setStatus(RoomStatus.Failed, (ex as? AblyException)?.errorInfo) throw ex } + // Attach successful, try attach for other channels + return doAttachWithRetry() } /** @@ -292,59 +317,60 @@ internal class RoomLifecycleManager( */ @Suppress("ThrowsCount") internal suspend fun attach() { + logger.trace("attach();") val deferredAttach = atomicCoroutineScope.async(LifecycleOperationPrecedence.AttachOrDetach.priority) { // CHA-RL1d if (statusLifecycle.status == RoomStatus.Attached) { // CHA-RL1a + logger.debug("attach(); room is already attached") return@async } if (statusLifecycle.status == RoomStatus.Releasing) { // CHA-RL1b - throw lifeCycleException( - "unable to attach room; room is releasing", - ErrorCode.RoomIsReleasing, - ) + logger.error("attach(); attach failed, room is in releasing state") + throw lifeCycleException("unable to attach room; room is releasing", ErrorCode.RoomIsReleasing) } if (statusLifecycle.status == RoomStatus.Released) { // CHA-RL1c - throw lifeCycleException( - "unable to attach room; room is released", - ErrorCode.RoomIsReleased, - ) + logger.error("attach(); attach failed, room is in released state") + throw lifeCycleException("unable to attach room; room is released", ErrorCode.RoomIsReleased) } // At this point, we force the room status to be attaching clearAllTransientDetachTimeouts() operationInProgress = true statusLifecycle.setStatus(RoomStatus.Attaching) // CHA-RL1e + logger.debug("attach(); transitioned room to ATTACHING state") val attachResult = doAttach() // CHA-RL1h4 - If we're in a failed state, then we should wind down all the channels, eventually if (attachResult.status === RoomStatus.Failed) { + logger.error("attach(); failed to attach channels, RoomStatus is Failed", attachResult.exception) // CHA-RL1h5 - detach all remaining channels atomicCoroutineScope.async(LifecycleOperationPrecedence.Internal.priority) { runDownChannelsOnFailedAttach() } + logger.warn("attach(); started winding down all channels internally") throw attachResult.exception // CHA-RL1h1 } // CHA-RL1h1, CHA-RL1h2 - If we're in suspended, then this attach should fail, but we'll retry after a short delay async if (attachResult.status === RoomStatus.Suspended) { + logger.warn("attach(); failed to attach channels, RoomStatus is Suspended", attachResult.exception) if (attachResult.failedFeature == null) { - throw lifeCycleException( - "no failed feature in attach", - ErrorCode.RoomLifecycleError, - ) + throw lifeCycleException("no failed feature in attach", ErrorCode.RoomLifecycleError) } attachResult.failedFeature?.let { // CHA-RL1h3 - Enter recovery for failed room feature/contributor atomicCoroutineScope.async(LifecycleOperationPrecedence.Internal.priority) { doRetry(it) } + logger.warn("attach(); retrying attach due to suspended feature: ${it.featureName}") } throw attachResult.exception // CHA-RL1h1 } // We attached, finally! + logger.debug("attach(): room attached successfully") } deferredAttach.await() @@ -357,12 +383,17 @@ internal class RoomLifecycleManager( * Spec: CHA-RL1f, CHA-RL1g, CHA-RL1h */ private suspend fun doAttach(): RoomAttachmentResult { + logger.trace("doAttach();") val attachResult = DefaultRoomAttachmentResult() + logger.debug("doAttach(); trying to attach all features: ${contributors.map { it.featureName }.joinWithBrackets}") for (feature in contributors) { // CHA-RL1f - attach each feature sequentially try { + logger.debug("doAttach(); attaching feature: ${feature.featureName}") feature.channel.attachCoroutine() firstAttachesCompleted[feature] = true + logger.debug("doAttach(); attached feature: ${feature.featureName}") } catch (ex: Throwable) { // CHA-RL1h - handle channel attach failure + logger.error("doAttach(); attach failed for feature: ${feature.featureName}", ex) attachResult.throwable = ex attachResult.failedFeatureField = feature attachResult.errorField = lifeCycleErrorInfo( @@ -387,17 +418,21 @@ internal class RoomLifecycleManager( // Regardless of whether we're suspended or failed, run-down the other channels // The wind-down procedure will take Precedence over any user-driven actions + logger.warn("doAttach(); transitioning room to ${attachResult.status.name} state") statusLifecycle.setStatus(attachResult) return attachResult } } // CHA-RL1g, We successfully attached all the channels - set our status to attached, start listening changes in channel status + logger.debug("doAttach(); attach success for all features: ${contributors.map { it.featureName }.joinWithBrackets}") this.statusLifecycle.setStatus(attachResult) + logger.debug("doAttach(); transitioned room to ATTACHED state") this.operationInProgress = false // Iterate the pending discontinuity events and trigger them for ((contributor, error) in pendingDiscontinuityEvents) { + logger.warn("doAttach(); emitting discontinuity event for feature: ${contributor.featureName}") contributor.discontinuityDetected(error) } pendingDiscontinuityEvents.clear() @@ -407,9 +442,11 @@ internal class RoomLifecycleManager( /** * If we've failed to attach, then we're in the failed state and all that is left to do is to detach all the channels. * Spec: CHA-RL1h5, CHA-RL1h6 - * @returns Returns only when all channels are detached. Doesn't throw exception. + * @returns Returns only when all channels are either detached or failed. Doesn't throw exception. */ private suspend fun runDownChannelsOnFailedAttach() { + logger.trace("runDownChannelsOnFailedAttach();") + logger.debug("runDownChannelsOnFailedAttach(); attempting to detach all channels") // At this point, we have control over the channel lifecycle, so we can hold onto it until things are resolved // Keep trying to detach the channels until they're all detached. var channelWindDown = kotlin.runCatching { doChannelWindDown() } @@ -419,6 +456,7 @@ internal class RoomLifecycleManager( delay(retryDurationInMs) channelWindDown = kotlin.runCatching { doChannelWindDown() } } + logger.debug("runDownChannelsOnFailedAttach(); success, all channels are either detached or failed") } /** @@ -431,11 +469,13 @@ internal class RoomLifecycleManager( */ @Suppress("CognitiveComplexMethod", "ComplexCondition") private suspend fun doChannelWindDown(except: ContributesToRoomLifecycle? = null) = coroutineScope { + logger.trace("doChannelWindDown();") contributors.map { contributor: ContributesToRoomLifecycle -> async { // CHA-RL5a1 - If its the contributor we want to wait for a conclusion on, then we should not detach it // Unless we're in a failed state, in which case we should detach it if (contributor.channel === except?.channel && statusLifecycle.status !== RoomStatus.Failed) { + logger.debug("doChannelWindDown(); ignoring feature: ${contributor.featureName} as per except param") return@async } // If the room's already in the failed state, or it's releasing, we should not detach a failed channel @@ -446,12 +486,15 @@ internal class RoomLifecycleManager( ) && contributor.channel.state === ChannelState.failed ) { + logger.debug("doChannelWindDown(); ignoring feature: ${contributor.featureName} since it's already in failed state") return@async } try { + logger.debug("doChannelWindDown(); detaching feature: ${contributor.featureName}") contributor.channel.detachCoroutine() } catch (throwable: Throwable) { + logger.warn("doChannelWindDown(); failed to detach feature: ${contributor.featureName}", throwable) // CHA-RL2h2 - If the contributor is in a failed state and we're not ignoring failed states, we should fail the room if ( contributor.channel.state === ChannelState.failed && @@ -482,39 +525,36 @@ internal class RoomLifecycleManager( */ @Suppress("ThrowsCount") internal suspend fun detach() { + logger.trace("detach();") val deferredDetach = atomicCoroutineScope.async(LifecycleOperationPrecedence.AttachOrDetach.priority) { // CHA-RL2i // CHA-RL2a - If we're already detached, this is a no-op if (statusLifecycle.status === RoomStatus.Detached) { + logger.debug("detach(); room is already detached") return@async } // CHA-RL2c - If the room is released, we can't detach if (statusLifecycle.status === RoomStatus.Released) { - throw lifeCycleException( - "unable to detach room; room is released", - ErrorCode.RoomIsReleased, - ) + logger.error("detach(); detach failed, room is in released state") + throw lifeCycleException("unable to detach room; room is released", ErrorCode.RoomIsReleased) } // CHA-RL2b - If the room is releasing, we can't detach if (statusLifecycle.status === RoomStatus.Releasing) { - throw lifeCycleException( - "unable to detach room; room is releasing", - ErrorCode.RoomIsReleasing, - ) + logger.error("detach(); detach failed, room is in releasing state") + throw lifeCycleException("unable to detach room; room is releasing", ErrorCode.RoomIsReleasing) } // CHA-RL2d - If we're in failed, we should not attempt to detach if (statusLifecycle.status === RoomStatus.Failed) { - throw lifeCycleException( - "unable to detach room; room has failed", - ErrorCode.RoomInFailedState, - ) + logger.error("detach(); detach failed, room is in failed state") + throw lifeCycleException("unable to detach room; room has failed", ErrorCode.RoomInFailedState) } // CHA-RL2e - We force the room status to be detaching operationInProgress = true clearAllTransientDetachTimeouts() statusLifecycle.setStatus(RoomStatus.Detaching) + logger.debug("detach(); transitioned room to DETACHING state") // CHA-RL2f - We now perform an all-channel wind down. // We keep trying until we reach a suitable conclusion. @@ -524,16 +564,18 @@ internal class RoomLifecycleManager( } /** - * Perform a detach. - * If detaching a channel fails, we should retry until every channel is either in the detached state, or in the failed state. + * Performs detach until every channel gets into either detached or failed state. * Spec: CHA-RL2f */ private suspend fun doDetach() { + logger.trace("doDetach();") + logger.debug("doDetach(); detaching all features: ${contributors.map { it.featureName }.joinWithBrackets}") var channelWindDown = kotlin.runCatching { doChannelWindDown() } var firstContributorFailedError: AblyException? = null while (channelWindDown.isFailure) { // CHA-RL2h val err = channelWindDown.exceptionOrNull() if (err is AblyException && err.errorInfo?.code != -1 && firstContributorFailedError == null) { + logger.error("doDetach(); detach failed for one of the feature, detaching others", err) firstContributorFailedError = err // CHA-RL2h1- First failed contributor error is captured } delay(retryDurationInMs) @@ -542,16 +584,14 @@ internal class RoomLifecycleManager( // CHA-RL2g - If we aren't in the failed state, then we're detached if (statusLifecycle.status !== RoomStatus.Failed) { + logger.debug("doDetach(); successfully detached all features: ${contributors.map { it.featureName }.joinWithBrackets}") statusLifecycle.setStatus(RoomStatus.Detached) + logger.debug("doDetach(); transitioned room to DETACHED state") return } // CHA-RL2h1 - If we're in the failed state, then we need to throw the error - throw firstContributorFailedError - ?: lifeCycleException( - "unknown error in doDetach", - ErrorCode.RoomLifecycleError, - ) + throw firstContributorFailedError ?: lifeCycleException("unknown error in doDetach", ErrorCode.RoomLifecycleError) } /** @@ -564,9 +604,11 @@ internal class RoomLifecycleManager( * Spec: CHA-RL3 */ internal suspend fun release() { + logger.trace("release();") val deferredRelease = atomicCoroutineScope.async(LifecycleOperationPrecedence.Release.priority) { // CHA-RL3k // CHA-RL3a - If we're already released, this is a no-op if (statusLifecycle.status === RoomStatus.Released) { + logger.debug("release(); room is already released") return@async } @@ -574,6 +616,7 @@ internal class RoomLifecycleManager( if (statusLifecycle.status === RoomStatus.Detached || statusLifecycle.status === RoomStatus.Initialized ) { + logger.debug("release(); room is in ${statusLifecycle.status}, so transitioning to released state") statusLifecycle.setStatus(RoomStatus.Released) return@async } @@ -582,6 +625,7 @@ internal class RoomLifecycleManager( clearAllTransientDetachTimeouts() operationInProgress = true statusLifecycle.setStatus(RoomStatus.Releasing) + logger.debug("release(); transitioned room to RELEASING state") // CHA-RL3f - Do the release until it completes return@async releaseChannels() @@ -595,12 +639,15 @@ internal class RoomLifecycleManager( * Spec: CHA-RL3f, CHA-RL3d */ private suspend fun releaseChannels() { + logger.trace("releaseChannels();") + logger.debug("releaseChannels(); releasing all features: ${contributors.map { it.featureName }.joinWithBrackets}") var contributorsReleased = kotlin.runCatching { doRelease() } while (contributorsReleased.isFailure) { // Wait a short period and then try again delay(retryDurationInMs) contributorsReleased = kotlin.runCatching { doRelease() } } + logger.debug("releaseChannels(); success, released all features: ${contributors.map { it.featureName }.joinWithBrackets}") } /** @@ -610,29 +657,37 @@ internal class RoomLifecycleManager( */ @Suppress("RethrowCaughtException") private suspend fun doRelease() = coroutineScope { + logger.trace("doRelease();") contributors.map { contributor: ContributesToRoomLifecycle -> async { // CHA-RL3e - Failed channels, we can ignore if (contributor.channel.state == ChannelState.failed) { + logger.debug("doRelease(); ignoring feature: ${contributor.featureName} since it's in failed state") return@async } // Detached channels, we can ignore if (contributor.channel.state == ChannelState.detached) { + logger.debug("doRelease(); ignoring feature: ${contributor.featureName} since it's already detached") return@async } try { + logger.debug("doRelease(); detaching feature: ${contributor.featureName}") contributor.channel.detachCoroutine() + logger.debug("doRelease(); successfully detached feature: ${contributor.featureName}") } catch (ex: Throwable) { - // TODO - log error here before rethrowing + logger.warn("doRelease(); failed to detach feature: ${contributor.featureName}", ex) throw ex } } }.awaitAll() // CHA-RL3h - underlying Realtime Channels are released from the core SDK prevent leakage + logger.debug("doRelease(); releasing underlying channels from core SDK to prevent leakage") contributors.forEach { it.release() } + logger.debug("doRelease(); underlying channels released from core SDK") statusLifecycle.setStatus(RoomStatus.Released) // CHA-RL3g + logger.debug("doRelease(); transitioned room to RELEASED state") } } diff --git a/chat-android/src/main/java/com/ably/chat/Utils.kt b/chat-android/src/main/java/com/ably/chat/Utils.kt index d9a02c1b..1ff75324 100644 --- a/chat-android/src/main/java/com/ably/chat/Utils.kt +++ b/chat-android/src/main/java/com/ably/chat/Utils.kt @@ -133,6 +133,8 @@ val Channel.errorMessage: String ", ${reason.message}" } +val List.joinWithBrackets: String get() = joinToString(prefix = "[", postfix = "]") { it } + @Suppress("FunctionName") fun ChatChannelOptions(init: (ChannelOptions.() -> Unit)? = null): ChannelOptions { val options = ChannelOptions()