diff --git a/OneSignalSDK/onesignal/core/src/main/java/com/onesignal/internal/OneSignalImp.kt b/OneSignalSDK/onesignal/core/src/main/java/com/onesignal/internal/OneSignalImp.kt index 29fca0b41..3aa19004c 100644 --- a/OneSignalSDK/onesignal/core/src/main/java/com/onesignal/internal/OneSignalImp.kt +++ b/OneSignalSDK/onesignal/core/src/main/java/com/onesignal/internal/OneSignalImp.kt @@ -50,7 +50,11 @@ internal class OneSignalImp( private val ioDispatcher: CoroutineDispatcher = Dispatchers.IO, ) : IOneSignal, IServiceProvider { - private val suspendCompletion = CompletableDeferred() + // Reset every time the synchronized(initLock) block flips state to IN_PROGRESS so that + // a retry-after-FAILED gets a fresh latch instead of an already-completed one. Mutated only + // under initLock; reads outside that lock must local-capture before suspending on it. + @Volatile + private var suspendCompletion = CompletableDeferred() @Volatile private var initState: InitState = InitState.NOT_STARTED @@ -305,6 +309,9 @@ internal class OneSignalImp( } initState = InitState.IN_PROGRESS + // Fresh latch for this init attempt -- prevents retry-after-FAILED waiters from + // observing the prior init's already-completed deferred. + suspendCompletion = CompletableDeferred() } // FeatureManager depends on ConfigModelStore/PreferencesService which requires appContext. @@ -333,48 +340,69 @@ internal class OneSignalImp( return initWithContextSuspend(context, null) } + // ReturnCount: 4 returns vs detekt's limit of 2. The function intentionally has + // multiple early-return failure paths (user-locked, missing appId, exception). The + // existing detekt baseline entry was for the unannotated signature; once we added + // @Suppress(TooGenericExceptionCaught) the baseline ID no longer matched, so we + // suppress ReturnCount inline rather than churning the baseline. + @Suppress("ReturnCount", "TooGenericExceptionCaught") private fun internalInit( context: Context, appId: String?, ): Boolean { - // Check whether current Android user is accessible. - // Return early if it is inaccessible, as we are unable to complete initialization without access - // to device storage like SharedPreferences. - if (!AndroidUtils.isAndroidUserUnlocked(context)) { - Logging.warn("initWithContext called when device storage is locked, no user data is accessible!") - initState = InitState.FAILED - notifyInitComplete() - return false - } + try { + // Check whether current Android user is accessible. + // Return early if it is inaccessible, as we are unable to complete initialization without access + // to device storage like SharedPreferences. + if (!AndroidUtils.isAndroidUserUnlocked(context)) { + Logging.warn("initWithContext called when device storage is locked, no user data is accessible!") + initState = InitState.FAILED + notifyInitComplete() + return false + } + + initEssentials(context) - initEssentials(context) + val startupService = bootstrapServices() - val startupService = bootstrapServices() + // Now that the IoC container is ready, subscribe the Otel lifecycle + // manager to config store events so it reacts to fresh remote config. + otelManager?.subscribeToConfigStore(services.getService()) - // Now that the IoC container is ready, subscribe the Otel lifecycle - // manager to config store events so it reacts to fresh remote config. - otelManager?.subscribeToConfigStore(services.getService()) + val result = resolveAppId(appId, configModel, preferencesService) + if (result.failed) { + val message = "suspendInitInternal: no appId provided or found in local storage. Please pass a valid appId to initWithContext()." + val exception = IllegalStateException(message) + // attach the real crash cause to the init failure exception that will be throw shortly after + initFailureException?.addSuppressed(exception) + Logging.warn(message) + initState = InitState.FAILED + notifyInitComplete() + return false + } + configModel.appId = result.appId!! // safe because failed is false + val forceCreateUser = result.forceCreateUser - val result = resolveAppId(appId, configModel, preferencesService) - if (result.failed) { - val message = "suspendInitInternal: no appId provided or found in local storage. Please pass a valid appId to initWithContext()." - val exception = IllegalStateException(message) - // attach the real crash cause to the init failure exception that will be throw shortly after - initFailureException?.addSuppressed(exception) - Logging.warn(message) + updateConfig() + userSwitcher.initUser(forceCreateUser) + startupService.scheduleStart() + initState = InitState.SUCCESS + notifyInitComplete() + return true + } catch (t: Throwable) { + // Any unchecked throw from initEssentials/bootstrapServices/subscribeToConfigStore/ + // updateConfig/userSwitcher.initUser/startupService.scheduleStart would otherwise leave + // initState at IN_PROGRESS forever and `suspendCompletion` uncompleted -- causing + // re-entrant suspend callers (e.g. SyncJobService) to hang indefinitely on `await()`. + // Always reach a terminal state and signal waiters before propagating. + Logging.error("OneSignal: internalInit threw unexpectedly; marking init FAILED", t) + initFailureException = (initFailureException ?: IllegalStateException("OneSignal initWithContext failed.")).apply { + addSuppressed(t) + } initState = InitState.FAILED notifyInitComplete() return false } - configModel.appId = result.appId!! // safe because failed is false - val forceCreateUser = result.forceCreateUser - - updateConfig() - userSwitcher.initUser(forceCreateUser) - startupService.scheduleStart() - initState = InitState.SUCCESS - notifyInitComplete() - return true } override fun login( @@ -474,7 +502,16 @@ internal class OneSignalImp( * @param operationName Optional operation name to include in error messages (e.g., "login", "logout") */ private suspend fun waitUntilInitInternal(operationName: String? = null) { - when (initState) { + // Local-capture state + deferred under initLock so we await on the same generation + // we observed (a concurrent retry-after-FAILED can replace `suspendCompletion`). + val observedState: InitState + val completionToAwait: CompletableDeferred? + synchronized(initLock) { + observedState = initState + completionToAwait = if (observedState == InitState.IN_PROGRESS) suspendCompletion else null + } + + when (observedState) { InitState.NOT_STARTED -> { val message = if (operationName != null) { "Must call 'initWithContext' before '$operationName'" @@ -496,7 +533,7 @@ internal class OneSignalImp( // This is intentional per PR #2412: "ANR is the lesser of two evils and the app can recover, // where an uncaught throw it can not." To avoid ANRs, call SDK methods from background threads // or use the suspend API from coroutines. - suspendCompletion.await() + completionToAwait!!.await() // Log how long initialization took val elapsed = System.currentTimeMillis() - startTime @@ -632,19 +669,39 @@ internal class OneSignalImp( ): Boolean { Logging.log(LogLevel.DEBUG, "initWithContext(context: $context, appId: $appId)") - // This ensures the stack trace points to the caller that triggered init, not the async worker thread. - initFailureException = IllegalStateException("OneSignal initWithContext failed.") - // Use IO dispatcher for initialization to prevent ANRs and optimize for I/O operations return withContext(runtimeIoDispatcher) { - // do not do this again if already initialized or init is in progress + val shouldRunInit: Boolean + // Local-capture under the lock so that even if a concurrent retry-after-FAILED + // resets `suspendCompletion`, we await on the same generation we observed. + val completionToAwait: CompletableDeferred? synchronized(initLock) { if (initState.isSDKAccessible()) { - Logging.log(LogLevel.DEBUG, "initWithContext: SDK already initialized or in progress") - return@withContext true + shouldRunInit = false + completionToAwait = suspendCompletion + } else { + shouldRunInit = true + completionToAwait = null + initState = InitState.IN_PROGRESS + // Fresh latch for this init attempt. + suspendCompletion = CompletableDeferred() + // Only the call that actually starts init owns the failure-attribution exception. + // Re-entrant callers must not overwrite it -- otherwise the failure stack trace + // would point at the SyncJobService coroutine instead of the original initiator. + initFailureException = IllegalStateException("OneSignal initWithContext failed.") } + } - initState = InitState.IN_PROGRESS + if (!shouldRunInit) { + // Another caller has already started (or completed) init. Honor this method's + // contract by suspending until initialization is *fully* completed -- not just + // kicked off. This closes a race where re-entrant suspend callers (e.g. the + // SyncJobService entry point under SDK_BACKGROUND_THREADING) would otherwise + // proceed to use IBackgroundService implementations like SessionService whose + // bootstrap() had not yet run, NPE'ing on still-null model fields. + Logging.log(LogLevel.DEBUG, "initWithContext: init already in progress or completed, awaiting completion") + completionToAwait!!.await() + return@withContext initState == InitState.SUCCESS } val result = internalInit(context, appId) diff --git a/OneSignalSDK/onesignal/core/src/main/java/com/onesignal/session/internal/session/impl/SessionService.kt b/OneSignalSDK/onesignal/core/src/main/java/com/onesignal/session/internal/session/impl/SessionService.kt index 8c3613385..1997d486b 100644 --- a/OneSignalSDK/onesignal/core/src/main/java/com/onesignal/session/internal/session/impl/SessionService.kt +++ b/OneSignalSDK/onesignal/core/src/main/java/com/onesignal/session/internal/session/impl/SessionService.kt @@ -35,13 +35,24 @@ internal class SessionService( private val _time: ITime, ) : ISessionService, IBootstrapService, IStartableService, IBackgroundService, IApplicationLifecycleHandler { override val startTime: Long - get() = session!!.startTime + // Pre-bootstrap default returns "now" so call sites computing `_time.currentTimeMillis - startTime` + // (e.g. IAM session-duration / SESSION_TIME triggers) see ~0ms elapsed instead of ~58 years + // (which is what `0L` / Jan 1970 would produce). + get() = session?.startTime ?: _time.currentTimeMillis /** * Run in the background when the session would time out, only if a session is currently active. + * + * Returns null if [bootstrap] has not yet run -- callers that invoke us before bootstrap + * (possible under SDK_BACKGROUND_THREADING when init is still in flight) should treat this + * as "no schedule needed" rather than crashing on a null-deref. */ override val scheduleBackgroundRunIn: Long? - get() = if (session!!.isValid) config!!.sessionFocusTimeout else null + get() { + val session = this.session ?: return null + val config = this.config ?: return null + return if (session.isValid) config.sessionFocusTimeout else null + } private val sessionLifeCycleNotifier: EventProducer = EventProducer() private var session: SessionModel? = null @@ -69,13 +80,17 @@ internal class SessionService( } private fun endSession() { - if (!session!!.isValid) return - val activeDuration = session!!.activeDuration + // Defensive: if bootstrap() has not run yet, there is no session state to end. + // This can happen under SDK_BACKGROUND_THREADING when SyncJobService races with + // an in-flight initWithContext that has not yet reached bootstrapServices(). + val session = this.session ?: return + if (!session.isValid) return + val activeDuration = session.activeDuration Logging.debug("SessionService.backgroundRun: Session ended. activeDuration: $activeDuration") - session!!.isValid = false + session.isValid = false sessionLifeCycleNotifier.fire { it.onSessionEnded(activeDuration) } - session!!.activeDuration = 0L + session.activeDuration = 0L } /** @@ -90,34 +105,45 @@ internal class SessionService( override fun onFocus(firedOnSubscribe: Boolean) { Logging.log(LogLevel.DEBUG, "SessionService.onFocus() - fired from start: $firedOnSubscribe") + val session = this.session + if (session == null) { + Logging.warn("SessionService.onFocus called before bootstrap; ignoring.") + return + } + // Treat app cold starts as a new session, we attempt to end any previous session to do this. if (!hasFocused) { hasFocused = true endSession() } - if (!session!!.isValid) { + if (!session.isValid) { // As the old session was made inactive, we need to create a new session shouldFireOnSubscribe = firedOnSubscribe - session!!.sessionId = UUID.randomUUID().toString() - session!!.startTime = _time.currentTimeMillis - session!!.focusTime = session!!.startTime - session!!.isValid = true - Logging.debug("SessionService: New session started at ${session!!.startTime}") + session.sessionId = UUID.randomUUID().toString() + session.startTime = _time.currentTimeMillis + session.focusTime = session.startTime + session.isValid = true + Logging.debug("SessionService: New session started at ${session.startTime}") sessionLifeCycleNotifier.fire { it.onSessionStarted() } } else { // existing session: just remember the focus time so we can calculate the active time // when onUnfocused is called. - session!!.focusTime = _time.currentTimeMillis + session.focusTime = _time.currentTimeMillis sessionLifeCycleNotifier.fire { it.onSessionActive() } } } override fun onUnfocused() { + val session = this.session + if (session == null) { + Logging.warn("SessionService.onUnfocused called before bootstrap; ignoring.") + return + } // capture the amount of time the app was focused - val dt = _time.currentTimeMillis - session!!.focusTime - session!!.activeDuration += dt - Logging.log(LogLevel.DEBUG, "SessionService.onUnfocused adding time $dt for total: ${session!!.activeDuration}") + val dt = _time.currentTimeMillis - session.focusTime + session.activeDuration += dt + Logging.log(LogLevel.DEBUG, "SessionService.onUnfocused adding time $dt for total: ${session.activeDuration}") } override fun subscribe(handler: ISessionLifecycleHandler) { diff --git a/OneSignalSDK/onesignal/core/src/test/java/com/onesignal/core/internal/application/SDKInitTests.kt b/OneSignalSDK/onesignal/core/src/test/java/com/onesignal/core/internal/application/SDKInitTests.kt index 724784856..22397110b 100644 --- a/OneSignalSDK/onesignal/core/src/test/java/com/onesignal/core/internal/application/SDKInitTests.kt +++ b/OneSignalSDK/onesignal/core/src/test/java/com/onesignal/core/internal/application/SDKInitTests.kt @@ -18,8 +18,12 @@ import io.kotest.matchers.shouldNotBe import io.mockk.every import io.mockk.mockkObject import io.mockk.unmockkObject +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.async import kotlinx.coroutines.runBlocking +import kotlinx.coroutines.withTimeoutOrNull import java.util.concurrent.CountDownLatch +import java.util.concurrent.TimeUnit @RobolectricTest class SDKInitTests : FunSpec({ @@ -488,6 +492,189 @@ class SDKInitTests : FunSpec({ // Should throw immediately because isInitialized is false exception.message shouldBe "Must call 'initWithContext' before 'logout'" } + + // Regression test for the SessionService NPE under SDK_BACKGROUND_THREADING. + // + // Background: when init is in flight, internalInit() has not yet called bootstrapServices(), + // which means IBootstrapService implementations like SessionService have null model fields. + // SyncJobService re-enters via initWithContextSuspend(context, null) and then immediately + // calls runBackgroundServices(); if initWithContextSuspend returns true while init is still + // in progress (the old behavior), the loop hits SessionService.endSession() → NPE. + // + // The fix: initWithContextSuspend must suspend until init is *fully* completed, not just + // kicked off. This test verifies that contract by checking that os.isInitialized is true + // at the moment the re-entrant suspend call returns. + test("initWithContextSuspend with in-flight init waits for completion before returning") { + val context = getApplicationContext() + val trigger = CountDownLatch(1) + // started signals when the first caller has entered internalInit() and reached the + // blocking SharedPreferences access -- by which point initState is IN_PROGRESS. + val started = CountDownLatch(1) + val blockingCtx = object : ContextWrapper(context) { + override fun getSharedPreferences(name: String, mode: Int): SharedPreferences { + started.countDown() + // Bounded await -- if the test logic ever fails to release `trigger`, the + // IO worker exits cleanly instead of blocking forever and deadlocking the suite. + trigger.await(30, TimeUnit.SECONDS) + return super.getSharedPreferences(name, mode) + } + } + val os = OneSignalImp() + + runBlocking { + // First caller: stalls inside internalInit() once it tries to read prefs. + val firstInit = async(Dispatchers.IO) { + os.initWithContextSuspend(blockingCtx, "appId") + } + + // Deterministically wait until firstInit has entered the prefs-blocking region. + // After this point, initState is guaranteed to be IN_PROGRESS. + started.await(5, TimeUnit.SECONDS) shouldBe true + os.isInitialized shouldBe false + firstInit.isCompleted shouldBe false + + // Second caller: under the OLD behavior the synchronized(initLock) block sees + // state == IN_PROGRESS and returns `true` *while* internalInit() (running on the + // first caller) has not yet reached `initState = InitState.SUCCESS`. Under the + // fix, this call must suspend until the first caller fully completes init -- so + // that os.isInitialized is guaranteed `true` at the moment we return. + // + // We capture isInitialized at exactly the moment initWithContextSuspend returns + // for the second caller. Old code: false (bug). New code: true (fix). + val secondInit = async(Dispatchers.IO) { + val result = os.initWithContextSuspend(context, "appId") + val initializedAtReturn = os.isInitialized + Pair(result, initializedAtReturn) + } + + // Sanity: the second caller has not pre-empted the test by returning before + // we unblock the first caller (timing depends on lazy ServiceProvider locks). + Thread.sleep(200) + + // Unblock the first caller so internalInit() can complete (state -> SUCCESS). + trigger.countDown() + + firstInit.await() shouldBe true + val (secondResult, initializedAtReturn) = secondInit.await() + secondResult shouldBe true + // KEY assertion: os must be fully initialized at the moment the re-entrant + // suspend init returns. Old code violated this by returning early while state + // was still IN_PROGRESS, which is what allowed SyncJobService to reach + // runBackgroundServices() before bootstrap() had run. + initializedAtReturn shouldBe true + os.isInitialized shouldBe true + } + } + + // Regression test for review-flagged Defect 1 (stale latch on retry-after-FAILED). + // + // Background: `suspendCompletion` was a single-shot `val CompletableDeferred`. Once + // any init terminates (even FAILED), the deferred stays permanently complete -- so a + // re-entrant suspend caller arriving DURING a subsequent retry would `await()` on the + // already-completed deferred, return instantly, and read transient state (likely + // IN_PROGRESS -> false), silently dropping JobService work. + // + // The fix resets `suspendCompletion` whenever the synchronized(initLock) block flips state + // into IN_PROGRESS, and await sites local-capture the deferred under the lock. + test("initWithContextSuspend resets latch on retry-after-FAILED") { + val context = getApplicationContext() + val os = OneSignalImp() + + runBlocking { + // 1. Force a deterministic FAILED first init via the user-locked branch + // (matches the pattern used by other tests in this file). + mockkObject(AndroidUtils) + every { AndroidUtils.isAndroidUserUnlocked(any()) } returns false + val firstInit = os.initWithContextSuspend(context, "appId") + firstInit shouldBe false + os.isInitialized shouldBe false + // At this point the (pre-fix) single-shot `suspendCompletion` is permanently complete. + unmockkObject(AndroidUtils) + + // 2. Stall a fresh retry via BlockingPrefsContext so initState sits at IN_PROGRESS. + val started = CountDownLatch(1) + val trigger = CountDownLatch(1) + val blockingCtx = object : ContextWrapper(context) { + override fun getSharedPreferences(name: String, mode: Int): SharedPreferences { + started.countDown() + // Bounded await -- if the test logic ever fails to release `trigger`, the + // IO worker exits cleanly instead of blocking forever and deadlocking the suite. + trigger.await(30, TimeUnit.SECONDS) + return super.getSharedPreferences(name, mode) + } + } + + val secondInit = async(Dispatchers.IO) { os.initWithContextSuspend(blockingCtx, "appId") } + started.await(5, TimeUnit.SECONDS) shouldBe true + os.isInitialized shouldBe false + secondInit.isCompleted shouldBe false + + // 3. Re-entrant caller. With Defect 1, it would wake immediately on the + // FAILED-generation latch (still permanently complete) and return based on + // transient state. With the fix, it waits on the *new* generation's latch. + val thirdInit = async(Dispatchers.IO) { + val r = os.initWithContextSuspend(context, "appId") + val initializedAtReturn = os.isInitialized + Pair(r, initializedAtReturn) + } + + Thread.sleep(300) + thirdInit.isCompleted shouldBe false // would be true with the stale-latch bug + + // 4. Release the second init; both new callers complete with state == SUCCESS. + trigger.countDown() + + secondInit.await() shouldBe true + val (thirdResult, thirdInitializedAtReturn) = thirdInit.await() + thirdResult shouldBe true + thirdInitializedAtReturn shouldBe true + os.isInitialized shouldBe true + } + } + + // Regression test for review-flagged Defect 2 (indefinite hang if internalInit throws). + // + // Background: `internalInit` had no try/catch wrapping its body. An unchecked throw from + // initEssentials/bootstrapServices/etc. would leave initState=IN_PROGRESS forever and + // `suspendCompletion` uncompleted -- causing every re-entrant suspend caller (e.g. + // SyncJobService) to hang on `await()` indefinitely, holding its budget slot until the + // OS killed the worker. + // + // The fix wraps internalInit's body in try/catch, ensuring a terminal state and + // `notifyInitComplete()` on any throw. + test("initWithContextSuspend reaches terminal state when internalInit throws") { + val context = getApplicationContext() + val os = OneSignalImp() + + // Make the very first call inside internalInit throw (mirrors the user-locked test + // pattern, but with a throw instead of a `false` return). This is the cheapest way to + // simulate "any unchecked exception during bootstrap" without coupling to specific + // bootstrap internals. + mockkObject(AndroidUtils) + every { AndroidUtils.isAndroidUserUnlocked(any()) } throws RuntimeException("simulated bootstrap failure") + + runBlocking { + // Without the fix: the throw propagates out of internalInit, escapes withContext, + // and either fails the test outright or leaves the SDK in a deadlocked + // IN_PROGRESS state with `suspendCompletion` never completed. + // With the fix: the catch block sets FAILED + notifyInitComplete + returns false. + val firstResult = withTimeoutOrNull(5_000) { + os.initWithContextSuspend(context, "appId") + } + firstResult shouldBe false + os.isInitialized shouldBe false + + unmockkObject(AndroidUtils) + + // State is FAILED so a retry is allowed and (per Defect 1 fix) gets a fresh latch. + // This also doubles as a smoke test that we didn't leak IN_PROGRESS. + val retry = withTimeoutOrNull(5_000) { + os.initWithContextSuspend(context, "appId") + } + retry shouldBe true + os.isInitialized shouldBe true + } + } }) /**