diff --git a/lib/optimizely/event/batch_event_processor.rb b/lib/optimizely/event/batch_event_processor.rb index 52ec0533..88e23539 100644 --- a/lib/optimizely/event/batch_event_processor.rb +++ b/lib/optimizely/event/batch_event_processor.rb @@ -172,20 +172,35 @@ def flush_queue! return if @current_batch.empty? log_event = Optimizely::EventFactory.create_log_event(@current_batch, @logger) - begin - @logger.log( - Logger::INFO, - 'Flushing Queue.' - ) - - @event_dispatcher.dispatch_event(log_event) - @notification_center&.send_notifications( - NotificationCenter::NOTIFICATION_TYPES[:LOG_EVENT], - log_event - ) - rescue StandardError => e - @logger.log(Logger::ERROR, "Error dispatching event: #{log_event} #{e.message}.") + @logger.log( + Logger::INFO, + 'Flushing Queue.' + ) + + retry_count = 0 + max_retries = Optimizely::Helpers::Constants::EVENT_DISPATCH_CONFIG[:MAX_RETRIES] + + while retry_count < max_retries + begin + @event_dispatcher.dispatch_event(log_event) + @notification_center&.send_notifications( + NotificationCenter::NOTIFICATION_TYPES[:LOG_EVENT], + log_event + ) + # Success - break out of retry loop + break + rescue StandardError => e + @logger.log(Logger::ERROR, "Error dispatching event: #{log_event} #{e.message}.") + retry_count += 1 + + if retry_count < max_retries + delay = calculate_retry_interval(retry_count - 1) + @logger.log(Logger::DEBUG, "Retrying event dispatch (attempt #{retry_count} of #{max_retries - 1}) after #{delay}s") + sleep(delay) + end + end end + @current_batch = [] end @@ -231,5 +246,16 @@ def positive_number?(value) # false otherwise. Helpers::Validator.finite_number?(value) && value.positive? end + + # Calculate exponential backoff interval: 200ms, 400ms, 800ms, ... capped at 1s + # + # @param retry_count - Zero-based retry count + # @return [Float] - Delay in seconds + def calculate_retry_interval(retry_count) + initial_interval = Helpers::Constants::EVENT_DISPATCH_CONFIG[:INITIAL_RETRY_INTERVAL] + max_interval = Helpers::Constants::EVENT_DISPATCH_CONFIG[:MAX_RETRY_INTERVAL] + interval = initial_interval * (2**retry_count) + [interval, max_interval].min + end end end diff --git a/lib/optimizely/helpers/constants.rb b/lib/optimizely/helpers/constants.rb index db042acd..4334f56d 100644 --- a/lib/optimizely/helpers/constants.rb +++ b/lib/optimizely/helpers/constants.rb @@ -459,7 +459,10 @@ module Constants }.freeze EVENT_DISPATCH_CONFIG = { - REQUEST_TIMEOUT: 10 + REQUEST_TIMEOUT: 10, + MAX_RETRIES: 3, + INITIAL_RETRY_INTERVAL: 0.2, # 200ms in seconds + MAX_RETRY_INTERVAL: 1.0 # 1 second }.freeze ODP_GRAPHQL_API_CONFIG = { @@ -490,7 +493,9 @@ module Constants DEFAULT_QUEUE_CAPACITY: 10_000, DEFAULT_BATCH_SIZE: 10, DEFAULT_FLUSH_INTERVAL_SECONDS: 1, - DEFAULT_RETRY_COUNT: 3 + DEFAULT_RETRY_COUNT: 3, + INITIAL_RETRY_INTERVAL: 0.2, # 200ms in seconds + MAX_RETRY_INTERVAL: 1.0 # 1 second }.freeze HTTP_HEADERS = { diff --git a/lib/optimizely/odp/odp_event_manager.rb b/lib/optimizely/odp/odp_event_manager.rb index fc9084a1..e44ebd0d 100644 --- a/lib/optimizely/odp/odp_event_manager.rb +++ b/lib/optimizely/odp/odp_event_manager.rb @@ -239,7 +239,12 @@ def flush_batch! end break unless should_retry - @logger.log(Logger::DEBUG, 'Error dispatching ODP events, scheduled to retry.') if i < @retry_count + if i < @retry_count - 1 + # Exponential backoff: 200ms, 400ms, 800ms, ... capped at 1s + delay = calculate_retry_interval(i) + @logger.log(Logger::DEBUG, "Error dispatching ODP events, retrying (attempt #{i + 1} of #{@retry_count - 1}) after #{delay}s") + sleep(delay) + end i += 1 end @@ -282,5 +287,16 @@ def process_config_update @api_key = @odp_config&.api_key @api_host = @odp_config&.api_host end + + # Calculate exponential backoff interval: 200ms, 400ms, 800ms, ... capped at 1s + # + # @param retry_count - Zero-based retry count + # @return [Float] - Delay in seconds + def calculate_retry_interval(retry_count) + initial_interval = Helpers::Constants::ODP_EVENT_MANAGER[:INITIAL_RETRY_INTERVAL] + max_interval = Helpers::Constants::ODP_EVENT_MANAGER[:MAX_RETRY_INTERVAL] + interval = initial_interval * (2**retry_count) + [interval, max_interval].min + end end end diff --git a/spec/event/batch_event_processor_spec.rb b/spec/event/batch_event_processor_spec.rb index 14e0d01c..604784d6 100644 --- a/spec/event/batch_event_processor_spec.rb +++ b/spec/event/batch_event_processor_spec.rb @@ -293,9 +293,11 @@ @event_processor.flush # Wait until other thread has processed the event. sleep 0.1 until @event_processor.current_batch.empty? + sleep 0.7 # Wait for retries to complete (200ms + 400ms + processing time) expect(@notification_center).not_to have_received(:send_notifications) - expect(spy_logger).to have_received(:log).once.with( + # With retries, error will be logged 3 times (once per attempt) + expect(spy_logger).to have_received(:log).exactly(3).times.with( Logger::ERROR, "Error dispatching event: #{log_event} Timeout::Error." ) @@ -377,4 +379,93 @@ expect(@event_processor.event_queue.length).to eq(0) expect(spy_logger).to have_received(:log).with(Logger::WARN, 'Executor shutdown, not accepting tasks.').once end + + context 'retry logic with exponential backoff' do + it 'should retry on dispatch errors with exponential backoff' do + @event_processor = Optimizely::BatchEventProcessor.new( + event_dispatcher: @event_dispatcher, + batch_size: 1, + flush_interval: 10_000, + logger: spy_logger + ) + + user_event = Optimizely::UserEventFactory.create_conversion_event(project_config, event, 'test_user', nil, nil) + log_event = Optimizely::EventFactory.create_log_event(user_event, spy_logger) + + # Simulate dispatch failure twice, then success + call_count = 0 + allow(@event_dispatcher).to receive(:dispatch_event) do + call_count += 1 + raise StandardError, 'Network error' if call_count < 3 + end + + start_time = Time.now + @event_processor.process(user_event) + + # Wait for processing to complete + sleep 0.1 until @event_processor.event_queue.empty? + sleep 0.7 # Wait for retries to complete (200ms + 400ms + processing time) + + elapsed_time = Time.now - start_time + + # Should make 3 attempts total (1 initial + 2 retries) + expect(@event_dispatcher).to have_received(:dispatch_event).with(log_event).exactly(3).times + + # Should have delays: 200ms + 400ms = 600ms minimum + expect(elapsed_time).to be >= 0.6 + + # Should log retry attempts + expect(spy_logger).to have_received(:log).with( + Logger::DEBUG, /Retrying event dispatch/ + ).at_least(:twice) + end + + it 'should give up after max retries' do + @event_processor = Optimizely::BatchEventProcessor.new( + event_dispatcher: @event_dispatcher, + batch_size: 1, + flush_interval: 10_000, + logger: spy_logger + ) + + user_event = Optimizely::UserEventFactory.create_conversion_event(project_config, event, 'test_user', nil, nil) + log_event = Optimizely::EventFactory.create_log_event(user_event, spy_logger) + + # Simulate dispatch failure every time + allow(@event_dispatcher).to receive(:dispatch_event).and_raise(StandardError, 'Network error') + + @event_processor.process(user_event) + + # Wait for processing to complete + sleep 0.1 until @event_processor.event_queue.empty? + sleep 0.7 # Wait for all retries to complete + + # Should make 3 attempts total (1 initial + 2 retries) + expect(@event_dispatcher).to have_received(:dispatch_event).with(log_event).exactly(3).times + + # Should log error for each attempt + expect(spy_logger).to have_received(:log).with( + Logger::ERROR, /Error dispatching event/ + ).exactly(3).times + end + + it 'should calculate correct exponential backoff intervals' do + processor = Optimizely::BatchEventProcessor.new + + # First retry: 200ms + expect(processor.send(:calculate_retry_interval, 0)).to eq(0.2) + + # Second retry: 400ms + expect(processor.send(:calculate_retry_interval, 1)).to eq(0.4) + + # Third retry: 800ms + expect(processor.send(:calculate_retry_interval, 2)).to eq(0.8) + + # Fourth retry: capped at 1s + expect(processor.send(:calculate_retry_interval, 3)).to eq(1.0) + + # Fifth retry: still capped at 1s + expect(processor.send(:calculate_retry_interval, 4)).to eq(1.0) + end + end end diff --git a/spec/event_dispatcher_spec.rb b/spec/event_dispatcher_spec.rb index 193f584d..b061bcc0 100644 --- a/spec/event_dispatcher_spec.rb +++ b/spec/event_dispatcher_spec.rb @@ -47,16 +47,27 @@ it 'should pass the proxy_config to the HttpUtils helper class' do event = Optimizely::Event.new(:post, @url, @params, @post_headers) - expect(Optimizely::Helpers::HttpUtils).to receive(:make_request).with( + # Allow the method to be called (potentially multiple times due to retries) + allow(Optimizely::Helpers::HttpUtils).to receive(:make_request).with( event.url, event.http_verb, event.params.to_json, event.headers, Optimizely::Helpers::Constants::EVENT_DISPATCH_CONFIG[:REQUEST_TIMEOUT], proxy_config - ) + ).and_return(double(code: '200')) @customized_event_dispatcher.dispatch_event(event) + + # Verify it was called at least once with the correct parameters + expect(Optimizely::Helpers::HttpUtils).to have_received(:make_request).with( + event.url, + event.http_verb, + event.params.to_json, + event.headers, + Optimizely::Helpers::Constants::EVENT_DISPATCH_CONFIG[:REQUEST_TIMEOUT], + proxy_config + ).at_least(:once) end end @@ -171,10 +182,9 @@ stub_request(:post, @url).to_return(status: 399) event = Optimizely::Event.new(:post, @url, @params, @post_headers) - response = @customized_event_dispatcher.dispatch_event(event) + @customized_event_dispatcher.dispatch_event(event) - expect(response).to have_received(:log) - expect(spy_logger).to have_received(:log) + expect(spy_logger).to have_received(:log).with(Logger::DEBUG, 'event successfully sent with response code 399') expect(error_handler).to_not have_received(:handle_error) end @@ -182,10 +192,9 @@ stub_request(:post, @url).to_return(status: 600) event = Optimizely::Event.new(:post, @url, @params, @post_headers) - response = @customized_event_dispatcher.dispatch_event(event) + @customized_event_dispatcher.dispatch_event(event) - expect(response).to have_received(:log) - expect(spy_logger).to have_received(:log) + expect(spy_logger).to have_received(:log).with(Logger::DEBUG, 'event successfully sent with response code 600') expect(error_handler).not_to have_received(:handle_error) end end diff --git a/spec/odp/odp_event_manager_spec.rb b/spec/odp/odp_event_manager_spec.rb index 57402887..6729bea5 100644 --- a/spec/odp/odp_event_manager_spec.rb +++ b/spec/odp/odp_event_manager_spec.rb @@ -260,16 +260,20 @@ allow(SecureRandom).to receive(:uuid).and_return(test_uuid) event_manager = Optimizely::OdpEventManager.new(logger: spy_logger) retry_count = event_manager.instance_variable_get('@retry_count') - allow(event_manager.api_manager).to receive(:send_odp_events).exactly(retry_count + 1).times.with(api_key, api_host, odp_events).and_return(true) + allow(event_manager.api_manager).to receive(:send_odp_events).exactly(retry_count).times.with(api_key, api_host, odp_events).and_return(true) event_manager.start!(odp_config) event_manager.send_event(**events[0]) event_manager.send_event(**events[1]) event_manager.flush - sleep(0.1) until event_manager.instance_variable_get('@event_queue').empty? + # Need to wait longer for retries with exponential backoff (200ms + 400ms = 600ms) + sleep(1) until event_manager.instance_variable_get('@event_queue').empty? expect(event_manager.instance_variable_get('@current_batch').length).to eq 0 - expect(spy_logger).to have_received(:log).exactly(retry_count).times.with(Logger::DEBUG, 'Error dispatching ODP events, scheduled to retry.') + # Updated log message includes retry attempt and delay info + expect(spy_logger).to have_received(:log).with( + Logger::DEBUG, /Error dispatching ODP events, retrying/ + ).exactly(retry_count - 1).times expect(spy_logger).to have_received(:log).once.with(Logger::ERROR, "ODP event send failed (Failed after 3 retries: #{processed_events.to_json}).") event_manager.stop! @@ -278,16 +282,20 @@ it 'should retry on network failure' do allow(SecureRandom).to receive(:uuid).and_return(test_uuid) event_manager = Optimizely::OdpEventManager.new(logger: spy_logger) - allow(event_manager.api_manager).to receive(:send_odp_events).once.with(api_key, api_host, odp_events).and_return(true, true, false) + allow(event_manager.api_manager).to receive(:send_odp_events).with(api_key, api_host, odp_events).and_return(true, true, false) event_manager.start!(odp_config) event_manager.send_event(**events[0]) event_manager.send_event(**events[1]) event_manager.flush - sleep(0.1) until event_manager.instance_variable_get('@event_queue').empty? + # Need to wait longer for retries with exponential backoff (200ms + 400ms = 600ms) + sleep(1) until event_manager.instance_variable_get('@event_queue').empty? expect(event_manager.instance_variable_get('@current_batch').length).to eq 0 - expect(spy_logger).to have_received(:log).twice.with(Logger::DEBUG, 'Error dispatching ODP events, scheduled to retry.') + # Updated log message includes retry attempt and delay info + expect(spy_logger).to have_received(:log).with( + Logger::DEBUG, /Error dispatching ODP events, retrying/ + ).twice expect(spy_logger).not_to have_received(:log).with(Logger::ERROR, anything) expect(event_manager.running?).to be true event_manager.stop! @@ -539,5 +547,52 @@ expect(spy_logger).to have_received(:log).once.with(Logger::DEBUG, 'ODP event queue: cannot send before config has been set.') expect(spy_logger).not_to have_received(:log).with(Logger::ERROR, anything) end + + it 'should use exponential backoff between retries' do + allow(SecureRandom).to receive(:uuid).and_return(test_uuid) + event_manager = Optimizely::OdpEventManager.new(logger: spy_logger) + + # All requests fail to trigger retries + allow(event_manager.api_manager).to receive(:send_odp_events).with(api_key, api_host, odp_events).and_return(true) + event_manager.start!(odp_config) + + start_time = Time.now + event_manager.send_event(**events[0]) + event_manager.send_event(**events[1]) + event_manager.flush + + # Wait for all retries to complete (need at least 600ms for 200ms + 400ms delays) + sleep(1) until event_manager.instance_variable_get('@event_queue').empty? + elapsed_time = Time.now - start_time + + # Should have delays: 200ms + 400ms = 600ms minimum for 3 total attempts + expect(elapsed_time).to be >= 0.5 # Allow some tolerance + + # Should log retry attempts with delay info + expect(spy_logger).to have_received(:log).with( + Logger::DEBUG, /retrying \(attempt \d+ of \d+\) after/ + ).at_least(:once) + + event_manager.stop! + end + + it 'should calculate correct exponential backoff intervals' do + event_manager = Optimizely::OdpEventManager.new + + # First retry: 200ms + expect(event_manager.send(:calculate_retry_interval, 0)).to eq(0.2) + + # Second retry: 400ms + expect(event_manager.send(:calculate_retry_interval, 1)).to eq(0.4) + + # Third retry: 800ms + expect(event_manager.send(:calculate_retry_interval, 2)).to eq(0.8) + + # Fourth retry: capped at 1s + expect(event_manager.send(:calculate_retry_interval, 3)).to eq(1.0) + + # Fifth retry: still capped at 1s + expect(event_manager.send(:calculate_retry_interval, 4)).to eq(1.0) + end end end