-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[fix][broker] remove lock contention in delayed delivery stats read paths #25990
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,118 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.pulsar.broker.delayed.bucket; | ||
|
|
||
| import com.google.common.collect.Range; | ||
| import com.google.common.collect.RangeMap; | ||
| import com.google.common.collect.TreeRangeMap; | ||
| import java.util.Map; | ||
| import java.util.concurrent.atomic.AtomicLong; | ||
|
|
||
| /** | ||
| * Index of {@link ImmutableBucket}s keyed by ledger-ID {@link Range}. | ||
| * | ||
| * <p>The underlying {@link TreeRangeMap} is not thread-safe. All mutations must be | ||
| * performed under an external lock (for example {@code synchronized(tracker)}). | ||
| * | ||
| * <p>To support lock-free stats reads, this class maintains cached counters for: | ||
| * <ul> | ||
| * <li>number of buckets ({@link #count()})</li> | ||
| * <li>total snapshot length ({@link #totalSnapshotLength()})</li> | ||
| * </ul> | ||
| * | ||
| * <p>Note that {@link TreeRangeMap#put} automatically removes any existing entries | ||
| * whose ranges overlap with the inserted range. Because these removals are implicit, | ||
| * counter updates cannot be derived solely from the caller's operation. Therefore | ||
| * {@link #put(Range, ImmutableBucket)} and {@link #recomputeCounters()} rebuild the | ||
| * counters from the actual map state. | ||
| */ | ||
| class ImmutableBucketIndex { | ||
|
|
||
| private final TreeRangeMap<Long, ImmutableBucket> map = TreeRangeMap.create(); | ||
|
|
||
| private final AtomicLong count = new AtomicLong(); | ||
| private final AtomicLong totalSnapshotLength = new AtomicLong(); | ||
|
|
||
| /** | ||
| * Adds a bucket to the index. | ||
| * | ||
| * <p>If the supplied range overlaps existing entries, {@link TreeRangeMap} | ||
| * removes them automatically. Counters are therefore recomputed from the | ||
| * resulting map state. | ||
| */ | ||
| void put(Range<Long> range, ImmutableBucket bucket) { | ||
| map.put(range, bucket); | ||
| recomputeCounters(); | ||
| } | ||
|
|
||
| /** | ||
| * Removes the bucket associated with the given range. | ||
| * | ||
| * <p>The supplied range is expected to exactly match an existing entry. | ||
| */ | ||
| void remove(Range<Long> range) { | ||
| ImmutableBucket removed = map.asMapOfRanges().remove(range); | ||
| if (removed != null) { | ||
| count.decrementAndGet(); | ||
| totalSnapshotLength.addAndGet(-removed.getSnapshotLength()); | ||
| } | ||
| } | ||
|
|
||
| long count() { | ||
| return count.get(); | ||
| } | ||
|
|
||
| long totalSnapshotLength() { | ||
| return totalSnapshotLength.get(); | ||
| } | ||
|
|
||
| ImmutableBucket get(long key) { | ||
| return map.get(key); | ||
| } | ||
|
|
||
| RangeMap<Long, ImmutableBucket> subRangeMap(Range<Long> range) { | ||
| return map.subRangeMap(range); | ||
| } | ||
|
|
||
| Map<Range<Long>, ImmutableBucket> asMapOfRanges() { | ||
| return map.asMapOfRanges(); | ||
| } | ||
|
|
||
| boolean containsValue(ImmutableBucket bucket) { | ||
| return map.asMapOfRanges().containsValue(bucket); | ||
| } | ||
|
|
||
| /** | ||
| * Rebuilds cached counters from the current map state. | ||
| * | ||
| * <p>This method is useful after bulk updates or whenever the map state is | ||
| * considered the source of truth. | ||
| */ | ||
| void recomputeCounters() { | ||
| Map<Range<Long>, ImmutableBucket> ranges = map.asMapOfRanges(); | ||
|
|
||
| count.set(ranges.size()); | ||
|
|
||
| long snapshotLength = 0; | ||
| for (ImmutableBucket bucket : ranges.values()) { | ||
| snapshotLength += bucket.getSnapshotLength(); | ||
| } | ||
| totalSnapshotLength.set(snapshotLength); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -94,7 +94,7 @@ public class PersistentDispatcherMultipleConsumers extends AbstractPersistentDis | |
| protected final MessageRedeliveryController redeliveryMessages; | ||
| protected final RedeliveryTracker redeliveryTracker; | ||
|
|
||
| private Optional<DelayedDeliveryTracker> delayedDeliveryTracker = Optional.empty(); | ||
| private volatile Optional<DelayedDeliveryTracker> delayedDeliveryTracker = Optional.empty(); | ||
|
|
||
| protected volatile boolean havePendingRead = false; | ||
| protected volatile boolean havePendingReplayRead = false; | ||
|
|
@@ -1372,13 +1372,12 @@ protected boolean isNormalReadAllowed() { | |
| } | ||
|
|
||
|
|
||
|
|
||
| protected synchronized boolean shouldPauseDeliveryForDelayTracker() { | ||
| return delayedDeliveryTracker.isPresent() && delayedDeliveryTracker.get().shouldPauseAllDeliveries(); | ||
| protected boolean shouldPauseDeliveryForDelayTracker() { | ||
| return delayedDeliveryTracker.map(DelayedDeliveryTracker::shouldPauseAllDeliveries).orElse(false); | ||
| } | ||
|
|
||
| @Override | ||
| public synchronized long getNumberOfDelayedMessages() { | ||
| public long getNumberOfDelayedMessages() { | ||
| return delayedDeliveryTracker.map(DelayedDeliveryTracker::getNumberOfDelayedMessages).orElse(0L); | ||
| } | ||
|
|
||
|
|
@@ -1464,20 +1463,15 @@ public PersistentTopic getTopic() { | |
| } | ||
|
|
||
|
|
||
| public synchronized long getDelayedTrackerMemoryUsage() { | ||
| public long getDelayedTrackerMemoryUsage() { | ||
|
Comment on lines
-1467
to
+1466
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the implementation of getBufferMemoryUsage isn't thread safe in BucketDelayedDeliveryTracker or in InMemoryDelayedDeliveryTracker. One particular detail about RoaringBitmaps is that it's not thread safe for even plain concurrent reads and data races aren't the only problem. Reads can mutate RoaringBitmap internal state. I learned that quite recently and there are other bugs in Pulsar impacted by this too since StampedLock/ReadWriteLocks aren't safe with RoaringBitmap methods. There has been changes in the project to prevent internal datastructure corruption caused by concurrent reads, but I don't think that it has been fully resolved. I checked one of the recent changes and it looks invalid although it was merged to the project. Making a single method synchronized is useless if reads or writes happen outside of the same object monitor. Since BucketDelayedDeliveryTracker heavily uses RoaringBitmaps, I think that we need a different solution to avoid blocking when stats are requested. One possibility would be to have a background thread refresh stats after there has been activity since the last operation. The instances would have to be immutable for this to work properly.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I reported #25991 about the RoaringBitmap thread-safety issue across the code base. The solution would be to use ReadWriteLock and ensure that read lock is only used for methods that never mutate internal state. It's requires internal knowledge of the RoaringBitmap implementation to do this.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @lhotari Fixed |
||
| return delayedDeliveryTracker.map(DelayedDeliveryTracker::getBufferMemoryUsage).orElse(0L); | ||
| } | ||
|
|
||
| public synchronized Map<String, TopicMetricBean> getBucketDelayedIndexStats() { | ||
| if (delayedDeliveryTracker.isEmpty()) { | ||
| return Collections.emptyMap(); | ||
| } | ||
|
|
||
| if (delayedDeliveryTracker.get() instanceof BucketDelayedDeliveryTracker) { | ||
| return ((BucketDelayedDeliveryTracker) delayedDeliveryTracker.get()).genTopicMetricMap(); | ||
| } | ||
|
|
||
| return Collections.emptyMap(); | ||
| public Map<String, TopicMetricBean> getBucketDelayedIndexStats() { | ||
| return delayedDeliveryTracker | ||
| .filter(BucketDelayedDeliveryTracker.class::isInstance) | ||
| .map(tracker -> ((BucketDelayedDeliveryTracker) tracker).genTopicMetricMap()) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BucketDelayedDeliveryTracker.genTopicMetricMap is not thread safe. It should be made thread safe.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @lhotari Fixed
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the update. I think this still needs serialization for correctness. This method is now reachable from an unsynchronized stats path, but it is not a pure read: it updates shared |
||
| .orElse(Collections.emptyMap()); | ||
| } | ||
|
|
||
| @Override | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.