Skip to content

Commit 4fa25ef

Browse files
committed
[FLINK-39018][checkpoint] Notify PriorityEvent to downstream task even if it is blocked to ensure the checkpoint barrier can be handled by downstream task
1 parent d2147d0 commit 4fa25ef

2 files changed

Lines changed: 80 additions & 3 deletions

File tree

flink-runtime/src/main/java/org/apache/flink/runtime/io/network/partition/PipelinedSubpartition.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,9 @@ private boolean processPriorityBuffer(BufferConsumer bufferConsumer, int partial
252252
@GuardedBy("buffers")
253253
private boolean needNotifyPriorityEvent() {
254254
assert Thread.holdsLock(buffers);
255-
// if subpartition is blocked then downstream doesn't expect any notifications
256-
return buffers.getNumPriorityElements() == 1 && !isBlocked;
255+
// Priority events (unaligned checkpoint barriers) must notify downstream even when
256+
// blocked. The blocking mechanism is for data flow control, not for priority events.
257+
return buffers.getNumPriorityElements() == 1;
257258
}
258259

259260
@GuardedBy("buffers")
@@ -456,7 +457,9 @@ public void release() {
456457
@Nullable
457458
BufferAndBacklog pollBuffer() {
458459
synchronized (buffers) {
459-
if (isBlocked) {
460+
// When blocked, only allow priority buffers (e.g. unaligned checkpoint barriers)
461+
// to be polled. Data buffers remain blocked until resumeConsumption() is called.
462+
if (isBlocked && buffers.getNumPriorityElements() == 0) {
460463
return null;
461464
}
462465

flink-runtime/src/test/java/org/apache/flink/runtime/io/network/partition/PipelinedSubpartitionWithReadViewTest.java

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,80 @@ void testResumeBlockedEmptySubpartition() throws IOException, InterruptedExcepti
545545
assertNoNextBuffer(readView);
546546
}
547547

548+
@TestTemplate
549+
void testPriorityEventBypassesBlockedSubpartition() throws Exception {
550+
subpartition.setChannelStateWriter(ChannelStateWriter.NO_OP);
551+
552+
// Block the subpartition by consuming an aligned checkpoint barrier
553+
blockSubpartitionByCheckpoint(1);
554+
assertThat(availablityListener.getNumPriorityEvents()).isZero();
555+
556+
// While blocked, add an unaligned checkpoint barrier (priority event).
557+
// Even though isBlocked=true, the priority event notification should NOT
558+
// be suppressed — priority events must bypass blocking.
559+
CheckpointOptions options =
560+
CheckpointOptions.unaligned(
561+
CheckpointType.CHECKPOINT,
562+
new CheckpointStorageLocationReference(new byte[] {0, 1, 2}));
563+
BufferConsumer barrierBuffer =
564+
EventSerializer.toBufferConsumer(new CheckpointBarrier(0, 0, options), true);
565+
subpartition.add(barrierBuffer);
566+
// Priority notification should fire immediately despite isBlocked=true
567+
assertThat(availablityListener.getNumPriorityEvents()).isOne();
568+
569+
assertNextEvent(
570+
readView,
571+
barrierBuffer.getWrittenBytes(),
572+
CheckpointBarrier.class,
573+
false,
574+
0,
575+
false,
576+
true);
577+
assertNoNextBuffer(readView);
578+
}
579+
580+
@TestTemplate
581+
void testDataStillBlockedAfterPriorityEventBypasses() throws Exception {
582+
final RecordingChannelStateWriter channelStateWriter = new RecordingChannelStateWriter();
583+
subpartition.setChannelStateWriter(channelStateWriter);
584+
585+
// Block the subpartition
586+
blockSubpartitionByCheckpoint(1);
587+
subpartition.add(createFilledFinishedBufferConsumer(BUFFER_SIZE));
588+
assertNoNextBuffer(readView);
589+
590+
// Add priority event while blocked — should notify and be pollable
591+
CheckpointOptions options =
592+
CheckpointOptions.unaligned(
593+
CheckpointType.CHECKPOINT,
594+
new CheckpointStorageLocationReference(new byte[] {0, 1, 2}));
595+
channelStateWriter.start(0, options);
596+
BufferConsumer barrierBuffer =
597+
EventSerializer.toBufferConsumer(new CheckpointBarrier(0, 0, options), true);
598+
subpartition.add(barrierBuffer);
599+
assertThat(availablityListener.getNumPriorityEvents()).isOne();
600+
601+
// Recycle inflight buffer copies held by channel state writer
602+
final List<Buffer> inflight =
603+
channelStateWriter.getAddedOutput().get(subpartition.getSubpartitionInfo());
604+
assertThat(inflight).hasSize(1);
605+
inflight.forEach(Buffer::recycleBuffer);
606+
607+
assertNextEvent(
608+
readView,
609+
barrierBuffer.getWrittenBytes(),
610+
CheckpointBarrier.class,
611+
false,
612+
0,
613+
false,
614+
true);
615+
assertNoNextBuffer(readView);
616+
617+
// After resumeConsumption, data becomes available
618+
readView.resumeConsumption();
619+
assertNextBuffer(readView, BUFFER_SIZE, false, 0, false, true);
620+
}
621+
548622
// ------------------------------------------------------------------------
549623

550624
private void blockSubpartitionByCheckpoint(int numNotifications)

0 commit comments

Comments
 (0)