Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions docs_src/codegen_options.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,23 @@ control the scheduler.
NOTE: If set to 0 or a negative value, no throughput minimum will be
enforced.

- `--default_arc_worst_case_throughput=...` sets the default worst-case
throughput bound for feedback arcs when `--generator=pipeline`. If set,
allows scheduling a pipeline in which every backedge without a more specific
setting achieves a worst-case throughput no slower than once per N cycles. If
`--worst_case_throughput` is also set, it is enforced as an upper bound for
all configurations (a larger per-arc or default value is clamped down to it),
and any arcs not otherwise configured will use
`--default_arc_worst_case_throughput`.

- `--arc_worst_case_throughput=...` sets specific worst-case throughput bounds
for labeled feedback arcs when `--generator=pipeline`. Specified as a
comma-separated list of `write_label,read_label=throughput` entries (e.g.,
`W1,R1=4`). If set, forces each listed backedge to achieve a worst-case
throughput no slower than once per N cycles, where N is that entry's
`throughput` value. If `--worst_case_throughput` is also set, it is enforced
as an upper bound for all configurations (a larger per-arc value is clamped
down to it), and any arcs not otherwise configured will use
`--default_arc_worst_case_throughput`.

- `--dynamic_throughput_objective_weight=...` is disabled by default. If set,
the scheduler will attempt to optimize for dynamic throughput as well as for
area; the value controls how strongly this is prioritized. e.g., if set to
Expand Down
2 changes: 2 additions & 0 deletions xls/build_rules/xls_codegen_fdo_rules.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,8 @@ def xls_ir_verilog_fdo_impl(ctx, src, original_input_files):
"period_relaxation_percent",
"minimize_clock_on_failure",
"worst_case_throughput",
"default_arc_worst_case_throughput",
"arc_worst_case_throughput",
"additional_input_delay_ps",
"ffi_fallback_delay_ps",
"io_constraints",
Expand Down
13 changes: 13 additions & 0 deletions xls/build_rules/xls_providers.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,19 @@ SCHEDULING_FIELDS = {
"(full throughput).\n" +
"\n" +
"If zero or negative, no throughput bound will be enforced.",
"default_arc_worst_case_throughput": "Allow scheduling a pipeline with feedback arc worst-case throughput " +
"no slower than once per N cycles for all backedges by default. " +
"If the designer uses both flags, --worst_case_throughput is " +
"enforced as an upper bound for all configurations, and any arcs " +
"not otherwise configured will use " +
"--default_arc_worst_case_throughput.",
"arc_worst_case_throughput": "Allow scheduling specific feedback arcs with worst-case throughput " +
"no slower than once per N cycles. Specified as a comma-separated " +
"list of 'write_label,read_label=throughput' entries. " +
"If the designer uses both flags, --worst_case_throughput is " +
"enforced as an upper bound for all configurations, and any arcs " +
"not otherwise configured will use " +
"--default_arc_worst_case_throughput.",
"dynamic_throughput_objective_weight": "If set, the scheduler will attempt to optimize for " +
"dynamic throughput as well as for area; the value " +
"controls how strongly this is prioritized. e.g., if " +
Expand Down
4 changes: 4 additions & 0 deletions xls/scheduling/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,10 @@ cc_test(
"//xls/ir:op",
"//xls/passes:optimization_pass",
"//xls/passes:pass_base",
"//xls/tools:scheduling_options_flags",
"//xls/tools:scheduling_options_flags_cc_proto",
"@com_google_absl//absl/flags:commandlineflag",
"@com_google_absl//absl/flags:reflection",
"@googletest//:gtest",
],
)
Expand Down Expand Up @@ -317,6 +320,7 @@ cc_test(
":scheduling_options",
"//xls/common:xls_gunit_main",
"//xls/common/status:matchers",
"//xls/common/status:status_macros",
"//xls/estimators/delay_model:delay_estimator",
"//xls/estimators/delay_model:delay_estimators",
"//xls/fdo:delay_manager",
Expand Down
286 changes: 286 additions & 0 deletions xls/scheduling/pipeline_schedule_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

#include <cstdint>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
Expand All @@ -32,6 +34,7 @@
#include "absl/strings/str_format.h"
#include "absl/types/span.h"
#include "xls/common/status/matchers.h"
#include "xls/common/status/status_macros.h"
#include "xls/estimators/delay_model/delay_estimator.h"
#include "xls/estimators/delay_model/delay_estimators.h"
#include "xls/fdo/delay_manager.h"
Expand Down Expand Up @@ -117,6 +120,38 @@ using ::testing::UnorderedPointwise;

class PipelineScheduleTest : public IrTestBase {};

// Result bundle produced by BuildLabeledFeedbackArcProc(): a package holding a
// single proc, plus handles to the state read and next-value nodes that form
// the proc's one feedback arc.
struct LabeledFeedbackArcProc {
  // Owning pointer to the package; `proc`, `read` and `next` are handed out
  // alongside it and are only meaningful while it is alive.
  std::unique_ptr<Package> package;
  // The proc under test. Initialized to null so a default-constructed bundle
  // never carries an indeterminate pointer.
  Proc* proc = nullptr;
  // Builder handle for the labeled state-read node (the arc's read end).
  BValue read;
  // Builder handle for the labeled next-value node (the arc's write end).
  BValue next;
};

// Creates a fresh package ("the_package") with one proc ("the_proc") that has
// a single 32-bit state element initialized to zero. The state read is tagged
// with `read_label` and the next-value node with `write_label`; the value
// `state + 1` is both sent on a send-only streaming channel ("out_ch") and
// written back as the next state, giving exactly one labeled feedback arc.
//
// Returns the package together with the proc and the read/next handles so
// callers can look up the cycles those nodes were scheduled in. Any error from
// channel creation, state-element creation, or proc building is propagated.
absl::StatusOr<LabeledFeedbackArcProc> BuildLabeledFeedbackArcProc(
    std::string write_label, std::string read_label) {
  auto pkg = std::make_unique<Package>("the_package");
  Type* bits32 = pkg->GetBitsType(32);
  XLS_ASSIGN_OR_RETURN(
      auto send_channel,
      pkg->CreateStreamingChannel("out_ch", ChannelOps::kSendOnly, bits32));

  ProcBuilder builder("the_proc", pkg.get());
  BValue token = builder.Literal(Value::Token());
  XLS_ASSIGN_OR_RETURN(
      auto state_element,
      builder.UnreadStateElement("state", Value(UBits(0, 32))));

  // Read end of the feedback arc.
  BValue state_read =
      builder.StateRead(state_element, /*predicate=*/std::nullopt, read_label);

  BValue one = builder.Literal(UBits(1, 32));
  BValue incremented = builder.Add(state_read, one);
  builder.Send(send_channel, token, incremented);

  // Write end of the feedback arc.
  BValue state_next = builder.Next(state_element, incremented,
                                   /*pred=*/std::nullopt, write_label);
  XLS_ASSIGN_OR_RETURN(auto built_proc, builder.Build());

  return LabeledFeedbackArcProc{std::move(pkg), built_proc, state_read,
                                state_next};
}

TEST_F(PipelineScheduleTest, SelectsEntry) {
auto p = CreatePackage();
FunctionBuilder fb(TestName(), p.get());
Expand Down Expand Up @@ -2198,6 +2233,257 @@ TEST_F(PipelineScheduleTest, ProcWithZeroReadsErrors) {
absl_testing::StatusIs(absl::StatusCode::kInvalidArgument,
testing::HasSubstr("has no reads")));
}
// Checks how the per-arc, default-arc, and global worst-case throughput
// settings combine on a proc with two independent labeled feedback loops
// (W1 -> R1 and W2 -> R2).
TEST_F(PipelineScheduleTest, ProcFeedbackArcDefaultThroughput) {
  Package p(TestName());
  Type* u32 = p.GetBitsType(32);
  XLS_ASSERT_OK_AND_ASSIGN(
      Channel * out_ch,
      p.CreateStreamingChannel("out_ch", ChannelOps::kSendOnly, u32));

  ProcBuilder pb("the_proc", &p);
  BValue tkn = pb.Literal(Value::Token());
  XLS_ASSERT_OK_AND_ASSIGN(
      StateElement * se1, pb.UnreadStateElement("state1", Value(UBits(0, 32))));
  XLS_ASSERT_OK_AND_ASSIGN(
      StateElement * se2, pb.UnreadStateElement("state2", Value(UBits(0, 32))));

  BValue r1 = pb.StateRead(se1, /*predicate=*/std::nullopt, "R1");
  BValue r2 = pb.StateRead(se2, /*predicate=*/std::nullopt, "R2");

  BValue add_val = pb.Add(r1, r2);
  pb.Send(out_ch, tkn, add_val);

  BValue w1 =
      pb.Next(se1, r1, /*pred=*/std::nullopt, "W1");  // loop 1: W1 -> R1
  BValue w2 =
      pb.Next(se2, r2, /*pred=*/std::nullopt, "W2");  // loop 2: W2 -> R2

  XLS_ASSERT_OK_AND_ASSIGN(Proc * proc, pb.Build());

  // Test case 1: arc throughput = 3, default = 4, global worst_case = 2.
  // Global worst-case acts as a strict upper bound, so both loops clamp to 2.
  // So both loops must be scheduled such that Next - StateRead <= 2 - 1 = 1.
  {
    SchedulingOptions options;
    options.clock_period_ps(2);
    options.worst_case_throughput(2);
    options.default_arc_worst_case_throughput(4);
    options.arc_worst_case_throughput({{{"W1", "R1"}, 3}});

    XLS_ASSERT_OK_AND_ASSIGN(
        PipelineSchedule schedule,
        RunPipelineSchedule(proc, TestDelayEstimator(), options));

    EXPECT_LE(schedule.cycle(w1.node()) - schedule.cycle(r1.node()), 1);
    EXPECT_LE(schedule.cycle(w2.node()) - schedule.cycle(r2.node()), 1);
  }

  // Test case 2: arc throughput = 4, default = 2, global worst_case = 0.
  // arc throughput (4) wins for loop 1, default (2) wins for loop 2.
  // Clamping is skipped because global worst-case is not enforced.
  // So loop 1 must satisfy Next1 - Read1 <= 4 - 1 = 3, and loop 2 must satisfy
  // Next2 - Read2 <= 2 - 1 = 1.
  {
    SchedulingOptions options;
    options.clock_period_ps(2);
    options.worst_case_throughput(0);
    options.default_arc_worst_case_throughput(2);
    options.arc_worst_case_throughput({{{"W1", "R1"}, 4}});

    XLS_ASSERT_OK_AND_ASSIGN(
        PipelineSchedule schedule,
        RunPipelineSchedule(proc, TestDelayEstimator(), options));

    // Loop 1 must satisfy its explicit arc throughput limit of 4
    // (Next1 - Read1 <= 3).
    EXPECT_LE(schedule.cycle(w1.node()) - schedule.cycle(r1.node()), 3);

    // Loop 2 must satisfy default arc throughput limit of 2
    // (Next2 - Read2 <= 1).
    EXPECT_LE(schedule.cycle(w2.node()) - schedule.cycle(r2.node()), 1);
  }
}

// Exercises pattern matching when one feedback loop carries labels and the
// other does not.
TEST_F(PipelineScheduleTest, ProcFeedbackArcThroughputUnlabeled) {
  Package pkg(TestName());
  Type* bits32 = pkg.GetBitsType(32);
  XLS_ASSERT_OK_AND_ASSIGN(
      Channel * send_channel,
      pkg.CreateStreamingChannel("out_ch", ChannelOps::kSendOnly, bits32));

  ProcBuilder builder("the_proc", &pkg);
  BValue token = builder.Literal(Value::Token());
  XLS_ASSERT_OK_AND_ASSIGN(
      StateElement * labeled_state,
      builder.UnreadStateElement("state1", Value(UBits(0, 32))));
  XLS_ASSERT_OK_AND_ASSIGN(
      StateElement * unlabeled_state,
      builder.UnreadStateElement("state2", Value(UBits(0, 32))));

  // Loop 1 reads with label "R1"; loop 2's read carries no label.
  BValue labeled_read =
      builder.StateRead(labeled_state, /*predicate=*/std::nullopt, "R1");
  BValue unlabeled_read =
      builder.StateRead(unlabeled_state, /*predicate=*/std::nullopt);

  BValue sum = builder.Add(labeled_read, unlabeled_read);
  builder.Send(send_channel, token, sum);

  // Loop 1 writes with label "W1"; loop 2's write carries no label.
  BValue labeled_next =
      builder.Next(labeled_state, labeled_read, /*pred=*/std::nullopt, "W1");
  BValue unlabeled_next =
      builder.Next(unlabeled_state, unlabeled_read, /*pred=*/std::nullopt);

  XLS_ASSERT_OK_AND_ASSIGN(Proc * proc, builder.Build());

  // Configuration: ("W1", "R1") = 4 for the labeled loop, ("_", "_") = 2 for
  // the unlabeled loop, and a ("*", "*") wildcard fallback of 5. The global
  // worst-case throughput is 0 (not enforced), so no clamping takes place.
  // Expected outcome: the labeled entry governs loop 1 and the unlabeled
  // pattern beats the wildcard fallback for loop 2.
  SchedulingOptions options;
  options.clock_period_ps(2);
  options.worst_case_throughput(0);
  options.arc_worst_case_throughput(
      {{{"W1", "R1"}, 4}, {{"_", "_"}, 2}, {{"*", "*"}, 5}});

  XLS_ASSERT_OK_AND_ASSIGN(
      PipelineSchedule schedule,
      RunPipelineSchedule(proc, TestDelayEstimator(), options));

  // Loop 1: limit of 4, i.e. Next1 - Read1 <= 4 - 1 = 3.
  const auto labeled_distance = schedule.cycle(labeled_next.node()) -
                                schedule.cycle(labeled_read.node());
  EXPECT_LE(labeled_distance, 3);

  // Loop 2: limit of 2, i.e. Next2 - Read2 <= 2 - 1 = 1.
  const auto unlabeled_distance = schedule.cycle(unlabeled_next.node()) -
                                  schedule.cycle(unlabeled_read.node());
  EXPECT_LE(unlabeled_distance, 1);
}

// Verifies that an explicit per-arc bound and the global worst-case throughput
// combine by taking the smaller of the two.
TEST_F(PipelineScheduleTest, ProcFeedbackArcThroughputConstraints) {
  XLS_ASSERT_OK_AND_ASSIGN(LabeledFeedbackArcProc fixture,
                           BuildLabeledFeedbackArcProc("my_write", "my_read"));
  const auto read_node = fixture.read.node();
  const auto next_node = fixture.next.node();

  // Per-arc bound (2) is tighter than the global bound (4): the effective
  // limit is min(2, 4) = 2, hence Next - StateRead <= 2 - 1 = 1.
  {
    SchedulingOptions options;
    options.clock_period_ps(2);
    options.worst_case_throughput(4);
    options.arc_worst_case_throughput({{{"my_write", "my_read"}, 2}});

    XLS_ASSERT_OK_AND_ASSIGN(
        PipelineSchedule schedule,
        RunPipelineSchedule(fixture.proc, TestDelayEstimator(), options));

    const auto arc_distance =
        schedule.cycle(next_node) - schedule.cycle(read_node);
    EXPECT_LE(arc_distance, 1);
  }

  // Per-arc bound (3) is looser than the global bound (2): it is clamped to
  // min(3, 2) = 2, hence Next - StateRead <= 1 again.
  {
    SchedulingOptions options;
    options.clock_period_ps(2);
    options.worst_case_throughput(2);
    options.arc_worst_case_throughput({{{"my_write", "my_read"}, 3}});

    XLS_ASSERT_OK_AND_ASSIGN(
        PipelineSchedule schedule,
        RunPipelineSchedule(fixture.proc, TestDelayEstimator(), options));

    const auto arc_distance =
        schedule.cycle(next_node) - schedule.cycle(read_node);
    EXPECT_LE(arc_distance, 1);
  }
}

// A configured arc pattern that matches no feedback arc in the proc (here the
// write label is misspelled) must be rejected with an invalid-argument error.
TEST_F(PipelineScheduleTest, ProcFeedbackArcThroughputUnusedPattern) {
  XLS_ASSERT_OK_AND_ASSIGN(LabeledFeedbackArcProc fixture,
                           BuildLabeledFeedbackArcProc("my_write", "my_read"));

  SchedulingOptions options;
  options.clock_period_ps(2);
  // "typo_write" does not correspond to the proc's "my_write" label.
  options.arc_worst_case_throughput({{{"typo_write", "my_read"}, 2}});

  const auto result =
      RunPipelineSchedule(fixture.proc, TestDelayEstimator(), options);
  EXPECT_THAT(result,
              absl_testing::StatusIs(
                  absl::StatusCode::kInvalidArgument,
                  testing::HasSubstr("did not match any feedback arc")));
}

// When several patterns match the same arc, the most specific one decides the
// bound.
TEST_F(PipelineScheduleTest, ProcFeedbackArcThroughputSpecificity) {
  XLS_ASSERT_OK_AND_ASSIGN(LabeledFeedbackArcProc fixture,
                           BuildLabeledFeedbackArcProc("L_W", "L_R"));

  // Three candidates match the (L_W, L_R) arc: the exact entry (value 2,
  // score 4), the half-wildcard (L_W, *) entry (value 4, score 2), and the
  // full wildcard (value 5). The exact entry should be chosen, giving a limit
  // of 2, i.e. Next - Read <= 1.
  SchedulingOptions options;
  options.clock_period_ps(2);
  options.arc_worst_case_throughput(
      {{{"L_W", "*"}, 4}, {{"L_W", "L_R"}, 2}, {{"*", "*"}, 5}});

  XLS_ASSERT_OK_AND_ASSIGN(
      PipelineSchedule schedule,
      RunPipelineSchedule(fixture.proc, TestDelayEstimator(), options));

  const auto arc_distance = schedule.cycle(fixture.next.node()) -
                            schedule.cycle(fixture.read.node());
  EXPECT_LE(arc_distance, 1);
}

// Two equally specific patterns that disagree on the bound must produce an
// ambiguity error instead of silently picking one of them.
TEST_F(PipelineScheduleTest, ProcFeedbackArcThroughputAmbiguousMatch) {
  XLS_ASSERT_OK_AND_ASSIGN(LabeledFeedbackArcProc fixture,
                           BuildLabeledFeedbackArcProc("L_W", "L_R"));

  SchedulingOptions options;
  options.clock_period_ps(2);
  // (L_W, *) = 4 and (*, L_R) = 2 both match with score 2 but carry
  // different values, so neither can take precedence.
  options.arc_worst_case_throughput({{{"L_W", "*"}, 4}, {{"*", "L_R"}, 2}});

  const auto result =
      RunPipelineSchedule(fixture.proc, TestDelayEstimator(), options);
  EXPECT_THAT(result,
              absl_testing::StatusIs(
                  absl::StatusCode::kInvalidArgument,
                  testing::HasSubstr("Ambiguous throughput configuration")));
}

// A non-positive throughput value means "not enforced"; whichever of the
// global and per-arc settings is enforced should then determine the limit.
TEST_F(PipelineScheduleTest, ProcFeedbackArcThroughputNonPositiveClamping) {
  XLS_ASSERT_OK_AND_ASSIGN(LabeledFeedbackArcProc fixture,
                           BuildLabeledFeedbackArcProc("L_W", "L_R"));
  const auto read_node = fixture.read.node();
  const auto next_node = fixture.next.node();

  // Global bound disabled (0), per-arc bound 2: the per-arc value applies, so
  // Next - Read <= 1.
  {
    SchedulingOptions options;
    options.clock_period_ps(2);
    options.worst_case_throughput(0);  // Global bound not enforced.
    options.arc_worst_case_throughput({{{"L_W", "L_R"}, 2}});

    XLS_ASSERT_OK_AND_ASSIGN(
        PipelineSchedule schedule,
        RunPipelineSchedule(fixture.proc, TestDelayEstimator(), options));

    const auto arc_distance =
        schedule.cycle(next_node) - schedule.cycle(read_node);
    EXPECT_LE(arc_distance, 1);
  }

  // Global bound 2, per-arc bound disabled (0): the limit falls back to the
  // global value, so Next - Read <= 1.
  {
    SchedulingOptions options;
    options.clock_period_ps(2);
    options.worst_case_throughput(2);
    // Per-arc entry is present but not enforced individually.
    options.arc_worst_case_throughput({{{"L_W", "L_R"}, 0}});

    XLS_ASSERT_OK_AND_ASSIGN(
        PipelineSchedule schedule,
        RunPipelineSchedule(fixture.proc, TestDelayEstimator(), options));

    const auto arc_distance =
        schedule.cycle(next_node) - schedule.cycle(read_node);
    EXPECT_LE(arc_distance, 1);
  }
}

} // namespace
} // namespace xls
Loading
Loading