The extensive std::cout debug output throughout the constructor and getMaybeHeuristicsFor method will severely impact performance. This debug code should be removed or made conditional with a debug flag before merging.
LinearOp::evaluate contains extremely verbose debug output (100+ lines) that will significantly impact performance. This debug code should be removed or made conditional before merging to main; a minimal gating sketch follows the listing below.
std::cout << "[DEBUG] LinearOp::evaluate - ENTRY" << std::endl;
std::cout.flush();
std::cout << "[DEBUG] LinearOp::evaluate - STEP 1: Getting input tensor (inputs.size=" << inputs.size() << ")" << std::endl;
std::cout.flush();
const auto in = inputs.at(0).as<at::Tensor>();
std::cout << "[DEBUG] LinearOp::evaluate - STEP 1a: Input tensor obtained, shape=[";
for (int64_t i = 0; i < in.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << in.size(i);
}
std::cout << "], device=" << in.device() << std::endl;
std::cout.flush();
std::cout << "[DEBUG] LinearOp::evaluate - STEP 2: Getting weight tensor" << std::endl;
std::cout.flush();
auto weight = inputs.at(1).as<at::Tensor>();
std::cout << "[DEBUG] LinearOp::evaluate - STEP 2a: Weight tensor obtained, shape=[";
for (int64_t i = 0; i < weight.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << weight.size(i);
}
std::cout << "], device=" << weight.device() << std::endl;
std::cout.flush();
auto squeeze_device_dims = [](at::Tensor& t,
                              int64_t num_device_dims) -> void {
  // Record the initial shape for the error message.
  std::vector<int64_t> shape = t.sizes().vec();
  for ([[maybe_unused]] auto _ : arange(num_device_dims)) {
    NVF_CHECK(
        t.size(0) == 1,
        "When the weight is >2D, expect its preceding dimensions and "
        "the bias's preceding dimensions to "
        "be DID-parallel and therefore size-1: ",
        shape);
    t = t.squeeze(0);
  }
};
// The squeezes and unsqueezes are currently required to support a sharded
// linear layer. Remove them after #2563.
std::cout << "[DEBUG] LinearOp::evaluate - STEP 3: Calculating num_device_dims (weight.dim=" << weight.dim() << ")" << std::endl;
std::cout.flush();
auto num_device_dims = weight.dim() - 2;
std::cout << "[DEBUG] LinearOp::evaluate - STEP 3a: num_device_dims=" << num_device_dims << std::endl;
std::cout.flush();
std::cout << "[DEBUG] LinearOp::evaluate - STEP 4: Squeezing device dims from weight" << std::endl;
std::cout.flush();
squeeze_device_dims(weight, num_device_dims);
std::cout << "[DEBUG] LinearOp::evaluate - STEP 4a: Weight squeezed, new shape=[";
for (int64_t i = 0; i < weight.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << weight.size(i);
}
std::cout << "]" << std::endl;
std::cout.flush();
std::cout << "[DEBUG] LinearOp::evaluate - STEP 5: Checking hasBias (hasBias=" << hasBias() << ")" << std::endl;
std::cout.flush();
at::Tensor out_tensor;
if (hasBias()) {
std::cout << "[DEBUG] LinearOp::evaluate - STEP 5a: Getting bias tensor" << std::endl;
std::cout.flush();
auto bias = inputs.at(2).as<at::Tensor>();
std::cout << "[DEBUG] LinearOp::evaluate - STEP 5b: Bias tensor obtained, shape=[";
for (int64_t i = 0; i < bias.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << bias.size(i);
}
std::cout << "], device=" << bias.device() << std::endl;
std::cout.flush();
std::cout << "[DEBUG] LinearOp::evaluate - STEP 5c: Squeezing device dims from bias" << std::endl;
std::cout.flush();
squeeze_device_dims(bias, num_device_dims);
std::cout << "[DEBUG] LinearOp::evaluate - STEP 5d: Bias squeezed, new shape=[";
for (int64_t i = 0; i < bias.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << bias.size(i);
}
std::cout << "]" << std::endl;
std::cout.flush();
std::cout << "[DEBUG] LinearOp::evaluate - STEP 5e: Calling at::linear with bias" << std::endl;
std::cout << "[DEBUG] LinearOp::evaluate - INPUT METADATA:" << std::endl;
std::cout << " in.sizes: [";
for (int64_t i = 0; i < in.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << in.size(i);
}
std::cout << "]" << std::endl;
std::cout << " in.strides: [";
for (int64_t i = 0; i < in.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << in.stride(i);
}
std::cout << "]" << std::endl;
std::cout << " in.dtype: " << in.dtype() << std::endl;
std::cout << " in.device: " << in.device() << std::endl;
std::cout << " in.is_contiguous: " << in.is_contiguous() << std::endl;
std::cout << " in.numel: " << in.numel() << std::endl;
std::cout << "[DEBUG] LinearOp::evaluate - WEIGHT METADATA:" << std::endl;
std::cout << " weight.sizes: [";
for (int64_t i = 0; i < weight.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << weight.size(i);
}
std::cout << "]" << std::endl;
std::cout << " weight.strides: [";
for (int64_t i = 0; i < weight.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << weight.stride(i);
}
std::cout << "]" << std::endl;
std::cout << " weight.dtype: " << weight.dtype() << std::endl;
std::cout << " weight.device: " << weight.device() << std::endl;
std::cout << " weight.is_contiguous: " << weight.is_contiguous() << std::endl;
std::cout << " weight.numel: " << weight.numel() << std::endl;
std::cout << "[DEBUG] LinearOp::evaluate - BIAS METADATA:" << std::endl;
std::cout << " bias.sizes: [";
for (int64_t i = 0; i < bias.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << bias.size(i);
}
std::cout << "]" << std::endl;
std::cout << " bias.strides: [";
for (int64_t i = 0; i < bias.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << bias.stride(i);
}
std::cout << "]" << std::endl;
std::cout << " bias.dtype: " << bias.dtype() << std::endl;
std::cout << " bias.device: " << bias.device() << std::endl;
std::cout << " bias.is_contiguous: " << bias.is_contiguous() << std::endl;
std::cout << " bias.numel: " << bias.numel() << std::endl;
std::cout.flush();
out_tensor = at::linear(in, weight, bias);
std::cout << "[DEBUG] LinearOp::evaluate - STEP 5f: at::linear completed" << std::endl;
std::cout.flush();
} else {
std::cout << "[DEBUG] LinearOp::evaluate - STEP 5g: Calling at::linear without bias" << std::endl;
std::cout << "[DEBUG] LinearOp::evaluate - INPUT METADATA:" << std::endl;
std::cout << " in.sizes: [";
for (int64_t i = 0; i < in.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << in.size(i);
}
std::cout << "]" << std::endl;
std::cout << " in.strides: [";
for (int64_t i = 0; i < in.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << in.stride(i);
}
std::cout << "]" << std::endl;
std::cout << " in.dtype: " << in.dtype() << std::endl;
std::cout << " in.device: " << in.device() << std::endl;
std::cout << " in.is_contiguous: " << in.is_contiguous() << std::endl;
std::cout << " in.numel: " << in.numel() << std::endl;
std::cout << "[DEBUG] LinearOp::evaluate - WEIGHT METADATA:" << std::endl;
std::cout << " weight.sizes: [";
for (int64_t i = 0; i < weight.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << weight.size(i);
}
std::cout << "]" << std::endl;
std::cout << " weight.strides: [";
for (int64_t i = 0; i < weight.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << weight.stride(i);
}
std::cout << "]" << std::endl;
std::cout << " weight.dtype: " << weight.dtype() << std::endl;
std::cout << " weight.device: " << weight.device() << std::endl;
std::cout << " weight.is_contiguous: " << weight.is_contiguous() << std::endl;
std::cout << " weight.numel: " << weight.numel() << std::endl;
std::cout.flush();
out_tensor = at::linear(in, weight);
std::cout << "[DEBUG] LinearOp::evaluate - STEP 5h: at::linear completed" << std::endl;
std::cout.flush();
}
std::cout << "[DEBUG] LinearOp::evaluate - STEP 6: at::linear result shape=[";
for (int64_t i = 0; i < out_tensor.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << out_tensor.size(i);
}
std::cout << "], device=" << out_tensor.device() << std::endl;
std::cout.flush();
std::cout << "[DEBUG] LinearOp::evaluate - STEP 7: Unsqueezing output (num_device_dims=" << num_device_dims << ")" << std::endl;
std::cout.flush();
for ([[maybe_unused]] auto _ : arange(num_device_dims)) {
out_tensor = out_tensor.unsqueeze(0);
}
std::cout << "[DEBUG] LinearOp::evaluate - STEP 7a: Unsqueezed output shape=[";
for (int64_t i = 0; i < out_tensor.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << out_tensor.size(i);
}
std::cout << "]" << std::endl;
std::cout.flush();
// Handle rFactor DIDs similar to MatmulOp::evaluate.
std::cout << "[DEBUG] LinearOp::evaluate - STEP 8: Checking rFactor device dimension index" << std::endl;
std::cout.flush();
if (const auto rfactor_did_idx = getRFactorDeviceDimensionIndex(out());
    rfactor_did_idx != -1) {
std::cout << "[DEBUG] LinearOp::evaluate - STEP 8a: rFactor DID index=" << rfactor_did_idx << ", unsqueezing" << std::endl;
std::cout.flush();
out_tensor = out_tensor.unsqueeze(rfactor_did_idx);
std::cout << "[DEBUG] LinearOp::evaluate - STEP 8b: Final output shape=[";
for (int64_t i = 0; i < out_tensor.dim(); i++) {
if (i > 0) std::cout << ", ";
std::cout << out_tensor.size(i);
}
std::cout << "]" << std::endl;
std::cout.flush();
} else {
std::cout << "[DEBUG] LinearOp::evaluate - STEP 8c: No rFactor DID, skipping unsqueeze" << std::endl;
std::cout.flush();
}
std::cout << "[DEBUG] LinearOp::evaluate - STEP 9: Returning result" << std::endl;
std::cout.flush();
return {out_tensor};
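As suggested above, one low-friction way to make this logging conditional is to gate it behind a flag that is checked once at startup. Below is a minimal sketch, assuming a hypothetical NVFUSER_LINEAR_DEBUG environment variable; if nvFuser already has a debug-dump facility, that would be the more idiomatic gate. The helper name and variable are illustrative, not part of the existing code.

#include <cstdlib>
#include <iostream>

namespace {
// Hypothetical gate: read the environment once and cache the result, so the
// hot path pays only a predictable branch when debugging is off.
bool linearDebugEnabled() {
  static const bool enabled = std::getenv("NVFUSER_LINEAR_DEBUG") != nullptr;
  return enabled;
}
} // namespace

// The message is pasted into the stream expression, so callers can chain
// values with <<, e.g. LINEAR_DEBUG("num_device_dims=" << num_device_dims);
#define LINEAR_DEBUG(msg)                          \
  do {                                             \
    if (linearDebugEnabled()) {                    \
      std::cout << "[DEBUG] " << msg << std::endl; \
    }                                              \
  } while (0)

With this in place, each std::cout / std::cout.flush() pair above collapses to a single LINEAR_DEBUG(...) call (std::endl already flushes), and production runs skip all of the string formatting behind one cached branch.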
The ReproLinearAddFusion test has commented-out validation assertions (lines 1611-1612, 1650-1657). These should be uncommented, or the test should be marked as expected to fail if the issue being reproduced is not yet resolved; a sketch of both options follows the snippet below.
// at::Tensor ref = at::linear(t0, t2) + t1;
// testValidate(
//     executor_cache.fusion(), outputs, {t0, t1, t2}, {ref}, __LINE__, __FILE__);

// Serialize the FusionExecutorCache to test the serde path.
// This reproduces the serialization behavior when
// enable_automatic_serialization() is used.
flatbuffers::FlatBufferBuilder builder(1024);
auto serialized = executor_cache.serialize(builder);
builder.Finish(serialized);
// Get the serialized buffer.
uint8_t* buf = builder.GetBufferPointer();
// Create a new fusion and executor cache for deserialization.
auto fusion2 = std::make_unique<Fusion>();
FusionGuard fg2(fusion2.get());
auto tv0_2 = makeSymbolicTensor(3, DataType::BFloat16);
auto tv1_2 = makeSymbolicTensor(3, DataType::BFloat16);
auto tv2_2 = makeSymbolicTensor(2, DataType::BFloat16);
fusion2->addInput(tv0_2);
fusion2->addInput(tv1_2);
fusion2->addInput(tv2_2);
auto tv3_2 = linear(tv0_2, tv2_2);
auto tv4_2 = add(tv3_2, tv1_2);
fusion2->addOutput(tv4_2);
FusionExecutorCache executor_cache2(std::move(fusion2), /*fusion_id=*/1);
// Deserialize into the new executor cache.
// Cast the buffer to the FusionExecutorCache flatbuffer type.
auto buffer = flatbuffers::GetRoot<serde::FusionExecutorCache>(buf);
executor_cache2.deserialize(buffer, /*fusion_id=*/1);
// Run with the deserialized cache.
auto outputs2 = executor_cache2.runFusionWithInputs({t0, t1, t2});
(void)outputs2;
// // Validate deserialized run
// testValidate(
//     executor_cache2.fusion(),
//     outputs2,
//     {t0, t1, t2},
//     {ref},
//     __LINE__,
//     __FILE__);
}
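To address the comment above, here is a minimal sketch of the two options, assuming GoogleTest and nvFuser's testValidate helper; ref is the reference tensor already present (commented out) in the snippet, and outputs / outputs2 are the results of the two runs.

// Option A: restore the commented-out validation for both the original and
// the deserialized run.
at::Tensor ref = at::linear(t0, t2) + t1;
testValidate(
    executor_cache.fusion(), outputs, {t0, t1, t2}, {ref}, __LINE__, __FILE__);
testValidate(
    executor_cache2.fusion(),
    outputs2,
    {t0, t1, t2},
    {ref},
    __LINE__,
    __FILE__);

// Option B: if the repro is known to fail validation, state that explicitly
// at the top of the test body instead of silently dropping the checks.
GTEST_SKIP() << "Validation disabled pending the fix this test reproduces; "
                "re-enable the testValidate calls once resolved.";

Option A also makes the (void)outputs2; suppression unnecessary, since outputs2 is then actually consumed.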