Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,5 @@ link_infini_train_exe(test_precision_check)
add_executable(test_lora test/lora/test_lora.cc)
link_infini_train_exe(test_lora)

add_executable(test_transformer_spec test/transformer_spec/test_transformer_spec.cc)
link_infini_train_exe(test_transformer_spec)
11 changes: 7 additions & 4 deletions example/gpt2/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
#include "glog/logging.h"

#include "infini_train/include/autocast.h"
#include "infini_train/include/core/models/decode_only_transformer/model.h"
#include "infini_train/include/core/runtime/device_guard.h"
#include "infini_train/include/core/transformer/transformer_config.h"
#include "infini_train/include/dataloader.h"
#include "infini_train/include/device.h"
#include "infini_train/include/nn/lora/lora_utils.h"
Expand All @@ -35,7 +37,6 @@

#include "example/common/tiny_shakespeare_dataset.h"
#include "example/common/tokenizer.h"
#include "example/gpt2/net.h"

// I/O
DEFINE_string(input_bin, "", "input .bin to train on");
Expand Down Expand Up @@ -100,7 +101,7 @@ constexpr char kDtypeFP32[] = "float32";
constexpr char kDtypeBF16[] = "bfloat16";

//
const std::unordered_map<std::string, GPT2Config> kModelToConfigs = {
const std::unordered_map<std::string, nn::TransformerConfig> kModelToConfigs = {
{"d12", {.block_size = 1024, .vocab_size = 50257, .n_layer = 12, .n_head = 12, .n_embd = 768}},
{"d24", {.block_size = 1024, .vocab_size = 50257, .n_layer = 24, .n_head = 16, .n_embd = 1024}},
{"d36", {.block_size = 1024, .vocab_size = 50257, .n_layer = 36, .n_head = 20, .n_embd = 1280}},
Expand Down Expand Up @@ -187,11 +188,13 @@ void Train(const nn::parallel::Rank &rank) {
// ManualSeed(42);

// init the model, either from scratch or from OpenAI pretrained checkpoint
GPT2Config model_config;
nn::TransformerConfig model_config;
std::shared_ptr<nn::Module> model = nullptr;

if (!FLAGS_llmc_filepath.empty()) {
model = GPT2::FromLLMC(FLAGS_llmc_filepath);
auto gpt2_model = GPT2::FromLLMC(FLAGS_llmc_filepath);
model_config = gpt2_model->GetConfig();
model = gpt2_model;
} else if (kModelToConfigs.count(FLAGS_model)) {
model_config = kModelToConfigs.at(FLAGS_model);
model = std::make_shared<GPT2>(model_config);
Expand Down
435 changes: 74 additions & 361 deletions example/gpt2/net.cc

Large diffs are not rendered by default.

150 changes: 0 additions & 150 deletions example/gpt2/net.h

This file was deleted.

9 changes: 6 additions & 3 deletions example/llama3/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
#include "glog/logging.h"

#include "infini_train/include/autocast.h"
#include "infini_train/include/core/models/decode_only_transformer/model.h"
#include "infini_train/include/core/runtime/device_guard.h"
#include "infini_train/include/core/transformer/transformer_config.h"
#include "infini_train/include/dataloader.h"
#include "infini_train/include/device.h"
#include "infini_train/include/nn/lora/lora_utils.h"
Expand All @@ -34,7 +36,6 @@

#include "example/common/tiny_shakespeare_dataset.h"
#include "example/common/tokenizer.h"
#include "example/llama3/net.h"

// I/O
DEFINE_string(input_bin, "", "input .bin to train on");
Expand Down Expand Up @@ -167,10 +168,12 @@ void Train(const nn::parallel::Rank &rank) {
// rng / reproducibility
// ManualSeed(42);

LLaMA3Config model_config = LLaMA3Config();
nn::TransformerConfig model_config;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`nn::TransformerConfig model_config;` 这样声明得到的默认值沿用的是 GPT-2 的架构(use_bias、use_rope 等字段都是按 GPT-2 设置的),导致下面 else 分支实际构造出来的是一个 GPT-2 结构的模型,而不是 LLaMA3。

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

已新增静态初始化方法,并在各模型各自的 main.cc 中调用对应的初始化方法。

std::shared_ptr<nn::Module> model = nullptr;
if (!FLAGS_llmc_filepath.empty()) {
model = LLaMA3::FromLLMC(FLAGS_llmc_filepath);
auto llama3_model = LLaMA3::FromLLMC(FLAGS_llmc_filepath);
model_config = llama3_model->GetConfig();
model = llama3_model;
} else {
model = std::make_shared<LLaMA3>(model_config);
}
Expand Down
Loading
Loading