# model_sizer.py (fork of RiccardoBravin/tiny-LLM)

from lib.Models.BERT import BERT_Config
from lib.Models.mamba import MAMBA_Config
from lib.Models.NanoEmbedder import NanoEmbedder_Config
from lib.Models.NanoEmbedderConv import NanoEmbedderConv_Config
from lib.Models.NanoBERT import NanoBERT_Config
from lib.Models.BERTEfficient import BERTEfficient_Config
from lib.Models.NanoBERTEfficient import NanoBERTEfficient_Config
from lib.Models.EmbBERT import EmbBERT_Config
from lib.Models.classifiers import SequenceClassifier
from transformers import Trainer, TrainingArguments, BitsAndBytesConfig
from models_config import *
from lib.utils import print_model_params
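
# Hand-built EmbBERT configuration (8k vocabulary, 6 layers, single attention head).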
model_config = EmbBERT_Config(
    vocab_size=pow(2, 13),
    max_length=512,
    hidden_size=256,
    reduced_embedding=16,
    forward_expansion=2,
    kernel_size=32,
    num_attention_heads=1,
    num_hidden_layers=6,
    num_labels=2,
)
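
# The preset below (from models_config) replaces the hand-built config above.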
model_config = EmbBERT_Med_config
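
# Wrap the chosen backbone in a classification head and print its parameter count.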
model = SequenceClassifier(model_config)
print_model_params(model.model)
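
# A bare Trainer is used only to serialize the model in Hugging Face format.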
trainer = Trainer(
    model=model,
)
trainer.save_model("./TESTING/")
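
# Reload the saved checkpoint with 8-bit weights via bitsandbytes.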
q_conf = BitsAndBytesConfig(
    load_in_8bit=True,
)
classifier = SequenceClassifier.from_pretrained(
    "./TESTING/",
    config=model_config,
    quantization_config=q_conf,
)
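
# Attach LoRA adapters to all linear layers, then report the memory footprint
# and the number of trainable parameters.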
from peft import LoraConfig, get_peft_model

peft_config = LoraConfig(
    target_modules="all-linear",
)
classifier = get_peft_model(classifier, peft_config)

print(f"Model size: {classifier.get_memory_footprint() / 1000} KB")
classifier.print_trainable_parameters()