# pyproject.toml — forked from OpenDCAI/DataFlex
# Build backend configuration (PEP 517/518).
# setuptools 61.0 is the first release that reads the [project] table.
# `wheel` is deliberately not listed: the setuptools backend requests it on
# its own when a wheel is being built, and naming it here would also force it
# to be installed for sdist-only builds.
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
# Core package metadata (PEP 621).
[project]
name = "dataflex"
authors = [
    { name = "Hao Liang", email = "hao.liang@stu.pku.edu.cn" },
    { name = "Mingrui Chen", email = "charmier2003@gmail.com" },
]
description = "A data-centric training system for Large Language Models"
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.10"
license = { text = "Apache-2.0" }
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3 :: Only",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
keywords = [
    "AI",
    "data-centric",
    "LLM",
    "training",
    "data selection",
    "Data Mixture",
]
# Not declared statically here: both fields are supplied by the build backend
# from the sources named in [tool.setuptools.dynamic].
dynamic = ["version", "dependencies"]
# Project links published with the package metadata.
[project.urls]
Github = "https://github.com/OpenDCAI/DataFlex"
# NOTE(review): this URL names the DataFlow-Doc site, whereas the other links
# point at the DataFlex repository — confirm it is the intended docs location.
Documentation = "https://opendcai.github.io/DataFlow-Doc"
"Bug Reports" = "https://github.com/OpenDCAI/DataFlex/issues"
# Console entry point: installing the package creates a `dataflex-cli` command
# that calls `main` from the `dataflex.cli` module.
[project.scripts]
dataflex-cli = "dataflex.cli:main"
# Optional feature sets, installed on request as extras (e.g. `dataflex[dev]`).
# Each key is the extra's name; each value is a list of PEP 508 requirements.
[project.optional-dependencies]
# NOTE(review): torch is allowed up to 2.10.0 while torchvision and torchaudio
# are capped at 0.21.0 and 2.6.0 — confirm the differing upper bounds are
# intentional.
torch = [
    "torch>=2.4.0,<=2.10.0",
    "torchvision>=0.19.0,<=0.21.0",
    "torchaudio>=2.4.0,<=2.6.0",
]
# Pinned exactly, unlike the ranged `torch` extra above.
torch-npu = [
    "torch==2.4.0",
    "torch-npu==2.4.0.post2",
    "decorator",
]
metrics = [
    "nltk",
    "jieba",
    "rouge-chinese",
]
flash-attn = ["flash-attn>=2.5.0"]
liger-kernel = ["liger-kernel>=0.5.5"]
bitsandbytes = ["bitsandbytes>=0.39.0"]
hqq = ["hqq"]
eetq = ["eetq"]
gptq = [
    "optimum>=1.24.0",
    "gptqmodel>=2.0.0",
]
aqlm = ["aqlm[gpu]>=1.1.0"]
vllm = ["vllm>=0.4.3,<=0.9.1"]
galore = ["galore-torch"]
apollo = ["apollo-torch"]
badam = ["badam>=1.2.1"]
adam-mini = ["adam-mini"]
modelscope = ["modelscope"]
openmind = ["openmind"]
swanlab = ["swanlab"]
# Tooling for contributors: hooks, lint/format, tests, and package builds.
dev = [
    "pre-commit",
    "ruff",
    "pytest",
    "build",
]
# setuptools-specific build settings.
[tool.setuptools]
include-package-data = true

# Package discovery is rooted at the `src` directory.
[tool.setuptools.packages.find]
where = ["src"]

# Sources for the fields listed under `dynamic` in [project]: the version is
# read from the `__version__` attribute of `dataflex.version`, and the
# dependency list is read from `requirements.txt`.
[tool.setuptools.dynamic]
version = { attr = "dataflex.version.__version__" }
dependencies = { file = "requirements.txt" }
# Ruff settings shared by the linter and the formatter.
[tool.ruff]
# Matches the minimum interpreter declared in `requires-python` (>=3.10).
target-version = "py310"
line-length = 119
indent-width = 4
# Lint rule selection: enable whole rule families, then opt out of a few codes.
[tool.ruff.lint]
# C = flake8-comprehensions + mccabe, E/W = pycodestyle, F = Pyflakes, I = isort.
select = ["C", "E", "F", "I", "W"]
ignore = [
    "C408", # unnecessary dict()/list()/tuple() call
    "C901", # function is too complex
    "E501", # line too long
    "E731", # lambda assigned to a name
    "E741", # ambiguous variable name
    "W605", # invalid escape sequence
]
# Import-sorting rules: two blank lines after the import block, `dataflex`
# treated as first-party, and the listed libraries forced into the
# third-party section.
[tool.ruff.lint.isort]
lines-after-imports = 2
known-first-party = ["dataflex"]
known-third-party = [
    "accelerate",
    "datasets",
    "gradio",
    "numpy",
    "peft",
    "torch",
    "transformers",
    "trl",
]
# Formatter output settings for `ruff format`.
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
# Also reformat code examples embedded in docstrings.
docstring-code-format = true
# Keep honoring an existing trailing comma as a request to stay multi-line.
skip-magic-trailing-comma = false
line-ending = "auto"
# Extras that uv must not resolve together. Each inner list is one mutually
# exclusive set: `torch-npu` is declared incompatible with `aqlm`,
# `liger-kernel`, and `vllm`.
[tool.uv]
conflicts = [
    [{ extra = "torch-npu" }, { extra = "aqlm" }],
    [{ extra = "torch-npu" }, { extra = "liger-kernel" }],
    [{ extra = "torch-npu" }, { extra = "vllm" }],
]