forked from data-prep-kit/data-prep-kit
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
90 lines (76 loc) · 3.28 KB
/
Makefile
File metadata and controls
90 lines (76 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Path from this transform's directory up to the repository root.
REPOROOT=../../..
# Use make help, to see the available rules
include $(REPOROOT)/transforms/.make.cicd.targets
#
# This is intended to be included across the Makefiles provided within
# a given transform's directory tree, so must use compatible syntax.
#
################################################################################
# This defines the name of the transform and is used to match against
# expected files and is used to define the transform's image name.
# Simple (:=) assignment: the shell command runs once when the Makefile is
# parsed instead of being re-executed on every expansion of the variable.
TRANSFORM_NAME := $(shell basename `pwd`)
################################################################################
# Module invocations for the pure-Python and Ray runtimes of this transform.
# The double quotes are part of the values.
# NOTE(review): presumably consumed by the included targets file - confirm.
TRANSFORM_PYTHON_SRC="-m dpk_$(TRANSFORM_NAME).runtime"
TRANSFORM_RAY_SRC="-m dpk_$(TRANSFORM_NAME).ray.runtime"

# On Linux, and unless LINUX_WITH_CPU_TORCH is overridden to something other
# than "true", add the PyTorch CPU wheel index as an extra pip index and pass
# the same setting to the docker build as a build argument.
LINUX_WITH_CPU_TORCH?=true
OS := $(shell uname -s)
ifeq ($(OS),Linux)
ifeq ($(LINUX_WITH_CPU_TORCH),true)
PIP_INSTALL_EXTRA_ARGS=--extra-index-url=https://download.pytorch.org/whl/cpu
DOCKER_BUILD_EXTRA_ARGS=--build-arg PIP_INSTALL_EXTRA_ARGS=${PIP_INSTALL_EXTRA_ARGS}
endif
endif
# Sample run of the pure-Python runtime of this transform.
# Reads test-data/input/ and writes to output/, which is removed first so
# every run starts from a clean directory.
.PHONY: run-python-cli-sample-parquet
run-python-cli-sample-parquet:
	$(MAKE) venv
	. venv/bin/activate && \
	rm -fr output && \
	$(PYTHON) -m dpk_$(TRANSFORM_NAME).runtime \
		--data_local_config "{ 'input_folder' : 'test-data/input/', 'output_folder' : 'output/'}" \
		--text_encoder_embedding_batch_size 5

# Sample run of the pure-Python runtime with the LanceDB options enabled.
# Step 1 runs the transform on test-data/input/ with the lanceDB data URI
# output/test.db/test.lance/ and the fragments folder output/fragments_json/.
# Step 2 runs the lance_commit module against the same URIs and table name;
# it is chained with && so it only runs when step 1 succeeds.
.PHONY: run-python-cli-sample-lancedb
run-python-cli-sample-lancedb:
	$(MAKE) venv
	. venv/bin/activate && \
	rm -rf output && \
	$(PYTHON) -m dpk_$(TRANSFORM_NAME).runtime \
		--data_local_config "{ 'input_folder' : 'test-data/input/', 'output_folder' : 'output/'}" \
		--text_encoder_lanceDB_data_uri "output/test.db/test.lance/" \
		--text_encoder_embeddings_in_lanceDB True \
		--text_encoder_lanceDB_batch_size 10 \
		--text_encoder_embedding_batch_size 5 \
		--text_encoder_lanceDB_fragments_json_folder "output/fragments_json/" \
		--text_encoder_lanceDB_table_name "test" && \
	$(PYTHON) -m dpk_$(TRANSFORM_NAME).lance_commit \
		--lanceDB_storage_type "local" \
		--lanceDB_uri "output/test.db/" \
		--lanceDB_data_uri "output/test.db/test.lance/" \
		--lanceDB_table_name "test" \
		--lanceDB_fragments_json_folder "output/fragments_json/" \
		--lanceDB_table_schema_folder "output/"

# Sample run of the Ray runtime of this transform, passing --run_locally True.
# Reads test-data/input/ and writes to output/, which is removed first so
# every run starts from a clean directory.
.PHONY: run-ray-cli-sample-parquet
run-ray-cli-sample-parquet:
	$(MAKE) venv
	. venv/bin/activate && \
	rm -fr output && \
	$(PYTHON) -m dpk_$(TRANSFORM_NAME).ray.runtime \
		--data_local_config "{ 'input_folder' : 'test-data/input/', 'output_folder' : 'output/'}" \
		--text_encoder_embedding_batch_size 5 \
		--run_locally True

# Sample run of the Ray runtime with the LanceDB options enabled.
# Step 1 runs the transform with --run_locally True on test-data/input/, using
# the lanceDB data URI output/test.db/test.lance/ and the fragments folder
# output/fragments_json/.
# Step 2 runs the lance_commit module against the same URIs and table name;
# it is chained with && so it only runs when step 1 succeeds.
.PHONY: run-ray-cli-sample-lancedb
run-ray-cli-sample-lancedb:
	$(MAKE) venv
	. venv/bin/activate && \
	rm -rf output && \
	$(PYTHON) -m dpk_$(TRANSFORM_NAME).ray.runtime \
		--data_local_config "{ 'input_folder' : 'test-data/input/', 'output_folder' : 'output/'}" \
		--text_encoder_lanceDB_data_uri "output/test.db/test.lance/" \
		--text_encoder_lanceDB_batch_size 10 \
		--text_encoder_embeddings_in_lanceDB True \
		--text_encoder_lanceDB_fragments_json_folder "output/fragments_json/" \
		--text_encoder_embedding_batch_size 5 \
		--text_encoder_lanceDB_table_name "test" \
		--run_locally True && \
	$(PYTHON) -m dpk_$(TRANSFORM_NAME).lance_commit \
		--lanceDB_storage_type "local" \
		--lanceDB_uri "output/test.db/" \
		--lanceDB_data_uri "output/test.db/test.lance/" \
		--lanceDB_table_name "test" \
		--lanceDB_fragments_json_folder "output/fragments_json/" \
		--lanceDB_table_schema_folder "output/"