# File: run-text-encoder-lance-commit.yaml (repository forked from data-prep-kit/data-prep-kit)
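# Specify user prefix (UP) and run prefix (RP) before applying the pipeline run:
#   UP=MT RP=`date +"%m%d%H"` envsubst < run-text-encoder-lance-commit.yaml | kubectl apply -f -
# NOTE(review): no UP/RP placeholders appear in the manifest as shown, so envsubst
# currently substitutes nothing - confirm whether metadata.name was meant to use them.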
---
# Tekton PipelineRun that starts the `text-encoder-lance-commit` Pipeline with
# the parameter values listed below.
apiVersion: tekton.dev/v1
kind: PipelineRun
metadata:
  name: text-encoder-lance-commit
spec:
  # The Pipeline definition is referenced by name; it is not part of this file.
  pipelineRef:
    name: text-encoder-lance-commit
  timeouts:
    pipeline: "24h"  # Set to 0 to disable timeout
  params:
    # --- TEXT_ENCODER_* parameters ---
    # NOTE(review): the *_LANCEDB_* values here are identical to the LANCEDB_*
    # values further down; presumably they must be kept in sync - confirm
    # against the Pipeline definition.
    - name: TEXT_ENCODER_LANCEDB_DATA_URI
      value: "s3://bucket-wikimedia-1/DPK_tests/text_encoder/tekton_tests/lance/tekton_tests.db/tekton_tests.lance/"
    - name: TEXT_ENCODER_LANCEDB_FRAGMENTS_JSON_FOLDER
      value: "bucket-wikimedia-1/DPK_tests/text_encoder/tekton_tests/lance/tekton_tests_fragments_json/"
    - name: TEXT_ENCODER_LANCEDB_TABLE_NAME
      value: "tekton_tests"
    - name: TEXT_ENCODER_MODEL_NAME
      value: "ibm-granite/granite-embedding-small-english-r2"
    - name: TEXT_ENCODER_INPUT_FOLDER
      value: "bucket-wikimedia-1/DPK_tests/text_encoder/tekton_tests/input_parquets/"
    - name: TEXT_ENCODER_OUTPUT_FOLDER
      value: "bucket-wikimedia-1/DPK_tests/text_encoder/tekton_tests/input_parquets_tmp4embeddings/"
    - name: TEXT_ENCODER_CONTENTS_NAME
      value: "contents"
    - name: TEXT_ENCODER_OUTPUT_EMBEDDINGS_COLUMN_NAME
      value: "embeddings"

    # --- Runtime parameters ---
    # Quoted so the value is passed as the string "2" rather than an integer.
    - name: RUNTIME_NUM_WORKERS
      value: "2"

    # --- LANCEDB_* parameters ---
    - name: LANCEDB_STORAGE_TYPE
      value: "s3"
    - name: LANCEDB_URI
      value: "s3://bucket-wikimedia-1/DPK_tests/text_encoder/tekton_tests/lance/tekton_tests.db/"
    # Same value as TEXT_ENCODER_LANCEDB_DATA_URI above.
    - name: LANCEDB_DATA_URI
      value: "s3://bucket-wikimedia-1/DPK_tests/text_encoder/tekton_tests/lance/tekton_tests.db/tekton_tests.lance/"
    # Same value as TEXT_ENCODER_LANCEDB_TABLE_NAME above.
    - name: LANCEDB_TABLE_NAME
      value: "tekton_tests"
    # Same value as TEXT_ENCODER_LANCEDB_FRAGMENTS_JSON_FOLDER above.
    - name: LANCEDB_FRAGMENTS_JSON_FOLDER
      value: "bucket-wikimedia-1/DPK_tests/text_encoder/tekton_tests/lance/tekton_tests_fragments_json/"
    # Same value as TEXT_ENCODER_OUTPUT_FOLDER above.
    - name: LANCEDB_TABLE_SCHEMA_FOLDER
      value: "bucket-wikimedia-1/DPK_tests/text_encoder/tekton_tests/input_parquets_tmp4embeddings/"
  workspaces:
    # The workspace is backed by a PersistentVolumeClaim created from this
    # template (1Gi, ReadWriteOnce).
    - name: dpk-pipeline-ws
      volumeClaimTemplate:
        spec:
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 1Gi