# File: pipeline-text-encoder-lance-commit.yaml
# Repository: forked from data-prep-kit/data-prep-kit
# Original file size: 99 lines (97 loc), 3.12 KB
---
# Tekton Pipeline that chains three tasks over one shared workspace:
#   1. execute-repo-task          (taskRef: repo)
#   2. execute-text-encoder-task  (taskRef: text-encoder), after step 1
#   3. execute-lance-commit-task  (taskRef: lance-commit), after step 2
apiVersion: tekton.dev/v1
kind: Pipeline
metadata:
  name: text-encoder-lance-commit

spec:
  # Single pipeline-level workspace; every task below binds it as `jobs`.
  workspaces:
    - name: dpk-pipeline-ws

  params:
    # --- Forwarded unchanged to execute-text-encoder-task ---
    - name: TEXT_ENCODER_LANCEDB_DATA_URI
      type: string
    - name: TEXT_ENCODER_LANCEDB_FRAGMENTS_JSON_FOLDER
      type: string
    - name: TEXT_ENCODER_LANCEDB_TABLE_NAME
      type: string
    - name: TEXT_ENCODER_MODEL_NAME
      type: string
      default: 'ibm-granite/granite-embedding-small-english-r2'
    - name: TEXT_ENCODER_INPUT_FOLDER
      type: string
    - name: TEXT_ENCODER_OUTPUT_FOLDER
      type: string
    - name: TEXT_ENCODER_CONTENT_COLUMN_NAME
      type: string
      default: 'contents'
    - name: TEXT_ENCODER_OUTPUT_EMBEDDINGS_COLUMN_NAME
      type: string
      default: 'embeddings'
    # Declared as a string, so the default stays quoted.
    - name: RUNTIME_NUM_WORKERS
      type: string
      default: '10'

    # --- Forwarded unchanged to execute-lance-commit-task ---
    # NOTE(review): the TEXT_ENCODER_LANCEDB_* and LANCEDB_* parameters are
    # declared independently; presumably the matching pairs must point at
    # the same location/table - confirm against the task definitions.
    - name: LANCEDB_STORAGE_TYPE
      type: string
    - name: LANCEDB_URI
      type: string
    - name: LANCEDB_DATA_URI
      type: string
    - name: LANCEDB_TABLE_NAME
      type: string
    - name: LANCEDB_FRAGMENTS_JSON_FOLDER
      type: string
    - name: LANCEDB_TABLE_SCHEMA_FOLDER
      type: string

  tasks:
    # First task: no params and no runAfter, only the shared workspace.
    - name: execute-repo-task
      taskRef:
        name: repo
      workspaces:
        - name: jobs
          workspace: dpk-pipeline-ws

    # Second task: starts only after execute-repo-task.
    - name: execute-text-encoder-task
      taskRef:
        name: text-encoder
      runAfter:
        - execute-repo-task
      workspaces:
        - name: jobs
          workspace: dpk-pipeline-ws
      params:
        - name: TEXT_ENCODER_LANCEDB_DATA_URI
          value: $(params.TEXT_ENCODER_LANCEDB_DATA_URI)
        - name: TEXT_ENCODER_LANCEDB_FRAGMENTS_JSON_FOLDER
          value: $(params.TEXT_ENCODER_LANCEDB_FRAGMENTS_JSON_FOLDER)
        - name: TEXT_ENCODER_LANCEDB_TABLE_NAME
          value: $(params.TEXT_ENCODER_LANCEDB_TABLE_NAME)
        - name: TEXT_ENCODER_MODEL_NAME
          value: $(params.TEXT_ENCODER_MODEL_NAME)
        - name: TEXT_ENCODER_INPUT_FOLDER
          value: $(params.TEXT_ENCODER_INPUT_FOLDER)
        - name: TEXT_ENCODER_OUTPUT_FOLDER
          value: $(params.TEXT_ENCODER_OUTPUT_FOLDER)
        - name: TEXT_ENCODER_CONTENT_COLUMN_NAME
          value: $(params.TEXT_ENCODER_CONTENT_COLUMN_NAME)
        - name: TEXT_ENCODER_OUTPUT_EMBEDDINGS_COLUMN_NAME
          value: $(params.TEXT_ENCODER_OUTPUT_EMBEDDINGS_COLUMN_NAME)
        - name: RUNTIME_NUM_WORKERS
          value: $(params.RUNTIME_NUM_WORKERS)

    # Third task: starts only after execute-text-encoder-task.
    - name: execute-lance-commit-task
      taskRef:
        name: lance-commit
      runAfter:
        - execute-text-encoder-task
      workspaces:
        - name: jobs
          workspace: dpk-pipeline-ws
      params:
        - name: LANCEDB_STORAGE_TYPE
          value: $(params.LANCEDB_STORAGE_TYPE)
        - name: LANCEDB_URI
          value: $(params.LANCEDB_URI)
        - name: LANCEDB_DATA_URI
          value: $(params.LANCEDB_DATA_URI)
        - name: LANCEDB_TABLE_NAME
          value: $(params.LANCEDB_TABLE_NAME)
        - name: LANCEDB_FRAGMENTS_JSON_FOLDER
          value: $(params.LANCEDB_FRAGMENTS_JSON_FOLDER)
        - name: LANCEDB_TABLE_SCHEMA_FOLDER
          value: $(params.LANCEDB_TABLE_SCHEMA_FOLDER)