Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
122 commits
Select commit Hold shift + click to select a range
092966c
updated data model for cosmx
avikdatta May 1, 2025
70b7a8c
fixed doc
avikdatta May 1, 2025
9e0a364
added base structure for metadata registration module
avikdatta May 9, 2025
b6eecee
fetch metadata from portal
avikdatta May 13, 2025
23f2eb4
check metadata columns
avikdatta May 13, 2025
29bd001
validation checks
avikdatta May 13, 2025
362ebf5
validation checking and updating context
avikdatta May 13, 2025
5c8d15b
space holder for new commands
avikdatta May 13, 2025
2576f7f
split metadata columns
avikdatta May 14, 2025
9535b4c
filtering existing metadata
avikdatta May 14, 2025
5ad2d28
change of user table schema
avikdatta May 14, 2025
fc4728f
minor changes for data mapping
avikdatta May 15, 2025
64f585c
adding data authority and secondary email to the project
avikdatta May 15, 2025
8814626
registering new samples
avikdatta May 15, 2025
d0dcfde
sync metadata
avikdatta May 15, 2025
2590f32
metadata registration
avikdatta May 15, 2025
0c4b6b8
full execution
avikdatta May 15, 2025
36110be
updates for test
avikdatta May 15, 2025
53ef1b8
minor changes
avikdatta May 15, 2025
48c9d02
fixed types
avikdatta May 16, 2025
09d8757
updated cosmx metadata registration
avikdatta May 19, 2025
97020b9
modified cosmx tables
avikdatta Jun 4, 2025
d98952f
cosmx rna qc table
avikdatta Jun 4, 2025
8c04cd4
protein fov qc
avikdatta Jun 5, 2025
3a1b34c
cosmx annotation
avikdatta Jun 5, 2025
ad93792
added empty tasks
avikdatta Jun 9, 2025
0aef7d8
added empty tests
avikdatta Jun 9, 2025
f99bca9
fixed db schema
avikdatta Jun 9, 2025
f5585da
changed dataflow
avikdatta Jun 10, 2025
5c7bad7
added fun export factory
avikdatta Jun 10, 2025
aa1017d
added prep run export
avikdatta Jun 10, 2025
1737b19
added extract cmd
avikdatta Jun 10, 2025
c76a40e
added ftp export cmd
avikdatta Jun 10, 2025
33165d4
added prep extract task
avikdatta Jun 10, 2025
9f05629
added extract cmd
avikdatta Jun 10, 2025
9a808f2
add collect data task
avikdatta Jun 10, 2025
24c327e
added codes for slide factory
avikdatta Jun 11, 2025
e15ee67
copy missing files
avikdatta Jun 11, 2025
74e28bd
added codes for md5 validation
avikdatta Jun 11, 2025
27a6c4d
minor changes
avikdatta Jun 16, 2025
a20d30d
extra tests for extract cmd
avikdatta Jun 18, 2025
85924a1
no retry after bash cmd failure
avikdatta Jun 18, 2025
a8d4fd7
fix for cedllranger template
avikdatta Jul 1, 2025
5d2031b
snakemake
avikdatta Jul 1, 2025
01f1bea
initial commits for cosmx metadata registration
avikdatta Jul 3, 2025
6d31677
registering new cosmx runs
avikdatta Jul 3, 2025
746f4f4
registering cosmx run and slide
avikdatta Jul 3, 2025
a756fa3
loadingg cosmx fovs
avikdatta Jul 4, 2025
11b97bd
loading fov annotation
avikdatta Jul 4, 2025
ebf9f60
draft codes for count qc data validation and loading
avikdatta Jul 7, 2025
fc90b76
validation of json file
avikdatta Jul 9, 2025
1685193
cosmx db load working
avikdatta Jul 9, 2025
dc514ea
column name change
avikdatta Jul 12, 2025
9c17df2
temp changes
avikdatta Jul 12, 2025
d7b3711
additional checks before export factory
avikdatta Jul 21, 2025
98751c3
updated dag43 tasks and tests
avikdatta Jul 21, 2025
6e19d6a
updated email template
avikdatta Jul 24, 2025
1227beb
ongoing changes
avikdatta Jul 24, 2025
bb1fd0a
checking if slides are all linked to correct projects or not
avikdatta Jul 24, 2025
07b7210
fixed slide metadata collection
avikdatta Aug 4, 2025
25ecf75
minor fix
avikdatta Aug 4, 2025
647e30f
added count qc report generation
avikdatta Aug 4, 2025
8f0bab3
fix for missing json file in test
avikdatta Aug 4, 2025
b186c30
test passing for get analysis id and project id method
avikdatta Aug 6, 2025
bbf4c96
test passing for fov qc generation step
avikdatta Aug 6, 2025
bd3a736
moved boilerplate codes
avikdatta Aug 6, 2025
117979b
WIP register db
avikdatta Aug 6, 2025
dcda66d
parsing tissue annotation from design file
avikdatta Aug 8, 2025
bf522aa
parsing slide metadata file
avikdatta Aug 13, 2025
2ddde97
initial draft for db load
avikdatta Aug 14, 2025
81bb83a
added cosmx slide run date
avikdatta Aug 19, 2025
9d8de73
parsing slide name and run date from metadata file
avikdatta Aug 19, 2025
7bc17a0
chnage of metadata parsing output format
avikdatta Aug 19, 2025
a1fbeb7
added vars for count data check schemas
avikdatta Aug 19, 2025
e99583e
updates for cosmx slide metadata registration
avikdatta Aug 19, 2025
61a647a
updates for cosmx registration module
avikdatta Aug 20, 2025
36067a7
added new custom decimal data type to deal with sqlite test db
avikdatta Aug 20, 2025
66c3e90
updated fov count table validation schemas
avikdatta Aug 20, 2025
9f75a13
fixed fov count table upload to db
avikdatta Aug 20, 2025
74b4f2d
fixed loading cosmx dataset to db
avikdatta Aug 20, 2025
fe9a4c0
updated docs
avikdatta Aug 20, 2025
46164ee
updated tests for registering cosmx data to db
avikdatta Aug 20, 2025
4d433b4
added test for globus data copy
avikdatta Aug 21, 2025
95c0c26
globus copy for export dir
avikdatta Aug 23, 2025
c4ef30d
globus copy of slide reports
avikdatta Aug 23, 2025
27f02ed
passing all tests for cosmx export
avikdatta Aug 23, 2025
b37651a
excluding new tests
avikdatta Aug 23, 2025
4b30acd
added dumy task
avikdatta Aug 26, 2025
fa4dd66
fixed typo
avikdatta Aug 26, 2025
4d32aa5
adding new db migration for cosmx tables
avikdatta Aug 27, 2025
ca37a33
minor fix
avikdatta Aug 27, 2025
36776cb
updated airflow task info and docstring
avikdatta Aug 27, 2025
271f4b2
added fix for export dir name bug
avikdatta Aug 30, 2025
068e678
fix for slide ids to match modified slide names
avikdatta Aug 30, 2025
27fa90d
minor change for export command
avikdatta Aug 30, 2025
126a471
change of dir structure for globus export
avikdatta Aug 31, 2025
0501a7a
change of globus expor dir for reports
avikdatta Aug 31, 2025
31d92e4
change of task output type for globus copy
avikdatta Aug 31, 2025
6f535a6
adding missing slide id
avikdatta Aug 31, 2025
6c7865b
fix for posix path
avikdatta Aug 31, 2025
901173f
fix for flat files path
avikdatta Aug 31, 2025
a224f7a
fix for missing bind dir
avikdatta Aug 31, 2025
34f399c
changes for timeout
avikdatta Aug 31, 2025
f7eb2e4
updated test for flatfile path checking
avikdatta Aug 31, 2025
674ac8c
change of resource type
avikdatta Aug 31, 2025
2f25234
changes for notebook params
avikdatta Sep 1, 2025
f0e1fc9
minor change of kernel name
avikdatta Sep 1, 2025
47830b3
minor change of reports dir path
avikdatta Sep 1, 2025
51c326b
minor change in config
avikdatta Sep 1, 2025
2aec695
fixed kernel
avikdatta Sep 1, 2025
92bae71
changed panel name key
avikdatta Sep 1, 2025
06b76f3
fix for missing posix path
avikdatta Sep 2, 2025
aaca2a8
no input for notebook html
avikdatta Sep 2, 2025
4b4ddfc
forced copy of existing reports
avikdatta Sep 2, 2025
5ee195d
fix for existing run checking
avikdatta Sep 2, 2025
53c1cf9
fix for db row
avikdatta Sep 2, 2025
147cae0
fixed email issue
avikdatta Sep 2, 2025
564c737
more fix for email template
avikdatta Sep 2, 2025
a2381a9
updated globus copy of reports
avikdatta Sep 3, 2025
b6c9cd5
updated tables
avikdatta Sep 3, 2025
e288ac1
updated cosmx report upload to filesystem
avikdatta Sep 3, 2025
b5f4a86
Merge branch 'master' into cosmx_may25
avikdatta Sep 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Python application

on:
push:
branches: [ master, fix_dag25]
branches: [ master, cosmx_may25]
pull_request:
branches: [ "master" ]

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"$schema": "https://json-schema.org/draft/2020-12/json-schema-core.html#",
"id": "https://github.com/imperial-genomics-facility/data-management-python/tree/master/data/validation_schema/cosmx_protein_count_file_validation_schema.json#",
"title": "IGF COSMX PROTEIN count file validation schema",
"description": "Schema for validation of COSMX PROTEIN count file",
"type" : "array",
"version": "1.0.0",
"minItems": 1,
"uniqueItems": true,
"items": {
"type": "object",
"properties" : {
"fov_id" : {
"type" : "integer"
},
"mean_fluorescence_intensity" : {
"type" : "integer"
},
"mean_unique_genes_per_cell" : {
"type" : "number"
},
"number_non_empty_cells" : {
"type" : "integer"
},
"pct_non_empty_cells" : {
"type" : "number"
},
"percentile_10_fluorescence_intensity" : {
"type" : "number"
},
"percentile_90_fluorescence_intensity" : {
"type" : "number"
},
"fluorescence_intensity_mean_igg_control_intensity": {
"type" : "number"
}
},
"required": [
"fov_id",
"mean_fluorescence_intensity",
"mean_unique_genes_per_cell",
"number_non_empty_cells",
"pct_non_empty_cells",
"percentile_10_fluorescence_intensity",
"percentile_90_fluorescence_intensity",
"fluorescence_intensity_mean_igg_control_intensity"
]
}
}
49 changes: 49 additions & 0 deletions data/validation_schema/cosmx_rna_count_file_validation_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"$schema": "https://json-schema.org/draft/2020-12/json-schema-core.html#",
"id": "https://github.com/imperial-genomics-facility/data-management-python/tree/master/data/validation_schema/cosmx_rna_count_file_validation_schema.json#",
"title": "IGF COSMX RNA count file validation schema",
"description": "Schema for validation of COSMX RNA count file",
"type" : "array",
"version": "1.0.0",
"minItems": 1,
"uniqueItems": true,
"items": {
"type": "object",
"properties" : {
"fov_id" : {
"type" : "integer"
},
"mean_transcript_per_cell" : {
"type" : "number"
},
"mean_unique_genes_per_cell" : {
"type" : "number"
},
"number_non_empty_cells" : {
"type" : "integer"
},
"pct_non_empty_cells" : {
"type" : "number"
},
"percentile_90_transcript_per_cell" : {
"type" : "number"
},
"percentile_10_transcript_per_cell" : {
"type" : "number"
},
"mean_negprobe_counts_per_cell": {
"type" : "number"
}
},
"required": [
"fov_id",
"mean_transcript_per_cell",
"mean_unique_genes_per_cell",
"number_non_empty_cells",
"pct_non_empty_cells",
"percentile_90_transcript_per_cell",
"percentile_10_transcript_per_cell",
"mean_negprobe_counts_per_cell"
]
}
}
55 changes: 55 additions & 0 deletions data/validation_schema/minimal_metadata_validation.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"id": "https://github.com/imperial-genomics-facility/data-management-python/tree/master/data/validation_schema/milimal_metadata_validation.json#",
"title": "IGF minimal metadata validation schema",
"description": "Schema for validation of minimal metadata input file",
"type" : "array",
"version": "1.0.0",
"minItems": 1,
"uniqueItems": true,
"items": {
"type": "object",
"properties" : {
"project_igf_id" : {
"type" : "string",
"pattern": "^IGF[a-zA-Z0-9-_]+$",
"maxLength": 50
},
"deliverable" : {
"type" : "string",
"enum" : ["FASTQ",
"COSMX"]
},
"name" : {
"type" : "string",
"pattern": "^\\w+\\s\\w+\\s?\\w+?$",
"maxLength": 30
},
"username" : {
"type" : "string",
"pattern": "^\\S+$|^$",
"maxLength": 20
},
"email_id" : {
"type" : "string",
"pattern": "\\S+\\@\\S+\\.\\w+",
"maxLength": 40
},
"sample_igf_id" : {
"type" : "string",
"pattern": "^IGF[a-zA-Z0-9-_]+$",
"maxLength": 20
},
"species_name" : {
"type" : "string",
"maxLength": 50
}
},
"required": [
"project_igf_id",
"deliverable",
"name",
"email_id",
"username"]
}
}
Loading