|
| 1 | +import { TabixIndexedFile } from '@gmod/tabix' |
| 2 | +import VcfParser from '@gmod/vcf' |
| 3 | +import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter' |
1 | 4 | import { |
2 | | - BaseFeatureDataAdapter, |
3 | | - BaseOptions, |
4 | | -} from '@jbrowse/core/data_adapters/BaseAdapter' |
5 | | -import { NoAssemblyRegion } from '@jbrowse/core/util/types' |
| 5 | + fetchAndMaybeUnzipText, |
| 6 | + updateStatus, |
| 7 | +} from '@jbrowse/core/util' |
6 | 8 | import { openLocation } from '@jbrowse/core/util/io' |
7 | 9 | import { ObservableCreate } from '@jbrowse/core/util/rxjs' |
8 | | -import { Feature } from '@jbrowse/core/util' |
9 | | -import { TabixIndexedFile } from '@gmod/tabix' |
10 | | -import VcfParser from '@gmod/vcf' |
| 10 | + |
| 11 | +import type { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter' |
| 12 | +import type { Feature } from '@jbrowse/core/util' |
| 13 | +import type { NoAssemblyRegion } from '@jbrowse/core/util/types' |
11 | 14 | import { VcfFeature } from '@jbrowse/plugin-variants'; |
12 | 15 |
|
/**
 * Shortens `name` for display: strings longer than `max` UTF-16 code units
 * are cut and suffixed with `...`; anything else is returned unchanged.
 *
 * @param name - text to shorten
 * @param max - number of code units kept before the ellipsis (default 70)
 * @returns `name` itself, or a truncated prefix followed by `...`
 */
function shorten2(name: string, max = 70) {
  if (!(name.length > max)) {
    return name
  }
  // Never cut between the two halves of a surrogate pair: when the last kept
  // code unit is a high surrogate and the first dropped one is its low
  // surrogate, drop the high surrogate as well so the output stays well-formed.
  const lastKept = name.charCodeAt(max - 1)
  const firstDropped = name.charCodeAt(max)
  const splitsPair =
    lastKept >= 0xd800 &&
    lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 &&
    firstDropped <= 0xdfff
  const end = splitsPair ? max - 1 : max
  return `${name.slice(0, end)}...`
}
| 19 | + |
13 | 20 | export default class VcfTabixAdapter extends BaseFeatureDataAdapter { |
14 | 21 | private configured?: Promise<{ |
15 | 22 | vcf: TabixIndexedFile |
16 | 23 | parser: VcfParser |
17 | 24 | }> |
18 | 25 |
|
/**
 * Builds the tabix-indexed VCF handle and a VCF parser from the adapter
 * configuration.
 *
 * Reads the `vcfGzLocation`, `index.location` and `index.indexType` config
 * values. The index location is supplied as `csiFilehandle` when the index
 * type is `'CSI'` and as `tbiFilehandle` otherwise. The parser is created
 * from the header returned by `vcf.getHeader()`.
 *
 * @param _opts - accepted for signature compatibility; not read here
 * @returns an object holding the `vcf` file wrapper and its `parser`
 */
private async configurePre(_opts?: BaseOptions) {
  const vcfGzLocation = this.getConf('vcfGzLocation')
  const indexLocation = this.getConf(['index', 'location'])
  const indexType = this.getConf(['index', 'indexType'])

  const pluginManager = this.pluginManager
  const filehandle = openLocation(vcfGzLocation, pluginManager)
  // Exactly one of the two index slots is filled, so open the index once
  // and route it to the slot matching the configured index type.
  const indexFilehandle = openLocation(indexLocation, pluginManager)
  const usesCsi = indexType === 'CSI'

  const vcf = new TabixIndexedFile({
    filehandle,
    csiFilehandle: usesCsi ? indexFilehandle : undefined,
    tbiFilehandle: usesCsi ? undefined : indexFilehandle,
    // 50 * 2**20 = 52,428,800 (presumably bytes, i.e. 50 MiB — confirm
    // against the TabixIndexedFile documentation)
    chunkCacheSize: 50 * 2 ** 20,
  })

  const header = await vcf.getHeader()
  const parser = new VcfParser({ header })
  return { vcf, parser }
}
40 | 51 |
|
/**
 * Returns the cached setup promise, starting `configurePre` on first use.
 *
 * When the setup rejects, the cached promise is cleared before the error is
 * rethrown, so a later call starts a fresh attempt.
 */
protected async configurePre2() {
  if (this.configured) {
    return this.configured
  }
  const pending = this.configurePre().catch((e: unknown) => {
    // forget the failed attempt so the next caller retries
    this.configured = undefined
    throw e
  })
  this.configured = pending
  return pending
}
50 | 61 |
|
/**
 * Runs the memoized setup (`configurePre2`) through `updateStatus` with the
 * message 'Downloading index'.
 *
 * @param opts - optional adapter options; only `statusCallback` is read here,
 *   and a no-op is substituted when it is absent
 * @returns whatever `updateStatus` yields for the setup call (the callers in
 *   this class destructure `vcf` / `parser` from it)
 */
async configure(opts?: BaseOptions) {
  const noop = () => {}
  const { statusCallback = noop } = opts || {}
  const runSetup = () => this.configurePre2()
  return updateStatus('Downloading index', statusCallback, runSetup)
}
/**
 * Returns the reference sequence names reported by the tabix-indexed file.
 *
 * @param opts - adapter options, forwarded both to `configure` and to
 *   `getReferenceSequenceNames`
 */
public async getRefNames(opts: BaseOptions = {}) {
  const configured = await this.configure(opts)
  return configured.vcf.getReferenceSequenceNames(opts)
}
55 | 72 |
|
/**
 * Returns the header of the configured VCF file, as given by
 * `vcf.getHeader()`.
 *
 * @param opts - optional adapter options, forwarded to `configure`
 */
async getHeader(opts?: BaseOptions) {
  const configured = await this.configure(opts)
  return configured.vcf.getHeader()
}
60 | 77 |
|
/**
 * Returns the metadata exposed by the VCF parser (`parser.getMetadata()`).
 *
 * @param opts - optional adapter options, forwarded to `configure`
 */
async getMetadata(opts?: BaseOptions) {
  const configured = await this.configure(opts)
  return configured.parser.getMetadata()
}
65 | 82 |
|
/**
 * Streams the variants overlapping `query` as `VcfFeature` objects.
 *
 * Each line delivered by `vcf.getLines` is parsed and emitted with an id of
 * the form `<adapter id>-vcf-<file offset>`. The observable completes once
 * `getLines` has resolved. The line fetch is run through `updateStatus` with
 * the message 'Downloading variants'.
 *
 * @param query - region to fetch (`refName`, `start`, `end`)
 * @param opts - adapter options; `statusCallback` and `stopToken` are read
 *   here, and the whole object is spread into the `getLines` options after
 *   `lineCallback`
 */
public getFeatures(query: NoAssemblyRegion, opts: BaseOptions = {}) {
  return ObservableCreate<Feature>(async observer => {
    const noop = () => {}
    const { statusCallback = noop } = opts
    const { vcf, parser } = await this.configure(opts)

    const fetchLines = () =>
      vcf.getLines(query.refName, query.start, query.end, {
        lineCallback: (line, fileOffset) => {
          const variant = parser.parseLine(line)
          const id = `${this.id}-vcf-${fileOffset}`
          observer.next(new VcfFeature({ variant, parser, id }))
        },
        // spread last, exactly as before, so keys in opts take precedence
        ...opts,
      })

    await updateStatus('Downloading variants', statusCallback, fetchLines)
    observer.complete()
  }, opts.stopToken)
}
| 106 | + |
/**
 * Lists the samples ("sources") of this VCF, optionally enriched with the
 * columns of the tab-separated file configured in `samplesTsvLocation`.
 *
 * - When the configured `uri` is empty or still the placeholder
 *   `/path/to/samples.tsv`, returns one `{ name }` entry per sample listed by
 *   the VCF parser.
 * - Otherwise the TSV is fetched; its first line is the header and every
 *   following non-empty line is one row. Column 0 is always exposed as
 *   `name`; the remaining cells are keyed by their header column. Cells
 *   without a matching header column are ignored. Rows whose name is not a
 *   VCF sample are dropped from the result, and mismatches in either
 *   direction are reported with `console.warn`.
 *
 * @returns one record per sample, each with at least a `name` property
 */
async getSources() {
  const conf = this.getConf('samplesTsvLocation')
  if (conf.uri === '' || conf.uri === '/path/to/samples.tsv') {
    const { parser } = await this.configure()
    return parser.samples.map(name => ({
      name,
    }))
  }

  const txt = await fetchAndMaybeUnzipText(
    openLocation(conf, this.pluginManager),
  )
  const lines = txt.split(/\n|\r\n|\r/)
  // split() always yields at least one element, so the fallback is never hit
  const header = (lines[0] ?? '').split('\t')
  const { parser } = await this.configure()
  const metadataLines = lines
    .slice(1)
    .filter(f => !!f)
    .map(line => {
      // the line is non-empty, so column 0 always exists
      const [name = '', ...rest] = line.split('\t')
      const columns = rest.flatMap((cell, idx) => {
        const key = header[idx + 1]
        // a row wider than the header has no column name for this cell
        return key === undefined ? [] : [[key, cell] as const]
      })
      return {
        ...Object.fromEntries(columns),
        // assigned last to force col 0 to be called name
        name,
      }
    })

  const vcfSampleSet = new Set(parser.samples)
  const metadataSet = new Set(metadataLines.map(r => r.name))
  const metadataNotInVcfSamples = [...metadataSet].filter(
    f => !vcfSampleSet.has(f),
  )
  const vcfSamplesNotInMetadata = [...vcfSampleSet].filter(
    f => !metadataSet.has(f),
  )
  if (metadataNotInVcfSamples.length) {
    console.warn(
      `There are ${metadataNotInVcfSamples.length} samples in metadata file (${metadataLines.length} lines) not in VCF (${parser.samples.length} samples):`,
      shorten2(metadataNotInVcfSamples.join(',')),
    )
  }
  if (vcfSamplesNotInMetadata.length) {
    console.warn(
      `There are ${vcfSamplesNotInMetadata.length} samples in VCF file (${parser.samples.length} samples) not in metadata file (${metadataLines.length} lines):`,
      shorten2(vcfSamplesNotInMetadata.join(',')),
    )
  }
  return metadataLines.filter(f => vcfSampleSet.has(f.name))
}
85 | 157 |
|
/**
 * No-op: the body is empty, so calling this performs no cleanup.
 */
public freeResources(/* { region } */): void {
  // nothing to release
}
|
0 commit comments