Skip to content

Commit 0484c83

Browse files
authored
Merge pull request #400 from AdamaJava/nanno_multi_gene
feat(nanno): if snpeff deems a variant to be in multiple genes report them all
2 parents 86ea961 + fa8743e commit 0484c83

7 files changed

Lines changed: 374 additions & 141 deletions

File tree

qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSource.java

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -78,25 +78,24 @@ public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) {
7878
* let's see if there are any records that match on ref and alt
7979
*/
8080
return getAnnotationsFromCurrentRecords(requestedCp);
81+
}
8182

82-
} else {
83-
int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong);
84-
if (nextCPAsLong > -1 && matchWithNextCP < 0) {
85-
/*
86-
* requestedCp is "less than" next CP
87-
* return empty list here
88-
*/
89-
} else {
90-
// logger.debug(reader.getFile().getName() + ": getting next record. requestedCp: " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null));
91-
getNextRecord(requestedCpAsLong, matchWithNextCP);
92-
if (requestedCpAsLong == currentCPAsLong) {
93-
return getAnnotationsFromCurrentRecords(requestedCp);
94-
}
95-
/*
96-
* requestedCP and currentCP are not equal
97-
*/
98-
}
83+
int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong);
84+
if (nextCPAsLong > -1 && matchWithNextCP < 0) {
85+
/*
86+
* requestedCp is "less than" next CP
87+
* return empty list here
88+
*/
89+
return annotationToReturn(null);
90+
}
91+
92+
getNextRecord(requestedCpAsLong, matchWithNextCP);
93+
if (requestedCpAsLong == currentCPAsLong) {
94+
return getAnnotationsFromCurrentRecords(requestedCp);
9995
}
96+
/*
97+
* requestedCP and currentCP are not equal
98+
*/
10099
return annotationToReturn(null);
101100
}
102101

qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java

Lines changed: 38 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,6 @@ public AnnotationSourceSnpEffVCF(RecordReader<String> reader, int chrPositionInR
6262
@Override
6363
public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) {
6464

65-
// logger.debug(reader.getFile().getName() + ": requestedCp is " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null) + ", nextCP: " + (null != nextCP ? nextCP.toIGVString() : null));
66-
6765
/*
6866
* check to see if the records we currently have stored are a match
6967
*/
@@ -73,70 +71,21 @@ public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) {
7371
* we match on position
7472
* let's see if there are any records that match on ref and alt
7573
*/
76-
// return getAnnotationsFromRecords(requestedCp);
77-
if (requestedCp instanceof ChrPositionRefAlt reqCpRefAlt) {
78-
String reqRef = reqCpRefAlt.getRef();
79-
String reqAlt = reqCpRefAlt.getAlt();
80-
for (String rec : currentRecords) {
81-
String[] recArray = TabTokenizer.tokenize(rec, DEFAULT_DELIMITER);
82-
String recRef = recArray[refPositionInFile];
83-
String recAlt = recArray[altPositionInFile];
84-
85-
if (recAlt.contains(",")) {
86-
String[] recAltArray = recAlt.split(",");
87-
for (String recAltValue : recAltArray) {
88-
if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) {
89-
return annotationToReturnWithAlt(rec, recAltValue);
90-
}
91-
}
92-
} else {
93-
if (reqRef.equals(recRef) && reqAlt.equals(recAlt)) {
94-
return annotationToReturnWithAlt(rec, recAlt);
95-
}
96-
}
97-
}
98-
}
99-
74+
return getAnnotationsFromRecords(requestedCp);
10075
} else {
10176
int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong);
10277
if (nextCPAsLong > -1 && matchWithNextCP < 0) {
10378

10479
} else {
10580

106-
// logger.debug(reader.getFile().getName() + ": getting next record. requestedCp: " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null));
10781
getNextRecord(requestedCpAsLong, matchWithNextCP);
10882
if (requestedCpAsLong == currentCPAsLong) {
10983
/*
11084
* we match on position
11185
* let's see if there are any records that match on ref and alt
11286
*/
113-
if (requestedCp instanceof ChrPositionRefAlt reqCpRefAlt) {
114-
String reqRef = reqCpRefAlt.getRef();
115-
String reqAlt = reqCpRefAlt.getAlt();
116-
for (String rec : currentRecords) {
117-
String[] recArray = TabTokenizer.tokenize(rec, DEFAULT_DELIMITER);
118-
String recRef = recArray[refPositionInFile];
119-
String recAlt = recArray[altPositionInFile];
120-
121-
if (recAlt.contains(",")) {
122-
String[] recAltArray = recAlt.split(",");
123-
for (String recAltValue : recAltArray) {
124-
if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) {
125-
return annotationToReturnWithAlt(rec, recAltValue);
126-
}
127-
}
128-
} else {
129-
if (reqRef.equals(recRef) && reqAlt.equals(recAlt)) {
130-
return annotationToReturnWithAlt(rec, recAlt);
131-
}
132-
}
133-
}
134-
}
135-
// return getAnnotationsFromRecords(requestedCp);
87+
return getAnnotationsFromRecords(requestedCp);
13688
}
137-
/*
138-
* requestedCP and currentCP are not equal
139-
*/
14089
}
14190
}
14291
return annotationToReturn(null);
@@ -151,7 +100,7 @@ private String getAnnotationsFromRecords(ChrPosition requestedCp){
151100
String recRef = recArray[refPositionInFile];
152101
String recAlt = recArray[altPositionInFile];
153102

154-
if (recAlt.contains(",")) {
103+
if (recAlt.indexOf(',') >= 0) {
155104
String[] recAltArray = recAlt.split(",");
156105
for (String recAltValue : recAltArray) {
157106
if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) {
@@ -177,7 +126,6 @@ public String annotationToReturn(String[] record) {
177126
* dealing with a vcf file and assuming that the required annotation fields are in the INFO field
178127
* so get that and go from there.
179128
*/
180-
// String[] recordArray = record.split("\t");
181129
String info = record[7];
182130
String alt = record[4];
183131

@@ -221,32 +169,31 @@ public static String extractFieldsFromInfoField(String info, List<String> fields
221169
if (StringUtils.isNullOrEmpty(worstConsequence)) {
222170
return emptyInfoFieldResult;
223171
}
224-
225172
/*
226-
* we have our consequence
227-
* split by pipe and then get our fields
173+
* we have our consequences (comma-delimited)
174+
* split by comma into consequences, then by pipe into fields
228175
*/
229-
String[] consequenceArray = TabTokenizer.tokenize(worstConsequence, '|');
176+
String[] consequences = worstConsequence.split(",");
230177

231178
for (String af : fields) {
232179
if (!StringUtils.isNullOrEmpty(af)) {
233180

234-
/*
235-
* get position from map
236-
*/
237181
String aflc = af.toLowerCase();
238182
Integer arrayPosition = SNP_EFF_ANNOTATION_FIELDS_AND_POSITIONS.get(aflc);
239-
if (null != arrayPosition && arrayPosition >= 0 && arrayPosition < consequenceArray.length) {
240-
/*
241-
* good
242-
*/
243-
String annotation = consequenceArray[arrayPosition];
244-
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" + annotation : af + "=" + annotation);
245-
} else {
246-
// System.out.println("Could not find field [" + af + "] in SNP_EFF_ANNOTATION_FIELDS_AND_POSITIONS map!");
247-
// System.out.println("arrayPosition.intValue(): " + arrayPosition.intValue() + ", consequenceArray.length: " + consequenceArray.length);
248-
}
249183

184+
if (null != arrayPosition) {
185+
StringBuilder fieldValues = new StringBuilder();
186+
for (String consequence : consequences) {
187+
String[] consequenceArray = TabTokenizer.tokenize(consequence, '|');
188+
if (arrayPosition >= 0 && arrayPosition < consequenceArray.length) {
189+
String annotation = consequenceArray[arrayPosition];
190+
fieldValues.append(fieldValues.isEmpty() ? annotation : "|" + annotation);
191+
}
192+
}
193+
dataToReturn.append((!dataToReturn.isEmpty())
194+
? FIELD_DELIMITER_TAB + af + "=" + fieldValues
195+
: af + "=" + fieldValues);
196+
}
250197
}
251198
}
252199
return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString();
@@ -283,19 +230,33 @@ public static String getWorstConsequence(String info, String alt) {
283230
* For each gene, keep the first matching entry, as that is the one with the highest effect as decreed by snpEff
284231
*/
285232
int annoIndex = info.indexOf("ANN=");
233+
if (annoIndex < 0) {
234+
return "";
235+
}
286236
int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, annoIndex);
287237
String ann = info.substring(annoIndex + 4, end == -1 ? info.length() : end);
288238

289239

290240
String[] annArray = ann.split(",");
291-
String worstConsequence = "";
241+
Map<String, String> worstByGene = new java.util.LinkedHashMap<>();
292242
for (String aa : annArray) {
293-
if (aa.startsWith(alt)) {
294-
worstConsequence = aa;
295-
break;
243+
int pipeIndex = aa.indexOf('|');
244+
if (pipeIndex <= 0) {
245+
// Malformed ANN entry or missing allele token; skip
246+
continue;
247+
}
248+
String alleleToken = aa.substring(0, pipeIndex);
249+
if (alleleToken.equals(alt)) {
250+
String[] parts = TabTokenizer.tokenize(aa, '|');
251+
if (parts.length > 3) {
252+
String gene = parts[3];
253+
if (!StringUtils.isNullOrEmpty(gene) && !worstByGene.containsKey(gene)) {
254+
worstByGene.put(gene, aa);
255+
}
256+
}
296257
}
297258
}
298-
return worstConsequence;
259+
return String.join(",", worstByGene.values());
299260
}
300261

301262
@Override

qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import java.io.IOException;
44
import java.util.Arrays;
55
import java.util.Collections;
6-
import java.util.HashMap;
6+
import java.util.LinkedHashMap;
77
import java.util.List;
88
import java.util.Map;
99
import java.util.Map.Entry;
@@ -19,6 +19,9 @@ public class AnnotationSourceTSV extends AnnotationSource {
1919
List<String> headerLines;
2020
Map<String, Integer> headerNameAndPosition;
2121

22+
private String[] fieldNames;
23+
private int[] fieldPositions;
24+
2225
public AnnotationSourceTSV(RecordReader<String> reader, int chrPositionInRecord, int positionPositionInRecord,
2326
int refPositionInFile, int altPositionInFile, String fieldNames, boolean chrStartsWithChr) {
2427
super(reader, chrPositionInRecord, positionPositionInRecord, refPositionInFile, altPositionInFile, chrStartsWithChr);
@@ -46,6 +49,13 @@ public AnnotationSourceTSV(RecordReader<String> reader, int chrPositionInRecord,
4649
if (headerNameAndPosition.isEmpty()) {
4750
throw new IllegalArgumentException("Could not find requested fields (" + fieldNames + ") in header: " + headerLine);
4851
}
52+
// precompute arrays for fast extraction, preserving user-requested field order
53+
String[] requestedFields = fieldNames.split(",");
54+
this.fieldNames = requestedFields;
55+
this.fieldPositions = new int[requestedFields.length];
56+
for (int i = 0; i < requestedFields.length; i++) {
57+
this.fieldPositions[i] = headerNameAndPosition.get(requestedFields[i]);
58+
}
4959
}
5060

5161
/*
@@ -72,7 +82,7 @@ public static String getLastHeaderLine(List<String> headerLines) {
7282
* return an empty map if any of the fields are not in the header
7383
*/
7484
public static Map<String, Integer> getHeaderNameAndPositions(String fieldNames, String header) {
75-
Map<String, Integer> namePositions = new HashMap<>();
85+
Map<String, Integer> namePositions = new LinkedHashMap<>();
7686

7787
System.out.println("header: " + header);
7888

@@ -96,20 +106,18 @@ public String annotationToReturn(String[] record) {
96106
/*
97107
* extract the requested fields from the record by column position
98108
*/
99-
return extractFieldsFromRecord(record, headerNameAndPosition);
109+
return extractFieldsFromRecord(record, fieldNames, fieldPositions);
100110
}
101111

102-
public static String extractFieldsFromRecord(String[] record, Map<String, Integer> fields) {
112+
public static String extractFieldsFromRecord(String[] record, String[] fieldNames, int[] fieldPositions) {
103113
StringBuilder dataToReturn = new StringBuilder();
104114
int recordLength = null != record ? record.length : 0;
105-
if ( recordLength > 0 && null != fields) {
106-
// String [] recordArray = TabTokenizer.tokenize(record);
107-
for (Entry<String, Integer> entry : fields.entrySet()) {
108-
/*
109-
* make sure that array length is not shorter than entry value
110-
*/
111-
if (recordLength > entry.getValue()) {
112-
dataToReturn.append(( ! dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "").append(entry.getKey()).append("=").append(record[entry.getValue()]);
115+
if (recordLength > 0 && null != fieldNames && null != fieldPositions) {
116+
for (int i = 0; i < Math.min(fieldNames.length, fieldPositions.length); i++) {
117+
int pos = fieldPositions[i];
118+
if (recordLength > pos) {
119+
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "")
120+
.append(fieldNames[i]).append("=").append(record[pos]);
113121
}
114122
}
115123
}

qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceVCF.java

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
import java.io.IOException;
44
import java.util.Arrays;
5+
import java.util.HashMap;
56
import java.util.List;
7+
import java.util.Map;
68
import java.util.stream.Collectors;
79

810
import org.qcmg.common.string.StringUtils;
@@ -11,7 +13,8 @@
1113
public class AnnotationSourceVCF extends AnnotationSource {
1214

1315
public static final String FIELD_DELIMITER_SEMI_COLON = ";";
14-
16+
private static final int INFO_LENGTH_PARSE_THRESHOLD = 2000;
17+
private static final int FIELDS_PARSE_THRESHOLD = 3;
1518

1619

1720
List<String> annotationFields;
@@ -52,24 +55,52 @@ public String annotationToReturn(String [] record) {
5255

5356

5457
public static String extractFieldsFromInfoField(String info, List<String> fields, String emptyInfoFieldResult) {
55-
if (StringUtils.isNullOrEmptyOrMissingData(info)) {
58+
if (StringUtils.isNullOrEmptyOrMissingData(info) || fields == null) {
5659
return emptyInfoFieldResult;
5760
}
58-
StringBuilder dataToReturn = new StringBuilder();
59-
for (String af : fields) {
60-
if ( ! StringUtils.isNullOrEmpty(af)) {
61-
int start = info.indexOf(af + "=");
62-
if (start > -1) {
63-
int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start);
64-
if (end == -1) {
65-
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start) : info.substring(start));
61+
boolean parseOnce = (fields.size() > FIELDS_PARSE_THRESHOLD) || info.length() > INFO_LENGTH_PARSE_THRESHOLD;
62+
if ( ! parseOnce) {
63+
StringBuilder dataToReturn = new StringBuilder();
64+
for (String af : fields) {
65+
if (!StringUtils.isNullOrEmpty(af)) {
66+
int start = info.indexOf(af + "=");
67+
if (start > -1) {
68+
int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start);
69+
if (end == -1) {
70+
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start) : info.substring(start));
71+
} else {
72+
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start, end) : info.substring(start, end));
73+
}
6674
} else {
67-
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start, end) : info.substring(start, end));
75+
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" : af + "=");
6876
}
69-
} else {
70-
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" : af + "=");
7177
}
7278
}
79+
return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString();
80+
}
81+
Map<String, String> infoMap = new HashMap<>();
82+
int start = 0;
83+
while (start <= info.length()) {
84+
int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start);
85+
if (end == -1) end = info.length();
86+
87+
String token = info.substring(start, end);
88+
int eq = token.indexOf('=');
89+
if (eq > -1) {
90+
infoMap.put(token.substring(0, eq), token.substring(eq + 1));
91+
} else if (!token.isEmpty()) {
92+
infoMap.put(token, "");
93+
}
94+
95+
start = end + 1;
96+
}
97+
StringBuilder dataToReturn = new StringBuilder();
98+
for (String af : fields) {
99+
if (!StringUtils.isNullOrEmpty(af)) {
100+
String value = infoMap.get(af);
101+
String entry = (value != null) ? af + "=" + value : af + "=";
102+
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "").append(entry);
103+
}
73104
}
74105
return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString();
75106
}

qannotate/test/au/edu/qimr/qannotate/nanno/AnnotateTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ public void jsonInputsTSV() throws IOException {
115115
AnnotateUtils.populateAnnotationSources(ais, sources);
116116
assertEquals(1, sources.size());
117117
String annotation = sources.getFirst().getAnnotation(ChrPositionUtils.convertContigAndPositionToLong("1", 655652), new ChrPositionRefAlt("chr1", 655652, 655652, "A", "T"));
118-
assertEquals("HGVSc_VEP=c.1A>C\tHGVSp_VEP=p.Met1?\taaref=M", annotation);
118+
assertEquals("aaref=M\tHGVSc_VEP=c.1A>C\tHGVSp_VEP=p.Met1?", annotation);
119119
}
120120

121121
@Test

0 commit comments

Comments
 (0)