@@ -62,8 +62,6 @@ public AnnotationSourceSnpEffVCF(RecordReader<String> reader, int chrPositionInR
6262 @ Override
6363 public String getAnnotation (long requestedCpAsLong , ChrPosition requestedCp ) {
6464
65- // logger.debug(reader.getFile().getName() + ": requestedCp is " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null) + ", nextCP: " + (null != nextCP ? nextCP.toIGVString() : null));
66-
6765 /*
6866 * check to see if the records we currently have stored are a match
6967 */
@@ -73,70 +71,21 @@ public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) {
7371 * we match on position
7472 * let's see if there are any records that match on ref and alt
7573 */
76- // return getAnnotationsFromRecords(requestedCp);
77- if (requestedCp instanceof ChrPositionRefAlt reqCpRefAlt ) {
78- String reqRef = reqCpRefAlt .getRef ();
79- String reqAlt = reqCpRefAlt .getAlt ();
80- for (String rec : currentRecords ) {
81- String [] recArray = TabTokenizer .tokenize (rec , DEFAULT_DELIMITER );
82- String recRef = recArray [refPositionInFile ];
83- String recAlt = recArray [altPositionInFile ];
84-
85- if (recAlt .contains ("," )) {
86- String [] recAltArray = recAlt .split ("," );
87- for (String recAltValue : recAltArray ) {
88- if (reqRef .equals (recRef ) && reqAlt .equals (recAltValue )) {
89- return annotationToReturnWithAlt (rec , recAltValue );
90- }
91- }
92- } else {
93- if (reqRef .equals (recRef ) && reqAlt .equals (recAlt )) {
94- return annotationToReturnWithAlt (rec , recAlt );
95- }
96- }
97- }
98- }
99-
74+ return getAnnotationsFromRecords (requestedCp );
10075 } else {
10176 int matchWithNextCP = Long .compare (requestedCpAsLong , nextCPAsLong );
10277 if (nextCPAsLong > -1 && matchWithNextCP < 0 ) {
10378
10479 } else {
10580
106- // logger.debug(reader.getFile().getName() + ": getting next record. requestedCp: " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null));
10781 getNextRecord (requestedCpAsLong , matchWithNextCP );
10882 if (requestedCpAsLong == currentCPAsLong ) {
10983 /*
11084 * we match on position
11185 * let's see if there are any records that match on ref and alt
11286 */
113- if (requestedCp instanceof ChrPositionRefAlt reqCpRefAlt ) {
114- String reqRef = reqCpRefAlt .getRef ();
115- String reqAlt = reqCpRefAlt .getAlt ();
116- for (String rec : currentRecords ) {
117- String [] recArray = TabTokenizer .tokenize (rec , DEFAULT_DELIMITER );
118- String recRef = recArray [refPositionInFile ];
119- String recAlt = recArray [altPositionInFile ];
120-
121- if (recAlt .contains ("," )) {
122- String [] recAltArray = recAlt .split ("," );
123- for (String recAltValue : recAltArray ) {
124- if (reqRef .equals (recRef ) && reqAlt .equals (recAltValue )) {
125- return annotationToReturnWithAlt (rec , recAltValue );
126- }
127- }
128- } else {
129- if (reqRef .equals (recRef ) && reqAlt .equals (recAlt )) {
130- return annotationToReturnWithAlt (rec , recAlt );
131- }
132- }
133- }
134- }
135- // return getAnnotationsFromRecords(requestedCp);
87+ return getAnnotationsFromRecords (requestedCp );
13688 }
137- /*
138- * requestedCP and currentCP are not equal
139- */
14089 }
14190 }
14291 return annotationToReturn (null );
@@ -151,7 +100,7 @@ private String getAnnotationsFromRecords(ChrPosition requestedCp){
151100 String recRef = recArray [refPositionInFile ];
152101 String recAlt = recArray [altPositionInFile ];
153102
154- if (recAlt .contains ( "," ) ) {
103+ if (recAlt .indexOf ( ',' ) >= 0 ) {
155104 String [] recAltArray = recAlt .split ("," );
156105 for (String recAltValue : recAltArray ) {
157106 if (reqRef .equals (recRef ) && reqAlt .equals (recAltValue )) {
@@ -177,7 +126,6 @@ public String annotationToReturn(String[] record) {
177126 * dealing with a vcf file and assuming that the required annotation fields are in the INFO field
178127 * so get that and go from there.
179128 */
180- // String[] recordArray = record.split("\t");
181129 String info = record [7 ];
182130 String alt = record [4 ];
183131
@@ -221,32 +169,31 @@ public static String extractFieldsFromInfoField(String info, List<String> fields
221169 if (StringUtils .isNullOrEmpty (worstConsequence )) {
222170 return emptyInfoFieldResult ;
223171 }
224-
225172 /*
226- * we have our consequence
227- * split by pipe and then get our fields
173+ * we have our consequences (comma-delimited)
174+ * split by comma into consequences, then by pipe into fields
228175 */
229- String [] consequenceArray = TabTokenizer . tokenize ( worstConsequence , '|' );
176+ String [] consequences = worstConsequence . split ( "," );
230177
231178 for (String af : fields ) {
232179 if (!StringUtils .isNullOrEmpty (af )) {
233180
234- /*
235- * get position from map
236- */
237181 String aflc = af .toLowerCase ();
238182 Integer arrayPosition = SNP_EFF_ANNOTATION_FIELDS_AND_POSITIONS .get (aflc );
239- if (null != arrayPosition && arrayPosition >= 0 && arrayPosition < consequenceArray .length ) {
240- /*
241- * good
242- */
243- String annotation = consequenceArray [arrayPosition ];
244- dataToReturn .append ((!dataToReturn .isEmpty ()) ? FIELD_DELIMITER_TAB + af + "=" + annotation : af + "=" + annotation );
245- } else {
246- // System.out.println("Could not find field [" + af + "] in SNP_EFF_ANNOTATION_FIELDS_AND_POSITIONS map!");
247- // System.out.println("arrayPosition.intValue(): " + arrayPosition.intValue() + ", consequenceArray.length: " + consequenceArray.length);
248- }
249183
184+ if (null != arrayPosition ) {
185+ StringBuilder fieldValues = new StringBuilder ();
186+ for (String consequence : consequences ) {
187+ String [] consequenceArray = TabTokenizer .tokenize (consequence , '|' );
188+ if (arrayPosition >= 0 && arrayPosition < consequenceArray .length ) {
189+ String annotation = consequenceArray [arrayPosition ];
190+ fieldValues .append (fieldValues .isEmpty () ? annotation : "|" + annotation );
191+ }
192+ }
193+ dataToReturn .append ((!dataToReturn .isEmpty ())
194+ ? FIELD_DELIMITER_TAB + af + "=" + fieldValues
195+ : af + "=" + fieldValues );
196+ }
250197 }
251198 }
252199 return (dataToReturn .isEmpty ()) ? emptyInfoFieldResult : dataToReturn .toString ();
@@ -283,19 +230,33 @@ public static String getWorstConsequence(String info, String alt) {
283230 * Pick the first one as that is the one with the highest effect as decreed by snpEff
284231 */
285232 int annoIndex = info .indexOf ("ANN=" );
233+ if (annoIndex < 0 ) {
234+ return "" ;
235+ }
286236 int end = info .indexOf (FIELD_DELIMITER_SEMI_COLON , annoIndex );
287237 String ann = info .substring (annoIndex + 4 , end == -1 ? info .length () : end );
288238
289239
290240 String [] annArray = ann .split ("," );
291- String worstConsequence = "" ;
241+ Map < String , String > worstByGene = new java . util . LinkedHashMap <>() ;
292242 for (String aa : annArray ) {
293- if (aa .startsWith (alt )) {
294- worstConsequence = aa ;
295- break ;
243+ int pipeIndex = aa .indexOf ('|' );
244+ if (pipeIndex <= 0 ) {
245+ // Malformed ANN entry or missing allele token; skip
246+ continue ;
247+ }
248+ String alleleToken = aa .substring (0 , pipeIndex );
249+ if (alleleToken .equals (alt )) {
250+ String [] parts = TabTokenizer .tokenize (aa , '|' );
251+ if (parts .length > 3 ) {
252+ String gene = parts [3 ];
253+ if (!StringUtils .isNullOrEmpty (gene ) && !worstByGene .containsKey (gene )) {
254+ worstByGene .put (gene , aa );
255+ }
256+ }
296257 }
297258 }
298- return worstConsequence ;
259+ return String . join ( "," , worstByGene . values ()) ;
299260 }
300261
301262 @ Override
0 commit comments