77package org .qcmg .qbamfilter .filter ;
88
99import htsjdk .samtools .SAMRecord ;
10+ import htsjdk .samtools .SAMTag ;
1011import htsjdk .samtools .filter .SamRecordFilter ;
1112
13+ import java .util .BitSet ;
14+
1215public class MDFilter implements SamRecordFilter {
1316 private final boolean mismatchFilter ;
1417 private final int value ;
1518 private final Comparator op ;
19+ public static final short MD_TAG = SAMTag .makeBinaryTag ("MD" );
1620
/**
 * Constructs an MDFilter from the three parts of a query condition on the MD field.
 *
 * <p>The numeric threshold is parsed first, then the comparator is stored, and finally the
 * operator name is validated. Only "mismatch" (compared case-insensitively) is accepted.
 *
 * @param operatorName name of the MD operator; only "mismatch" is valid.
 * @param comp comparator to apply; see org.qcmg.qbamfilter.filter.Comparator for valid values.
 * @param value string form of the integer threshold.
 * @throws Exception if {@code value} is not a valid integer (the underlying
 *         {@link NumberFormatException} is attached as the cause), or if
 *         {@code operatorName} is not "mismatch".
 */
public MDFilter(String operatorName, Comparator comp, String value) throws Exception {
    try {
        this.value = Integer.parseInt(value);
    } catch (NumberFormatException e) {
        // Keep the original failure as the cause so the offending input can be diagnosed.
        throw new Exception(
                "non integer value used in MD field filter: MD_" + operatorName + comp.getString() + value, e);
    }
    op = comp;
    if (operatorName.equalsIgnoreCase("mismatch")) {
        mismatchFilter = true;
    } else {
        throw new Exception(
                "invalid MD String operator: " + operatorName + " in query condition MD_" + operatorName);
    }
}
38-
3943
40- private static int tallyMDMismatches (String mdData ) {
44+ public static int tallyMDMismatches (String mdData ) {
45+ if (mdData == null || mdData .isEmpty ()) {
46+ return 0 ;
47+ }
48+
4149 int count = 0 ;
42- if (null != mdData ) {
43- for (int i = 0 , size = mdData .length () ; i < size ; ) {
44- char c = mdData .charAt (i );
45- if (isValidMismatch (c )) {
46- count ++;
50+ int size = mdData .length ();
51+ int i = 0 ;
52+
53+ while (i < size ) {
54+ char c = mdData .charAt (i );
55+
56+ if (Character .isDigit (c )) {
57+ i ++;
58+ while (i < size && Character .isDigit (mdData .charAt (i ))) {
59+ i ++;
60+ }
61+ } else if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N' ) {
62+ count ++;
63+ i ++;
64+ } else if (c == '^' ) {
65+ // Skip the segment after '^' (indicating a deletion)
66+ i ++;
67+ while (i < size && Character .isLetter (mdData .charAt (i ))) {
4768 i ++;
48- } else if ('^' == c ) {
49- while (++i < size && Character .isLetter (mdData .charAt (i ))) {}
50- } else i ++; // need to increment this or could end up with infinite loop...
69+ }
5170 }
5271 }
5372 return count ;
5473 }
55-
56- private static boolean isValidMismatch (char c ) {
57- return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N' ;
58- }
5974
6075 /**
6176 * check the record base length with required CigarOperator.
@@ -67,7 +82,7 @@ private static boolean isValidMismatch(char c) {
6782 */
6883 @ Override
6984 public boolean filterOut (final SAMRecord record ){
70- String attribute = (String )record .getAttribute ("MD" );
85+ String attribute = (String )record .getAttribute (MD_TAG );
7186
7287 if (attribute == null ) {
7388 return false ;
@@ -88,5 +103,4 @@ public boolean filterOut(SAMRecord arg0, SAMRecord arg1) {
88103 // TODO Auto-generated method stub
89104 return false ;
90105 }
91-
92106}
0 commit comments