diff --git a/filters/ref-confidence.go b/filters/ref-confidence.go index ca581c1..817dee7 100644 --- a/filters/ref-confidence.go +++ b/filters/ref-confidence.go @@ -45,14 +45,14 @@ func getBasesAndBaseQualitiesAlignedOneToOne(aln *sam.Alignment) (sam.Sequence, paddedBaseQualities := make([]byte, 0, nofRefBases) var pos int32 for _, element := range aln.CIGAR { - if operatorConsumesReadBases[element.Operation] { + if operatorConsumesReadBases.Contains(element.Operation) { end := pos + element.Length - if operatorConsumesReferenceBases[element.Operation] { + if operatorConsumesReferenceBases.Contains(element.Operation) { paddedBases = paddedBases.AppendSlice(nibbles.Nibbles(bases.Slice(int(pos), int(end)))) paddedBaseQualities = append(paddedBaseQualities, baseQualities[pos:end]...) } pos = end - } else if operatorConsumesReferenceBases[element.Operation] { + } else if operatorConsumesReferenceBases.Contains(element.Operation) { for j := int32(0); j < element.Length; j++ { paddedBases = paddedBases.Append('-') paddedBaseQualities = append(paddedBaseQualities, 0) diff --git a/filters/simple-filters.go b/filters/simple-filters.go index 7d7e56a..b8e0d7a 100644 --- a/filters/simple-filters.go +++ b/filters/simple-filters.go @@ -23,6 +23,7 @@ import ( "math/rand" "strconv" + "github.com/elliotwutingfeng/asciiset" "github.com/exascience/elprep/v5/bed" "github.com/exascience/elprep/v5/intervals" "github.com/exascience/elprep/v5/sam" @@ -82,7 +83,7 @@ func RemoveUnmappedReadsStrict(_ *sam.Header) sam.AlignmentFilter { } } -var nonExactMappingOperator = map[byte]bool{'I': true, 'D': true, 'N': true, 'H': true, 'P': true, 'X': true, '=': true} +var nonExactMappingOperator, _ = asciiset.MakeASCIISet("IDNHPX=") // RemoveNonExactMappingReads is a filter that removes all reads that // are not exact matches with the reference (soft-clipping ok), based @@ -90,7 +91,7 @@ var nonExactMappingOperator = map[byte]bool{'I': true, 'D': true, 'N': true, 'H' func RemoveNonExactMappingReads(_ *sam.Header) sam.AlignmentFilter { return func(aln *sam.Alignment) bool { for _, op := range aln.CIGAR { - if nonExactMappingOperator[op.Operation] { + if nonExactMappingOperator.Contains(op.Operation) { return false } } diff --git a/filters/utils.go b/filters/utils.go index 99d8dc5..f18b0cc 100644 --- a/filters/utils.go +++ b/filters/utils.go @@ -21,6 +21,7 @@ package filters import ( "log" + "github.com/elliotwutingfeng/asciiset" "github.com/exascience/elprep/v5/sam" ) @@ -74,13 +75,13 @@ func absInt32(x int32) int32 { } var ( - operatorConsumesReadBases = map[byte]bool{'M': true, 'I': true, 'S': true, '=': true, 'X': true} - operatorConsumesReferenceBases = map[byte]bool{'M': true, 'D': true, 'N': true, '=': true, 'X': true} + operatorConsumesReadBases, _ = asciiset.MakeASCIISet("MIS=X") + operatorConsumesReferenceBases, _ = asciiset.MakeASCIISet("MDN=X") ) func elementStradlessClippedRead(newCigar []sam.CigarOperation, operator byte, relativeClippingPosition, clippedBases int32) []sam.CigarOperation { - if operatorConsumesReadBases[operator] { - if operatorConsumesReferenceBases[operator] { + if operatorConsumesReadBases.Contains(operator) { + if operatorConsumesReferenceBases.Contains(operator) { if relativeClippingPosition > 0 { newCigar = append(newCigar, sam.CigarOperation{ Length: relativeClippingPosition, @@ -279,7 +280,7 @@ func computeReadCoordinateForReferenceCoordinate(cigarVec []sam.CigarOperation, index++ elementLength := int(element.Length) var shift int - if operatorConsumesReferenceBases[element.Operation] || element.Operation == 'S' { + if operatorConsumesReferenceBases.Contains(element.Operation) || element.Operation == 'S' { if refBases+elementLength < goal { shift = elementLength } else { diff --git a/go.mod b/go.mod index fd45a86..136a04f 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.18 require ( github.com/bits-and-blooms/bitset v1.2.2 + github.com/elliotwutingfeng/asciiset v0.0.0-20230602022725-51bbb787efab github.com/exascience/pargo v1.1.0 github.com/google/uuid v1.3.0 golang.org/x/sys v0.0.0-20220422013727-9388b58f7150 diff --git a/go.sum b/go.sum index aa56fd8..d9fcf74 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,8 @@ github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= github.com/bits-and-blooms/bitset v1.2.2 h1:J5gbX05GpMdBjCvQ9MteIg2KKDExr7DrgK+Yc15FvIk= github.com/bits-and-blooms/bitset v1.2.2/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= +github.com/elliotwutingfeng/asciiset v0.0.0-20230602022725-51bbb787efab h1:h1UgjJdAAhj+uPL68n7XASS6bU+07ZX1WJvVS2eyoeY= +github.com/elliotwutingfeng/asciiset v0.0.0-20230602022725-51bbb787efab/go.mod h1:GLo/8fDswSAniFG+BFIaiSPcK610jyzgEhWYPQwuQdw= github.com/exascience/pargo v1.1.0 h1:pBKDhJYoH2ekBehnPCErSIDoi9DqiWL1V70s5kGZANI= github.com/exascience/pargo v1.1.0/go.mod h1:8GeMktPA5KycHMfqXXOfiQzlazfbFSURzGZIJUO0tfk= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=