Search in sources :

Example 46 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class NoZeroVariationVCF method doWork.

@Override
public int doWork(List<String> args) {
    if (faidx == null) {
        LOG.error("Indexed fasta file missing.");
        return -1;
    }
    try {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        return doVcfToVcf(args, this.outputFile);
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        this.indexedFastaSequenceFile = null;
    }
}
Also used : IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 47 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class ReferenceToVCF method doWork.

@Override
public int doWork(List<String> args) {
    if (this.bedFile != null) {
        if (limitBed == null)
            limitBed = new IntervalTreeMap<Boolean>();
        try {
            Pattern tab = Pattern.compile("[\t]");
            BufferedReader r = IOUtils.openFileForBufferedReading(this.bedFile);
            String line;
            while ((line = r.readLine()) != null) {
                if (BedLine.isBedHeader(line))
                    continue;
                if (line.startsWith("#") || line.isEmpty())
                    continue;
                String[] tokens = tab.split(line, 4);
                limitBed.put(new Interval(tokens[0], 1 + Integer.parseInt(tokens[1]), 1 + Integer.parseInt(tokens[2])), true);
            }
            CloserUtil.close(r);
        } catch (Exception err) {
            LOG.error(err);
            return -1;
        }
    }
    Random random = new Random(0L);
    VariantContextWriter out = null;
    try {
        final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(oneAndOnlyOneFile(args)));
        SAMSequenceDictionary dict = fasta.getSequenceDictionary();
        out = super.openVariantContextWriter(this.outputFile);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(dict);
        out.writeHeader(header);
        final List<List<Allele>> combination = new ArrayList<List<Allele>>(4);
        // always keep REF as first allele please
        combination.add(Arrays.asList(Allele.create("A", true), Allele.create("C", false), Allele.create("G", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("C", true), Allele.create("A", false), Allele.create("G", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("G", true), Allele.create("A", false), Allele.create("C", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("T", true), Allele.create("A", false), Allele.create("C", false), Allele.create("G", false)));
        for (SAMSequenceRecord ssr : dict.getSequences()) {
            GenomicSequence genome = new GenomicSequence(fasta, ssr.getSequenceName());
            if (this.limitBed != null) {
                Interval interval = new Interval(ssr.getSequenceName(), 1, genome.length());
                if (!this.limitBed.containsOverlapping(interval))
                    continue;
            }
            for (int n = 0; n < genome.length(); ++n) {
                progress.watch(ssr.getSequenceIndex(), n);
                List<Allele> alleles = null;
                byte ref = (byte) genome.charAt(n);
                switch(ref) {
                    case 'a':
                    case 'A':
                        alleles = combination.get(0);
                        break;
                    case 'c':
                    case 'C':
                        alleles = combination.get(1);
                        break;
                    case 'g':
                    case 'G':
                        alleles = combination.get(2);
                        break;
                    case 't':
                    case 'T':
                        alleles = combination.get(3);
                        break;
                    default:
                        break;
                }
                if (alleles == null)
                    continue;
                if (this.limitBed != null) {
                    Interval interval = new Interval(ssr.getSequenceName(), n + 1, n + 1);
                    if (!this.limitBed.containsOverlapping(interval))
                        continue;
                }
                if (!disjoint_alts) {
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.stop(n + 1);
                    vcb.alleles(alleles);
                    out.add(vcb.make());
                } else {
                    for (// index 0 is always REF
                    int a = 1; // index 0 is always REF
                    a < 4; // index 0 is always REF
                    ++a) {
                        VariantContextBuilder vcb = new VariantContextBuilder();
                        vcb.chr(ssr.getSequenceName());
                        vcb.start(n + 1);
                        vcb.stop(n + 1);
                        // index 0 is always REF
                        vcb.alleles(Arrays.asList(alleles.get(0), alleles.get(a)));
                        out.add(vcb.make());
                    }
                }
                if (insertion_size > 0 && n + 1 < genome.length()) {
                    alleles = new ArrayList<Allele>(2);
                    // REFERENCE
                    alleles.add(Allele.create("" + genome.charAt(n) + genome.charAt(n + 1), true));
                    StringBuilder sb = new StringBuilder(insertion_size + 2);
                    sb.append(genome.charAt(n));
                    for (int n2 = 0; n2 < insertion_size; ++n2) {
                        switch(random.nextInt(4)) {
                            case 0:
                                sb.append('A');
                                break;
                            case 1:
                                sb.append('C');
                                break;
                            case 2:
                                sb.append('G');
                                break;
                            case 3:
                                sb.append('T');
                                break;
                        }
                    }
                    sb.append(genome.charAt(n + 1));
                    alleles.add(Allele.create(sb.toString(), false));
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.alleles(alleles);
                    vcb.computeEndFromAlleles(alleles, n + 1);
                    out.add(vcb.make());
                }
                if (deletion_size > 0 && n + deletion_size + 1 < genome.length()) {
                    alleles = new ArrayList<Allele>(2);
                    // REF
                    StringBuilder sb = new StringBuilder(deletion_size + 2);
                    sb.append(genome.charAt(n));
                    int lastpos = n + 1;
                    for (int n2 = 0; n2 < deletion_size; ++n2, lastpos++) {
                        sb.append(genome.charAt(lastpos));
                    }
                    sb.append(genome.charAt(lastpos));
                    alleles.add(Allele.create(sb.toString(), true));
                    alleles.add(Allele.create("" + genome.charAt(n) + genome.charAt(lastpos), false));
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.alleles(alleles);
                    vcb.computeEndFromAlleles(alleles, n + 1);
                    out.add(vcb.make());
                }
                if (out.checkError())
                    break;
            }
            if (out.checkError())
                break;
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
    }
}
Also used : ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Random(java.util.Random) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Pattern(java.util.regex.Pattern) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Interval(htsjdk.samtools.util.Interval)

Example 48 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class CopyNumber01 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.refFile == null) {
        LOG.error("Undefined REF file");
        return -1;
    }
    if (this.archiveFile == null) {
        LOG.error("Undefined output file.");
        return -1;
    }
    if (!StringUtil.isBlank(prefix)) {
        if (!(prefix.endsWith(".") || prefix.endsWith("_"))) {
            prefix = prefix + ".";
        }
    }
    SamReader samReader = null;
    ArchiveFactory archive = null;
    try {
        final String input = oneAndOnlyOneFile(args);
        samReader = super.openSamReader(input);
        if (!samReader.hasIndex()) {
            LOG.error("file is not indexed " + input);
            return -1;
        }
        final String sampleName = samReader.getFileHeader().getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse("SAMPLE");
        final SAMFileHeader header = samReader.getFileHeader();
        this.samDictionary = header.getSequenceDictionary();
        if (this.samDictionary == null || this.samDictionary.isEmpty()) {
            throw new JvarkitException.DictionaryMissing(input);
        }
        /* loading REF Reference */
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile);
        final SAMSequenceDictionary dict = this.indexedFastaSequenceFile.getSequenceDictionary();
        if (dict == null) {
            throw new JvarkitException.DictionaryMissing(refFile.getPath());
        }
        this.sam2faiContigNameConverter = ContigNameConverter.fromDictionaries(this.samDictionary, dict);
        this.sam2faiContigNameConverter.setOnNotFound(OnNotFound.SKIP);
        archive = ArchiveFactory.open(this.archiveFile);
        PrintWriter mkw = archive.openWriter(this.prefix + "cnv01.mk");
        mkw.println("## " + this.getProgramCommandLine());
        mkw.println(".PHONY=all all2");
        mkw.println("TARGETS=");
        mkw.println("SCREEN_WIDTH?=2600");
        mkw.println("SCREEN_HEIGHT?=1000");
        mkw.println("GNUPLOT?=gnuplot");
        mkw.println("all : all2");
        final List<String> all_chrom_files = new ArrayList<>();
        double maxDepth = 0.0;
        for (final SAMSequenceRecord ssr : this.samDictionary.getSequences()) {
            if (!StringUtil.isBlank(this.limitToChrom) && !this.limitToChrom.equals(ssr.getSequenceName())) {
                LOG.info("Skipping " + ssr.getSequenceName() + "....");
                continue;
            }
            if (this.sam2faiContigNameConverter.apply(ssr.getSequenceName()) == null) {
                LOG.info("Ignoring " + ssr.getSequenceName() + " because it's not in REF");
                continue;
            }
            if (ignoreChromosomeName(ssr.getSequenceName())) {
                LOG.info("Ignoring " + ssr.getSequenceName());
                continue;
            }
            LOG.info("processing chromosome " + ssr.getSequenceName());
            ContigProcessor proc = new ContigProcessor(samReader, ssr, sampleName);
            proc.run();
            final String tsvFilename = this.prefix + proc.getContig() + "." + proc.sampleName + ".bed";
            final String pngFilename = "$(addsuffix .png,$(basename " + tsvFilename + "))";
            final double depth = proc.items.stream().mapToDouble(I -> I.norm_depth).max().orElse(4);
            maxDepth = Math.max(maxDepth, depth);
            all_chrom_files.add(tsvFilename);
            mkw.println("TARGETS+=" + pngFilename);
            mkw.println(pngFilename + ":" + tsvFilename);
            mkw.println("\techo 'set terminal png truecolor size ${SCREEN_WIDTH},${SCREEN_HEIGHT};" + "set key autotitle columnhead;" + "set datafile separator \"\t\";" + "set title \"" + proc.sampleName + " " + ssr.getSequenceName() + "\";" + "set ylabel \"Normalized Depth - Number of copies\";" + "set xlabel \"Position on " + ssr.getSequenceName() + "\";" + // + "set style circle radius 0.02;"
            "set nokey;" + "set yrange [0:" + Math.min(4, Math.ceil(depth)) + "];" + "set xrange [1:" + ssr.getSequenceLength() + "];" + "set xtic rotate by 90 right;" + "set output \"$@\";" + // + "plot \"$<\" using 1:2:1 w points linecolor variable "
            "plot \"$<\" using 2:7 w lines " + "' | ${GNUPLOT}");
            PrintWriter pw = archive.openWriter(tsvFilename);
            proc.saveCoverage(pw);
            pw.flush();
            pw.close();
            proc = null;
        }
        samReader.close();
        final String pngFilename = this.prefix + "wholeGenome.png";
        mkw.println("TARGETS+=" + pngFilename);
        mkw.println(pngFilename + ":" + String.join(" ", all_chrom_files));
        mkw.println("\tgrep -v '^#' $^ | cut -f 1,4,7 | sed " + samDictionary.getSequences().stream().map(SSR -> " -e 's/^" + SSR.getSequenceName() + "\t/" + SSR.getSequenceIndex() + "\t/' ").collect(Collectors.joining()) + "| sort -t '\t' -k2,2n > $(addsuffix .tmp.tsv,$@)");
        mkw.println("\techo 'set terminal png truecolor size ${SCREEN_WIDTH},${SCREEN_HEIGHT};" + "set datafile separator \"\t\";" + "set title \"" + sampleName + " (Whole Genome)\";" + "set ylabel \"Normalized Depth - Number of copies\";" + "set xlabel \"Genomic Position\";" + "set yrange [0:" + Math.min(4, Math.ceil(maxDepth)) + "];" + "set xrange [1:" + this.samDictionary.getReferenceLength() + ".0];" + "set xtic rotate by 90 right;" + "set nokey;" + "set output \"$@\";" + "plot \"$(addsuffix .tmp.tsv,$@)\" using 2:3:1 w points linecolor variable " + "' | ${GNUPLOT}");
        mkw.println("\trm -f $(addsuffix .tmp.tsv,$@)");
        mkw.println("all2: ${TARGETS}");
        mkw.flush();
        mkw.close();
        mkw = null;
        archive.close();
        archive = null;
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(samReader);
        CloserUtil.close(archive);
        CloserUtil.close(this.indexedFastaSequenceFile);
    }
}
Also used : Arrays(java.util.Arrays) NevilleInterpolator(org.apache.commons.math3.analysis.interpolation.NevilleInterpolator) LineIterator(htsjdk.tribble.readers.LineIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) CigarElement(htsjdk.samtools.CigarElement) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) UnivariateInterpolator(org.apache.commons.math3.analysis.interpolation.UnivariateInterpolator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) Median(org.apache.commons.math3.stat.descriptive.rank.Median) MathIllegalArgumentException(org.apache.commons.math3.exception.MathIllegalArgumentException) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) LinearInterpolator(org.apache.commons.math3.analysis.interpolation.LinearInterpolator) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) LoessInterpolator(org.apache.commons.math3.analysis.interpolation.LoessInterpolator) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SamFilterParser(com.github.lindenb.jvarkit.util.bio.samfilter.SamFilterParser) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) UnivariateFunction(org.apache.commons.math3.analysis.UnivariateFunction) GZIPOutputStream(java.util.zip.GZIPOutputStream) Pattern(java.util.regex.Pattern) BBFileReader(org.broad.igv.bbfile.BBFileReader) SamReaderFactory(htsjdk.samtools.SamReaderFactory) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SplineInterpolator(org.apache.commons.math3.analysis.interpolation.SplineInterpolator) Cigar(htsjdk.samtools.Cigar) SequenceUtil(htsjdk.samtools.util.SequenceUtil) GcPercentAndDepth(com.github.lindenb.jvarkit.tools.misc.GcPercentAndDepth) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) HashMap(java.util.HashMap) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) OnNotFound(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter.OnNotFound) AbstractUnivariateStatistic(org.apache.commons.math3.stat.descriptive.AbstractUnivariateStatistic) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) BEDCodec(htsjdk.tribble.bed.BEDCodec) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Locatable(htsjdk.samtools.util.Locatable) Iterator(java.util.Iterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DividedDifferenceInterpolator(org.apache.commons.math3.analysis.interpolation.DividedDifferenceInterpolator) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) IntervalTree(htsjdk.samtools.util.IntervalTree) Identity(org.apache.commons.math3.analysis.function.Identity) DimensionMismatchException(org.apache.commons.math3.exception.DimensionMismatchException) SamReader(htsjdk.samtools.SamReader) File(java.io.File) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) WigItem(org.broad.igv.bbfile.WigItem) BufferedReader(java.io.BufferedReader) BitSet(java.util.BitSet) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Comparator(java.util.Comparator) Collections(java.util.Collections) BigWigIterator(org.broad.igv.bbfile.BigWigIterator) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) MathIllegalArgumentException(org.apache.commons.math3.exception.MathIllegalArgumentException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) DimensionMismatchException(org.apache.commons.math3.exception.DimensionMismatchException) SamReader(htsjdk.samtools.SamReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) PrintWriter(java.io.PrintWriter)

Example 49 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class SamFixCigar method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("Reference was not specified.");
        return -1;
    }
    GenomicSequence genomicSequence = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader();
        sfw = this.writingBamArgs.setReferenceFile(this.faidx).openSAMFileWriter(outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final List<CigarElement> newCigar = new ArrayList<CigarElement>();
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            Cigar cigar = rec.getCigar();
            byte[] bases = rec.getReadBases();
            if (rec.getReadUnmappedFlag() || cigar == null || cigar.getCigarElements().isEmpty() || bases == null) {
                sfw.addAlignment(rec);
                continue;
            }
            if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            newCigar.clear();
            int refPos1 = rec.getAlignmentStart();
            int readPos0 = 0;
            for (final CigarElement ce : cigar.getCigarElements()) {
                final CigarOperator op = ce.getOperator();
                if (op.equals(CigarOperator.M)) {
                    for (int i = 0; i < ce.getLength(); ++i) {
                        char c1 = Character.toUpperCase((char) bases[readPos0]);
                        char c2 = Character.toUpperCase(refPos1 - 1 < genomicSequence.length() ? genomicSequence.charAt(refPos1 - 1) : '*');
                        if (c2 == 'N' || c1 == c2) {
                            newCigar.add(new CigarElement(1, CigarOperator.EQ));
                        } else {
                            newCigar.add(new CigarElement(1, CigarOperator.X));
                        }
                        refPos1++;
                        readPos0++;
                    }
                } else {
                    newCigar.add(ce);
                    if (op.consumesReadBases())
                        readPos0 += ce.getLength();
                    if (op.consumesReferenceBases())
                        refPos1 += ce.getLength();
                }
            }
            int i = 0;
            while (i < newCigar.size()) {
                final CigarOperator op1 = newCigar.get(i).getOperator();
                final int length1 = newCigar.get(i).getLength();
                if (i + 1 < newCigar.size() && newCigar.get(i + 1).getOperator() == op1) {
                    final CigarOperator op2 = newCigar.get(i + 1).getOperator();
                    int length2 = newCigar.get(i + 1).getLength();
                    newCigar.set(i, new CigarElement(length1 + length2, op2));
                    newCigar.remove(i + 1);
                } else {
                    ++i;
                }
            }
            cigar = new Cigar(newCigar);
            // info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
            rec.setCigar(cigar);
            sfw.addAlignment(rec);
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 50 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class TView method paint.

void paint(final PrintStream out) {
    final Colorizer colorizer;
    switch(this.formatOut) {
        case html:
            colorizer = new HtmlColorizer(out);
            break;
        case tty:
            colorizer = new AnsiColorizer(out);
            break;
        case plain:
            colorizer = new Colorizer(out);
            break;
        default:
            throw new IllegalStateException();
    }
    if (interval == null) {
        LOG.warn("No interval defined");
        return;
    }
    final GenomicSequence contigSequence;
    final Function<Integer, Character> refPosToBase;
    if (indexedFastaSequenceFile != null) {
        final SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(referenceFile);
        if (dict.getSequence(this.interval.getContig()) == null) {
            LOG.warn("No interval with contig " + interval + " in REF");
            return;
        }
        contigSequence = new GenomicSequence(indexedFastaSequenceFile, interval.getContig());
        refPosToBase = POS -> {
            if (POS < 0 || POS >= contigSequence.length())
                return 'N';
            return contigSequence.charAt(POS);
        };
    } else {
        contigSequence = null;
        refPosToBase = POS -> 'N';
    }
    /**
     * test if genomic position is in interval
     */
    final Predicate<Integer> testInInterval = new Predicate<Integer>() {

        @Override
        public boolean test(final Integer pos) {
            return interval.getStart() <= pos && pos <= interval.getEnd();
        }
    };
    final int pixelWidth = this.interval.length();
    final Map<Integer, Integer> genomicpos2insertlen = new TreeMap<>();
    final Map<String, List<SAMRecord>> group2record = new TreeMap<>();
    for (final SamReader samReader : this.samReaders) {
        SAMRecordIterator iter = samReader.query(this.interval.getContig(), this.interval.getStart(), this.interval.getEnd(), false);
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            if (rec.getReadUnmappedFlag())
                continue;
            if (rec.getCigar() == null)
                continue;
            if (getRecordFilter().filterOut(rec))
                continue;
            if (!rec.getContig().equals(interval.getContig()))
                continue;
            if (right().apply(rec) < this.interval.getStart())
                continue;
            if (this.interval.getEnd() < left().apply(rec))
                continue;
            String group = this.groupBy.getPartion(rec);
            if (group == null || group.isEmpty()) {
                group = "undefined_" + this.groupBy.name();
            }
            List<SAMRecord> records = group2record.get(group);
            if (records == null) {
                records = new ArrayList<>();
                group2record.put(group, records);
            }
            records.add(rec);
            // loop over cigar, get the longest insert
            int refpos = rec.getAlignmentStart();
            for (final CigarElement ce : rec.getCigar().getCigarElements()) {
                if (!this.showInsertions)
                    break;
                final CigarOperator op = ce.getOperator();
                if (op.equals(CigarOperator.I) && testInInterval.test(refpos)) {
                    final Integer longestInsert = genomicpos2insertlen.get(refpos);
                    if (longestInsert == null || longestInsert.compareTo(ce.getLength()) < 0) {
                        genomicpos2insertlen.put(refpos, ce.getLength());
                    }
                }
                if (op.consumesReferenceBases()) {
                    refpos += ce.getLength();
                }
                if (refpos > interval.getEnd())
                    break;
            }
        }
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
    }
    /**
     * compute where are the insertions
     */
    // LOG.debug(genomicpos2insertlen);
    final Predicate<Integer> insertIsPresentAtX = SCREENX -> {
        int x = 0;
        int ref = interval.getStart();
        while (x < pixelWidth) {
            if (x > SCREENX)
                return false;
            final Integer insertLen = genomicpos2insertlen.get(ref);
            if (insertLen == null) {
                ++x;
                ++ref;
            } else {
                if (x <= SCREENX && SCREENX < x + insertLen)
                    return true;
                // (+1) I DON'T UNDERSTAND WHY, BUT IT WORKS
                x += (insertLen + 1);
                ++ref;
            }
        }
        return false;
    };
    final Function<Character, AnsiColor> base2ansiColor = BASE -> {
        switch(Character.toUpperCase(BASE)) {
            case 'A':
                return AnsiColor.BLUE;
            case 'T':
                return AnsiColor.GREEN;
            case 'G':
                return AnsiColor.CYAN;
            case 'C':
                return AnsiColor.YELLOW;
            default:
                return null;
        }
    };
    /**
     * print interval title
     */
    out.println(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
    /**
     * paint base position
     */
    int ref = this.interval.getStart();
    int x = 0;
    out.print(margin("POS:"));
    while (x < pixelWidth) {
        if (insertIsPresentAtX.test(x)) {
            colorizer.pen(AnsiColor.RED).print("^");
            ++x;
        } else if ((ref - this.interval.getStart()) % 10 == 0) {
            final String f = String.format("%d", ref);
            for (int i = 0; i < f.length() && x < pixelWidth; ++i) {
                colorizer.pen(AnsiColor.GREEN).print(f.charAt(i));
                if (!insertIsPresentAtX.test(x))
                    ++ref;
                ++x;
            }
        } else {
            out.print(".");
            ++ref;
            ++x;
        }
    }
    out.println();
    /* paint ref base */
    out.print(margin("REF:"));
    ref = this.interval.getStart();
    x = 0;
    while (x < pixelWidth) {
        if (insertIsPresentAtX.test(x)) {
            colorizer.paper(AnsiColor.YELLOW).print("*");
            ++x;
        } else {
            char refBase = refPosToBase.apply(ref - 1);
            colorizer.pen(base2ansiColor.apply(refBase)).print(refBase);
            ++ref;
            ++x;
        }
    }
    out.println();
    /* loop over samples **/
    for (final String groupName : group2record.keySet()) {
        if (this.maxReadRowPerGroup == 0)
            continue;
        final ConsensusBuilder consensus = new ConsensusBuilder();
        int y_group = 0;
        final List<List<SAMRecord>> rows = new ArrayList<>();
        out.println(margin(""));
        switch(this.layoutReads) {
            case name:
                {
                    rows.addAll(group2record.get(groupName).stream().sorted((R1, R2) -> R1.getReadName().compareTo(R2.getReadName())).map(R -> Collections.singletonList(R)).collect(Collectors.toList()));
                    break;
                }
            default:
                {
                    /* pileup reads */
                    for (final SAMRecord rec : group2record.get(groupName)) {
                        int y = 0;
                        for (y = 0; y < rows.size(); ++y) {
                            final List<SAMRecord> row = rows.get(y);
                            final SAMRecord last = row.get(row.size() - 1);
                            if (right().apply(last) + this.distance_between_reads < left().apply(rec)) {
                                row.add(rec);
                                break;
                            }
                        }
                        if (y == rows.size()) {
                            final List<SAMRecord> row = new ArrayList<>();
                            row.add(rec);
                            rows.add(row);
                        }
                    }
                    break;
                }
        }
        // each row is only one read, so we need to print the groupName
        if (layoutReads == LayoutReads.name) {
            out.print(margin(groupName));
            out.println();
        }
        /* print each row */
        for (final List<SAMRecord> row : rows) {
            ++y_group;
            boolean print_this_line = (this.maxReadRowPerGroup < 0 || y_group <= this.maxReadRowPerGroup);
            if (print_this_line) {
                // each row is only one read, print the read name
                if (layoutReads == LayoutReads.name) {
                    String readName = row.get(0).getReadName();
                    if (row.get(0).getReadPairedFlag()) {
                        if (row.get(0).getFirstOfPairFlag()) {
                            readName += "/1";
                        }
                        if (row.get(0).getSecondOfPairFlag()) {
                            readName += "/2";
                        }
                    }
                    out.print(margin(readName));
                } else {
                    out.print(margin(y_group == 1 ? groupName : ""));
                }
            }
            ref = interval.getStart();
            x = 0;
            for (final SAMRecord rec : row) {
                int readRef = left().apply(rec);
                // pad before record
                while (x < pixelWidth && ref < readRef && testInInterval.test(ref)) {
                    if (!insertIsPresentAtX.test(x))
                        ++ref;
                    ++x;
                    if (print_this_line)
                        out.print(' ');
                    consensus.add(' ');
                }
                int readpos = 0;
                /* get read base function */
                final Function<Integer, Character> baseAt = new Function<Integer, Character>() {

                    @Override
                    public Character apply(final Integer readpos) {
                        final byte[] readBases = rec.getReadBases();
                        if (readBases == SAMRecord.NULL_SEQUENCE)
                            return 'N';
                        if (readpos < 0 || readpos >= rec.getReadLength())
                            return '?';
                        return (char) readBases[readpos];
                    }
                };
                for (final CigarElement ce : rec.getCigar()) {
                    final CigarOperator op = ce.getOperator();
                    if (op.equals(CigarOperator.PADDING))
                        continue;
                    /* IN INSERTION, only print if showInsertions is true */
                    if (this.showInsertions && op.equals(CigarOperator.I)) {
                        int cigarIdx = 0;
                        while (x < pixelWidth && cigarIdx < ce.getLength()) {
                            if (testInInterval.test(readRef)) {
                                final char readbase = baseAt.apply(readpos);
                                if (print_this_line)
                                    colorizer.paper(AnsiColor.RED).print(readbase);
                                consensus.add(readbase);
                                ++x;
                            }
                            ++cigarIdx;
                            ++readpos;
                        }
                        continue;
                    }
                    int cigarIdx = 0;
                    while (x < pixelWidth && cigarIdx < ce.getLength()) {
                        colorizer.clear();
                        // pad before base
                        while (x < pixelWidth && testInInterval.test(readRef) && (insertIsPresentAtX.test(x))) {
                            ++x;
                            if (print_this_line)
                                colorizer.paper(AnsiColor.YELLOW).print("*");
                            consensus.add(' ');
                            continue;
                        }
                        switch(op) {
                            case I:
                                {
                                    // if visible, processed above
                                    if (showInsertions)
                                        throw new IllegalStateException();
                                    readpos++;
                                    break;
                                }
                            case P:
                                break;
                            case H:
                                {
                                    if (showClip) {
                                        if (testInInterval.test(readRef)) {
                                            if (print_this_line)
                                                colorizer.paper(AnsiColor.YELLOW).print('N');
                                            // CLIPPED base not part of consensus
                                            consensus.add(' ');
                                            ++x;
                                        }
                                        ++readRef;
                                    }
                                    break;
                                }
                            case S:
                                {
                                    if (showClip) {
                                        if (testInInterval.test(readRef)) {
                                            final char readBase = baseAt.apply(readpos);
                                            if (print_this_line)
                                                colorizer.paper(AnsiColor.YELLOW).print(readBase);
                                            // CLIPPED base not part of consensus
                                            consensus.add(' ');
                                            ++x;
                                        }
                                        ++readpos;
                                        ++readRef;
                                    } else {
                                        readpos++;
                                    }
                                    break;
                                }
                            case D:
                            case N:
                                {
                                    if (testInInterval.test(readRef)) {
                                        if (print_this_line)
                                            colorizer.paper(AnsiColor.RED).print('-');
                                        // deletion not not part of consensus
                                        consensus.add(' ');
                                        ++x;
                                    }
                                    ++readRef;
                                    break;
                                }
                            case EQ:
                            case M:
                            case X:
                                {
                                    if (testInInterval.test(readRef)) {
                                        final char refBase = Character.toUpperCase(refPosToBase.apply(readRef - 1));
                                        char readBase = Character.toUpperCase(baseAt.apply(readpos));
                                        consensus.add(readBase);
                                        colorizer.pen(base2ansiColor.apply(readBase));
                                        if (op.equals(CigarOperator.X) || (refBase != 'N' && readBase != 'N' && readBase != refBase)) {
                                            colorizer.pen(AnsiColor.RED);
                                        } else if (hideBases) {
                                            if (rec.getReadNegativeStrandFlag()) {
                                                readBase = ',';
                                            } else {
                                                readBase = '.';
                                            }
                                        }
                                        if (showReadName) {
                                            final String readName = rec.getReadName();
                                            if (readpos < 0 || readpos >= readName.length()) {
                                                readBase = '_';
                                            } else {
                                                readBase = readName.charAt(readpos);
                                            }
                                        }
                                        if (rec.getReadNegativeStrandFlag()) {
                                            readBase = Character.toLowerCase(readBase);
                                        } else {
                                            readBase = Character.toUpperCase(readBase);
                                        }
                                        if (print_this_line)
                                            colorizer.print(readBase);
                                        ++x;
                                    }
                                    ++readpos;
                                    ++readRef;
                                    break;
                                }
                        }
                        ++cigarIdx;
                    }
                }
                // end of loop cigar
                ref = readRef;
            }
            // out.println( " "+ref+" "+row.get(0).getAlignmentStart()+" "+row.get(0).getCigarString()+" "+row.get(0).getReadString());
            while (x < pixelWidth) {
                if (print_this_line)
                    out.print(" ");
                ++x;
            }
            if (print_this_line)
                out.println();
            consensus.eol();
            if (out.checkError())
                break;
        }
        if (out.checkError())
            break;
        if (!this.hideConsensus && consensus.bases.stream().anyMatch(C -> C.getCoverage() > 0)) {
            out.print(margin(groupName + " CONSENSUS"));
            x = 0;
            ref = interval.getStart();
            while (x < consensus.bases.size()) {
                final char refBase = Character.toUpperCase(refPosToBase.apply(ref - 1));
                final char consensusBase = consensus.bases.get(x).getConsensus();
                if (Character.isWhitespace(consensusBase)) {
                // nothing
                } else if (refBase != 'N' && consensusBase != refBase) {
                    colorizer.pen(AnsiColor.RED);
                } else {
                    colorizer.pen(base2ansiColor.apply(consensusBase));
                }
                if (!insertIsPresentAtX.test(x))
                    ++ref;
                colorizer.print(consensusBase);
                ++x;
            }
            out.println();
        }
        if (this.numCoverageRows > 0) {
            int minCov = consensus.bases.stream().mapToInt(C -> C.getCoverage()).min().orElse(0);
            final int maxCov = consensus.bases.stream().mapToInt(C -> C.getCoverage()).max().orElse(0);
            for (int y = 0; maxCov > 0 && y < this.numCoverageRows; ++y) {
                if (minCov == maxCov)
                    minCov--;
                double fract = (maxCov - minCov) / ((double) this.numCoverageRows);
                int inverse_y = (this.numCoverageRows - 1) - y;
                int d0 = (int) ((fract) * inverse_y);
                // int d1 = (int)((fract) * (inverse_y+1));
                out.print(margin(y == 0 ? groupName + " " + maxCov : (y + 1 == this.numCoverageRows ? String.valueOf(minCov) : "")));
                for (x = 0; x < consensus.bases.size(); ++x) {
                    int depth = consensus.bases.get(x).getCoverage() - minCov;
                    colorizer.print(depth >= d0 ? BLACK_SQUARE : ' ');
                }
                out.println();
            }
        }
    }
    if (this.tabixKnownGene != null && this.indexedFastaSequenceFile != null) {
        final List<KnownGene> genes = this.tabixKnownGene.getItemsInInterval(this.interval);
        if (!genes.isEmpty()) {
            out.println(this.knownGeneUri);
            for (final KnownGene gene : genes) {
                final KnownGene.CodingRNA codingRna = gene.getCodingRNA(contigSequence);
                final KnownGene.Peptide peptide = codingRna.getPeptide();
                out.print(margin(gene.getName()));
                x = 0;
                int ref0 = this.interval.getStart() - 1;
                while (x < pixelWidth) {
                    if (insertIsPresentAtX.test(x)) {
                        out.print("*");
                        ++x;
                    } else {
                        char pepChar = ' ';
                        if (ref0 >= gene.getTxStart() && ref0 < gene.getTxEnd()) {
                            pepChar = (gene.isPositiveStrand() ? '>' : '<');
                            int pepIdx = peptide.convertGenomicToPeptideCoordinate(ref0);
                            if (pepIdx != -1) {
                                final String aa3 = GeneticCode.aminoAcidTo3Letters(peptide.charAt(pepIdx));
                                final int[] offset = peptide.convertToGenomicCoordinates(pepIdx);
                                if (offset != null && offset.length == 3 && aa3 != null && aa3.length() == 3) {
                                    if (offset[0] == ref0)
                                        pepChar = aa3.charAt(0);
                                    else if (offset[1] == ref0)
                                        pepChar = aa3.charAt(1);
                                    else if (offset[2] == ref0)
                                        pepChar = aa3.charAt(2);
                                    else
                                        pepChar = '?';
                                } else {
                                    pepChar = '?';
                                }
                            }
                        }
                        out.print(pepChar);
                        ++ref0;
                        ++x;
                    }
                }
                while (x < pixelWidth) {
                    out.print(" ");
                    ++x;
                }
                out.println();
            }
        }
        out.println();
    }
    /**
     * variant section
     */
    if (!this.vcfReaders.isEmpty() && !out.checkError()) {
        final Function<GenotypeType, Character> gTypeToSymbol = new Function<GenotypeType, Character>() {

            @Override
            public Character apply(final GenotypeType gt) {
                switch(gt) {
                    case NO_CALL:
                        return '?';
                    case HOM_REF:
                        return '0';
                    case HET:
                        return '1';
                    case HOM_VAR:
                        return '2';
                    case MIXED:
                        return 'm';
                    case UNAVAILABLE:
                        return 'u';
                    default:
                        return '.';
                }
            }
        };
        out.println();
        for (final VcfSource r : this.vcfReaders) {
            if (out.checkError())
                break;
            final VCFHeader header = r.vcfFileReader.getFileHeader();
            final CloseableIterator<VariantContext> iter = r.vcfFileReader.query(this.interval.getContig(), interval.getStart(), interval.getEnd());
            final List<VariantContext> variants = new ArrayList<>();
            while (iter.hasNext()) {
                variants.add(iter.next());
            }
            iter.close();
            if (variants.isEmpty())
                continue;
            out.println(r.vcfFile.getPath());
            if (header.hasGenotypingData()) {
                for (final String sample : header.getSampleNamesInOrder()) {
                    if (!variants.stream().map(V -> V.getGenotype(sample)).filter(G -> !hideNoCall || (hideNoCall && !G.isNoCall())).filter(G -> !hideHomRef || (hideHomRef && !G.isHomRef())).findAny().isPresent()) {
                        continue;
                    }
                    out.print(margin(sample));
                    ref = this.interval.getStart();
                    x = 0;
                    while (x < pixelWidth) {
                        if (insertIsPresentAtX.test(x)) {
                            out.print("*");
                            ++x;
                        } else {
                            char refBase = ' ';
                            for (final VariantContext ctx : variants) {
                                if (ctx.getStart() == ref) {
                                    final Genotype g = ctx.getGenotype(sample);
                                    if (g.isNoCall() && this.hideNoCall)
                                        continue;
                                    if (g.isHomRef() && this.hideHomRef)
                                        continue;
                                    refBase = gTypeToSymbol.apply(g.getType());
                                    break;
                                }
                            }
                            out.print(refBase);
                            ++ref;
                            ++x;
                        }
                    }
                    out.println();
                }
            } else // no genotype
            {
                for (final VariantContext ctx : variants) {
                    out.print(margin(String.valueOf(ctx.getStart()) + ":" + ctx.getReference().getDisplayString() + "/" + ctx.getAlternateAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
                    ref = this.interval.getStart();
                    x = 0;
                    while (x < pixelWidth) {
                        if (insertIsPresentAtX.test(x)) {
                            out.print("*");
                            ++x;
                        } else {
                            out.print(ctx.getStart() == ref ? '+' : ' ');
                            ++ref;
                            ++x;
                        }
                    }
                    out.println();
                }
            }
        }
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) Map(java.util.Map) XMLStreamException(javax.xml.stream.XMLStreamException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) SAMSequenceDictionaryExtractor(htsjdk.variant.utils.SAMSequenceDictionaryExtractor) GeneticCode(com.github.lindenb.jvarkit.util.bio.GeneticCode) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintStream(java.io.PrintStream) Counter(com.github.lindenb.jvarkit.util.Counter) Predicate(java.util.function.Predicate) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) GenotypeType(htsjdk.variant.variantcontext.GenotypeType) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) SamFilterParser(com.github.lindenb.jvarkit.util.bio.samfilter.SamFilterParser) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) TabixKnownGeneFileReader(com.github.lindenb.jvarkit.util.ucsc.TabixKnownGeneFileReader) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) TreeMap(java.util.TreeMap) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Closeable(java.io.Closeable) VariantContext(htsjdk.variant.variantcontext.VariantContext) Collections(java.util.Collections) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Predicate(java.util.function.Predicate) SamReader(htsjdk.samtools.SamReader) Function(java.util.function.Function) ArrayList(java.util.ArrayList) List(java.util.List) VCFHeader(htsjdk.variant.vcf.VCFHeader) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Genotype(htsjdk.variant.variantcontext.Genotype) CigarOperator(htsjdk.samtools.CigarOperator) TreeMap(java.util.TreeMap) CigarElement(htsjdk.samtools.CigarElement) SAMRecord(htsjdk.samtools.SAMRecord) GenotypeType(htsjdk.variant.variantcontext.GenotypeType) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene)

Aggregations

IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)57 File (java.io.File)34 SamReader (htsjdk.samtools.SamReader)22 SAMRecord (htsjdk.samtools.SAMRecord)20 GenomicSequence (com.github.lindenb.jvarkit.util.picard.GenomicSequence)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)16 ArrayList (java.util.ArrayList)16 IOException (java.io.IOException)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)13 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)11 CigarElement (htsjdk.samtools.CigarElement)11 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)11 List (java.util.List)11 FileNotFoundException (java.io.FileNotFoundException)10 BufferedReader (java.io.BufferedReader)9 Collectors (java.util.stream.Collectors)9 Cigar (htsjdk.samtools.Cigar)8 CigarOperator (htsjdk.samtools.CigarOperator)7