Search in sources :

Example 41 with IntervalList

use of htsjdk.samtools.util.IntervalList in project jvarkit by lindenb.

the class ImpactOfDuplicates method doWork.

@Override
public int doWork(final List<String> args) {
    CloseableIterator<Duplicate> dupIter = null;
    final List<File> INPUT = args.stream().map(S -> new File(S)).collect(Collectors.toList());
    try {
        this.duplicates = SortingCollection.newInstance(Duplicate.class, new DuplicateCodec(), new Comparator<Duplicate>() {

            @Override
            public int compare(Duplicate o1, Duplicate o2) {
                return o1.compareTo(o2);
            }
        }, this.sortingCollectionArgs.getMaxRecordsInRam(), this.sortingCollectionArgs.getTmpPaths());
        for (this.bamIndex = 0; this.bamIndex < INPUT.size(); this.bamIndex++) {
            int prev_tid = -1;
            int prev_pos = -1;
            long nLines = 0L;
            File inFile = INPUT.get(this.bamIndex);
            LOG.info("Processing " + inFile);
            IOUtil.assertFileIsReadable(inFile);
            SamReader samReader = null;
            CloseableIterator<SAMRecord> iter = null;
            try {
                samReader = SamReaderFactory.make().validationStringency(ValidationStringency.LENIENT).open(inFile);
                final SAMFileHeader header = samReader.getFileHeader();
                this.samFileDicts.add(header.getSequenceDictionary());
                if (BEDFILE == null) {
                    iter = samReader.iterator();
                } else {
                    IntervalList intervalList = new IntervalList(header);
                    BufferedReader in = new BufferedReader(new FileReader(BEDFILE));
                    String line = null;
                    while ((line = in.readLine()) != null) {
                        if (line.isEmpty() || line.startsWith("#"))
                            continue;
                        String[] tokens = line.split("[\t]");
                        Interval interval = new Interval(tokens[0], 1 + Integer.parseInt(tokens[1]), Integer.parseInt(tokens[2]));
                        intervalList.add(interval);
                    }
                    in.close();
                    intervalList = intervalList.sorted();
                    List<Interval> uniqueIntervals = IntervalList.getUniqueIntervals(intervalList, false);
                    SamRecordIntervalIteratorFactory sriif = new SamRecordIntervalIteratorFactory();
                    iter = sriif.makeSamRecordIntervalIterator(samReader, uniqueIntervals, false);
                }
                while (iter.hasNext()) {
                    SAMRecord rec = iter.next();
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (!rec.getReadPairedFlag())
                        continue;
                    if (rec.getReferenceIndex() != rec.getMateReferenceIndex())
                        continue;
                    if (!rec.getProperPairFlag())
                        continue;
                    if (!rec.getFirstOfPairFlag())
                        continue;
                    if (prev_tid != -1) {
                        if (prev_tid > rec.getReferenceIndex()) {
                            throw new IOException("Bad sort order from " + rec);
                        } else if (prev_tid == rec.getReferenceIndex() && prev_pos > rec.getAlignmentStart()) {
                            throw new IOException("Bad sort order from " + rec);
                        } else {
                            prev_pos = rec.getAlignmentStart();
                        }
                    } else {
                        prev_tid = rec.getReferenceIndex();
                        prev_pos = -1;
                    }
                    if ((++nLines) % 1000000 == 0) {
                        LOG.info("In " + inFile + " N=" + nLines);
                    }
                    Duplicate dup = new Duplicate();
                    dup.bamIndex = this.bamIndex;
                    dup.pos = Math.min(rec.getAlignmentStart(), rec.getMateAlignmentStart());
                    dup.tid = rec.getReferenceIndex();
                    dup.size = Math.abs(rec.getInferredInsertSize());
                    this.duplicates.add(dup);
                }
            } finally {
                if (iter != null)
                    iter.close();
                if (samReader != null)
                    samReader.close();
            }
            LOG.info("done " + inFile);
        }
        /**
         * loop done, now scan the duplicates
         */
        LOG.info("doneAdding");
        this.duplicates.doneAdding();
        this.out = super.openFileOrStdoutAsPrintStream(outputFile);
        out.print("#INTERVAL\tMAX\tMEAN");
        for (int i = 0; i < INPUT.size(); ++i) {
            out.print('\t');
            out.print(INPUT.get(i));
        }
        out.println();
        dupIter = this.duplicates.iterator();
        while (dupIter.hasNext()) {
            Duplicate dup = dupIter.next();
            if (this.duplicatesBuffer.isEmpty() || dup.compareChromPosSize(this.duplicatesBuffer.get(0)) == 0) {
                this.duplicatesBuffer.add(dup);
            } else {
                dumpDuplicatesBuffer(INPUT);
                this.duplicatesBuffer.add(dup);
            }
        }
        dumpDuplicatesBuffer(INPUT);
        LOG.info("end iterator");
        out.flush();
        out.close();
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        if (dupIter != null)
            dupIter.close();
        LOG.info("cleaning duplicates");
        this.duplicates.cleanup();
    }
    return 0;
}
Also used : RuntimeEOFException(htsjdk.samtools.util.RuntimeEOFException) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) IOUtil(htsjdk.samtools.util.IOUtil) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) PrintStream(java.io.PrintStream) SortingCollection(htsjdk.samtools.util.SortingCollection) BinaryCodec(htsjdk.samtools.util.BinaryCodec) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IntervalList(htsjdk.samtools.util.IntervalList) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SamRecordIntervalIteratorFactory(htsjdk.samtools.util.SamRecordIntervalIteratorFactory) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) Comparator(java.util.Comparator) SamReaderFactory(htsjdk.samtools.SamReaderFactory) IOException(java.io.IOException) SamRecordIntervalIteratorFactory(htsjdk.samtools.util.SamRecordIntervalIteratorFactory) RuntimeEOFException(htsjdk.samtools.util.RuntimeEOFException) IOException(java.io.IOException) Comparator(java.util.Comparator) SamReader(htsjdk.samtools.SamReader) IntervalList(htsjdk.samtools.util.IntervalList) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Example 42 with IntervalList

use of htsjdk.samtools.util.IntervalList in project jvarkit by lindenb.

the class VCFAnnoBam method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator r, final VariantContextWriter w) {
    BufferedReader bedIn = null;
    List<SamReader> samReaders = new ArrayList<SamReader>();
    IntervalTreeMap<Rgn> capture = new IntervalTreeMap<Rgn>();
    try {
        SAMFileHeader firstHeader = null;
        for (final File samFile : new HashSet<File>(BAMFILE)) {
            LOG.info("open bam " + samFile);
            final SamReader samReader = super.openSamReader(samFile.getPath());
            final SAMFileHeader samHeader = samReader.getFileHeader();
            samReaders.add(samReader);
            if (firstHeader == null) {
                firstHeader = samHeader;
            } else if (!SequenceUtil.areSequenceDictionariesEqual(firstHeader.getSequenceDictionary(), samHeader.getSequenceDictionary())) {
                throw new JvarkitException.DictionariesAreNotTheSame(firstHeader.getSequenceDictionary(), samHeader.getSequenceDictionary());
            }
        }
        IntervalList intervalList = new IntervalList(firstHeader);
        LOG.info("read bed " + BEDILE);
        bedIn = IOUtils.openFileForBufferedReading(BEDILE);
        String line;
        final BedLineCodec bedCodec = new BedLineCodec();
        while ((line = bedIn.readLine()) != null) {
            if (line.isEmpty() || line.startsWith("#"))
                continue;
            final BedLine bed = bedCodec.decode(line);
            if (bed == null)
                continue;
            if (firstHeader.getSequenceDictionary().getSequence(bed.getContig()) == null) {
                LOG.error("error in BED +" + BEDILE + " : " + line + " chromosome is not in sequence dict of " + BAMFILE);
                continue;
            }
            intervalList.add(bed.toInterval());
        }
        bedIn.close();
        bedIn = null;
        intervalList = intervalList.sorted();
        for (final Interval interval : intervalList.uniqued()) {
            final Rgn rgn = new Rgn();
            rgn.interval = interval;
            capture.put(rgn.interval, rgn);
        }
        intervalList = null;
        VCFHeader header = r.getHeader();
        VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), header.getSampleNamesInOrder());
        h2.addMetaDataLine(new VCFInfoHeaderLine(this.capture_tag, 1, VCFHeaderLineType.String, "Capture stats: Format is (start|end|mean|min|max|length|not_covered|percent_covered) BAM files: " + BAMFILE + " CAPTURE:" + BEDILE));
        w.writeHeader(h2);
        while (r.hasNext()) {
            final VariantContext ctx = r.next();
            Interval interval = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd());
            Collection<Rgn> rgns = capture.getOverlapping(interval);
            Iterator<Rgn> it = rgns.iterator();
            if (!it.hasNext()) {
                w.add(ctx);
                continue;
            }
            final Rgn rgn = it.next();
            if (!rgn.processed) {
                // LOG.info("processing "+rgn.interval);
                process(rgn, samReaders);
            }
            final VariantContextBuilder b = new VariantContextBuilder(ctx);
            b.attribute(this.capture_tag, rgn.toString());
            w.add(b.make());
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        for (final SamReader samReader : samReaders) CloserUtil.close(samReader);
    }
}
Also used : ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SamReader(htsjdk.samtools.SamReader) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) IntervalList(htsjdk.samtools.util.IntervalList) BufferedReader(java.io.BufferedReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) VCFHeader(htsjdk.variant.vcf.VCFHeader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) File(java.io.File) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval)

Aggregations

IntervalList (htsjdk.samtools.util.IntervalList)42 Interval (htsjdk.samtools.util.Interval)28 File (java.io.File)22 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)12 SAMFileHeader (htsjdk.samtools.SAMFileHeader)10 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)8 IOException (java.io.IOException)7 List (java.util.List)7 UserException (org.broadinstitute.hellbender.exceptions.UserException)7 SamReader (htsjdk.samtools.SamReader)6 SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter)6 AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)6 Nucleotide (org.broadinstitute.hellbender.utils.Nucleotide)6 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)6 DataProvider (org.testng.annotations.DataProvider)6 Arrays (java.util.Arrays)5 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)5 Test (org.testng.annotations.Test)5 htsjdk.samtools (htsjdk.samtools)4 QueryInterval (htsjdk.samtools.QueryInterval)4