Search in sources :

Example 26 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.

the class SoftClipAnnotator method initialize.

public void initialize() {
    final Set<VCFHeaderLine> hInfo = new HashSet<>();
    final List<String> rodName = Arrays.asList(variantCollection.variants.getName());
    final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
    for (final String sample : samples) {
        this.sample2bam.put(sample, new ArrayList<>());
    }
    for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName)) {
        if (isUniqueHeaderLine(line, hInfo))
            hInfo.add(line);
    }
    VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
    VCFHeader header2 = new VCFHeader(vcfHeader);
    header2.addMetaDataLine(this.numClipInGenotypeFormatHeaderLine);
    header2.addMetaDataLine(this.filterHaveClipInVariant);
    vcfWriter.writeHeader(header2);
    final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
    for (final File samFilename : IOUtil.unrollFiles(this.samFilenames, ".bam")) {
        logger.info("opening " + samFilename);
        final SamReader r = srf.open(samFilename);
        final Set<String> sampleset = new HashSet<>();
        for (final SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
            if (g.getSample() == null || !this.sample2bam.containsKey(g.getSample()))
                continue;
            sampleset.add(g.getSample());
        }
        if (sampleset.isEmpty()) {
            logger.info("no VCF sample in " + samFilename);
            CloserUtil.close(r);
            continue;
        }
        this.samReaders.add(r);
        for (final String sample : sampleset) {
            if (!this.sample2bam.containsKey(sample))
                continue;
            this.sample2bam.get(sample).add(r);
        }
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 27 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.

the class ExtendReferenceWithReads method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("No REF defined");
        return -1;
    }
    this.samReaders.clear();
    PrintStream out = null;
    try {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        SAMSequenceDictionary dict = this.indexedFastaSequenceFile.getSequenceDictionary();
        if (dict == null) {
            LOG.error("Reference file is missing a sequence dictionary (use picard)");
            return -1;
        }
        final SamReaderFactory srf = super.createSamReaderFactory();
        srf.setOption(Option.CACHE_FILE_BASED_INDEXES, true);
        for (final String uri : IOUtils.unrollFiles(args)) {
            LOG.info("opening BAM " + uri);
            final SamReader sr = srf.open(SamInputResource.of(uri));
            /* doesn't work with remote ?? */
            if (!sr.hasIndex()) {
                LOG.error("file " + uri + " is not indexed");
                return -1;
            }
            final SAMFileHeader header = sr.getFileHeader();
            if (!header.getSortOrder().equals(SortOrder.coordinate)) {
                LOG.error("file " + uri + " not sorted on coordinate but " + header.getSortOrder());
                return -1;
            }
            final SAMSequenceDictionary dict2 = header.getSequenceDictionary();
            if (dict2 == null) {
                LOG.error("file " + uri + " needs a sequence dictionary");
                return -1;
            }
            samReaders.add(sr);
        }
        if (samReaders.isEmpty()) {
            LOG.error("No BAM defined");
            return -1;
        }
        out = super.openFileOrStdoutAsPrintStream(this.outputFile);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        for (final SAMSequenceRecord ssr : dict.getSequences()) {
            final GenomicSequence genomic = new GenomicSequence(this.indexedFastaSequenceFile, ssr.getSequenceName());
            int chromStart = 0;
            int nPrinted = 0;
            out.print(">");
            out.print(ssr.getSequenceName());
            for (final Rescue side : Rescue.values()) {
                switch(side) {
                    case LEFT:
                        /* look before position 0 of chromosome */
                        {
                            final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, -1, -1, side);
                            int newstart = 0;
                            for (final Integer pos : pos2bases.keySet()) {
                                if (pos >= 0)
                                    continue;
                                newstart = Math.min(newstart, pos);
                            }
                            while (newstart < 0) {
                                final Counter<Byte> count = pos2bases.get(newstart);
                                if (nPrinted % 60 == 0)
                                    out.println();
                                out.print(consensus(count));
                                newstart++;
                                ++nPrinted;
                            }
                            break;
                        }
                    case RIGHT:
                        /* look after position > length(chromosome) */
                        {
                            final Map<Integer, Counter<Byte>> pos2bases = this.scanRegion(ssr, -1, -1, side);
                            int newend = ssr.getSequenceLength();
                            for (final Integer pos : pos2bases.keySet()) {
                                if (pos < ssr.getSequenceLength())
                                    continue;
                                newend = Math.max(newend, pos + 1);
                            }
                            for (int i = ssr.getSequenceLength(); i < newend; i++) {
                                final Counter<Byte> count = pos2bases.get(i);
                                if (nPrinted % 60 == 0)
                                    out.println();
                                out.print(consensus(count));
                                ++nPrinted;
                            }
                            break;
                        }
                    case CENTER:
                        /* 0 to chromosome-length */
                        {
                            chromStart = 0;
                            while (chromStart < genomic.length()) {
                                final char base = Character.toUpperCase(genomic.charAt(chromStart));
                                if (base != 'N') {
                                    progress.watch(ssr.getSequenceName(), chromStart);
                                    if (nPrinted % 60 == 0)
                                        out.println();
                                    out.print(base);
                                    ++chromStart;
                                    ++nPrinted;
                                    continue;
                                }
                                int chromEnd = chromStart + 1;
                                while (chromEnd < genomic.length() && Character.toUpperCase(genomic.charAt(chromEnd)) == 'N') {
                                    ++chromEnd;
                                }
                                if (chromEnd - chromStart < this.minLenNNNNContig) {
                                    while (chromStart < chromEnd) {
                                        progress.watch(ssr.getSequenceName(), chromStart);
                                        if (nPrinted % 60 == 0)
                                            out.println();
                                        out.print(base);
                                        ++chromStart;
                                        ++nPrinted;
                                    }
                                    continue;
                                }
                                final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, chromStart, chromEnd, side);
                                while (chromStart < chromEnd) {
                                    final Counter<Byte> count = pos2bases.get(chromStart);
                                    if (nPrinted % 60 == 0)
                                        out.println();
                                    if (count == null) {
                                        out.print('N');
                                    } else {
                                        out.print(consensus(count));
                                    }
                                    ++chromStart;
                                    ++nPrinted;
                                    continue;
                                }
                            }
                            break;
                        }
                }
            // end switch type
            }
            out.println();
        }
        progress.finish();
        out.flush();
        out.close();
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        for (final SamReader r : samReaders) CloserUtil.close(r);
        samReaders.clear();
    }
}
Also used : PrintStream(java.io.PrintStream) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) SAMFileHeader(htsjdk.samtools.SAMFileHeader) HashMap(java.util.HashMap) Map(java.util.Map)

Example 28 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.

the class BamFile method reOpen.

@Override
public BamFile reOpen() throws IOException {
    final String url = this.getSource();
    final SamReaderFactory srf = SamReaderFactory.makeDefault();
    srf.validationStringency(ValidationStringency.LENIENT);
    final SamInputResource sir;
    if (IOUtil.isUrl(url)) {
        sir = SamInputResource.of(new URL(url));
        if (!this.indexFile.isPresent())
            throw new IOException("Boum");
        sir.index(this.indexFile.get());
    } else {
        sir = SamInputResource.of(new File(url));
    }
    final BamFile bf = new BamFile(url, srf.open(sir), this.indexFile);
    bf.delete_index_on_close = false;
    return bf;
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) IOException(java.io.IOException) File(java.io.File) URL(java.net.URL) SamInputResource(htsjdk.samtools.SamInputResource)

Example 29 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.

the class LumpyMoreSamples method doWork.

@Override
public int doWork(final List<String> args) {
    VcfIterator r = null;
    VariantContextWriter vcw = null;
    final Map<String, SamReader> sample2samreaders = new HashMap<>();
    try {
        r = super.openVcfIterator(oneFileOrNull(args));
        final VCFHeader headerIn = r.getHeader();
        final SAMSequenceDictionary dict = headerIn.getSequenceDictionary();
        if (dict == null) {
            LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input vcf"));
            return -1;
        }
        final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        IOUtil.slurpLines(this.bamFileList).stream().forEach(F -> {
            if (F.trim().isEmpty())
                return;
            final SamReader sr = samReaderFactory.open(SamInputResource.of(F));
            final SAMFileHeader samHeader = sr.getFileHeader();
            final SAMSequenceDictionary dict2 = samHeader.getSequenceDictionary();
            if (dict2 == null) {
                throw new JvarkitException.BamDictionaryMissing(F);
            }
            if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict2)) {
                throw new JvarkitException.DictionariesAreNotTheSame(dict, dict2);
            }
            for (final SAMReadGroupRecord rg : samHeader.getReadGroups()) {
                final String sample = rg.getSample();
                if (StringUtil.isBlank(sample))
                    continue;
                final SamReader reader = sample2samreaders.get(sample);
                if (reader == null) {
                    sample2samreaders.put(sample, reader);
                } else if (reader == sr) {
                    continue;
                } else {
                    throw new JvarkitException.UserError("more than one sample per bam:" + F);
                }
            }
        });
        final Set<String> inVcfSampleNames = new HashSet<>(headerIn.getSampleNamesInOrder());
        final Set<String> outVcfSampleNames = new HashSet<>(inVcfSampleNames);
        outVcfSampleNames.addAll(sample2samreaders.keySet());
        final VCFHeader headerOut = new VCFHeader(headerIn.getMetaDataInInputOrder(), outVcfSampleNames);
        final VCFFormatHeaderLine SU2 = new VCFFormatHeaderLine("SU2", 1, VCFHeaderLineType.Integer, "Number of pieces of evidence supporting the variant");
        final VCFFormatHeaderLine PE2 = new VCFFormatHeaderLine("PE2", 1, VCFHeaderLineType.Integer, "Number of split reads supporting the variant");
        final VCFFormatHeaderLine SR2 = new VCFFormatHeaderLine("SR2", 1, VCFHeaderLineType.Integer, "Number of paired-end reads supporting the variant");
        headerOut.addMetaDataLine(SU2);
        headerOut.addMetaDataLine(PE2);
        headerOut.addMetaDataLine(SR2);
        vcw = super.openVariantContextWriter(this.outputFile);
        vcw.writeHeader(headerOut);
        while (r.hasNext()) {
            final VariantContext ctx = r.next();
            final StructuralVariantType sttype = ctx.getStructuralVariantType();
            if (sttype == null)
                continue;
            final int tid = dict.getSequenceIndex(ctx.getContig());
            final Map<String, Genotype> genotypeMap = new HashMap<>();
            ctx.getGenotypes().stream().forEach(G -> genotypeMap.put(G.getSampleName(), G));
            for (final String sample : sample2samreaders.keySet()) {
                final SamReader samReader = sample2samreaders.get(sample);
                final SupportingReads sr = new SupportingReads();
                switch(sttype) {
                    case DEL:
                        {
                            int pos = ctx.getStart();
                            int[] ci = confidenceIntervalPos(ctx);
                            final QueryInterval left = new QueryInterval(tid, pos - ci[0], pos + ci[1]);
                            int end = ctx.getEnd();
                            ci = confidenceIntervalEnd(ctx);
                            final QueryInterval right = new QueryInterval(tid, end - ci[0], end + ci[1]);
                            for (final SAMRecord rec : extractSupportingReads(ctx, sample, samReader, new QueryInterval[] { left, right })) {
                                final Cigar cigar = rec.getCigar();
                                if (cigar.isLeftClipped()) {
                                    final QueryInterval qi = new QueryInterval(tid, rec.getUnclippedStart(), rec.getStart() - 1);
                                    if (qi.overlaps(left)) {
                                        sr.splitReads++;
                                        if (rec.getReadPairedFlag())
                                            sr.pairedReads++;
                                    }
                                }
                                if (cigar.isRightClipped()) {
                                    final QueryInterval qi = new QueryInterval(tid, rec.getEnd() + 1, rec.getUnclippedEnd());
                                    if (qi.overlaps(right)) {
                                        sr.splitReads++;
                                        if (rec.getReadPairedFlag())
                                            sr.pairedReads++;
                                    }
                                }
                            }
                            break;
                        }
                    default:
                        break;
                }
                final GenotypeBuilder gb;
                if (genotypeMap.containsKey(sample)) {
                    gb = new GenotypeBuilder(genotypeMap.get(sample));
                } else {
                    gb = new GenotypeBuilder(sample, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
                }
                gb.attribute(SR2.getID(), sr.splitReads);
                gb.attribute(PE2.getID(), sr.pairedReads);
                gb.attribute(SU2.getID(), 0);
                genotypeMap.put(sample, gb.make());
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // add missing samples.
            for (final String sampleName : outVcfSampleNames) {
                if (genotypeMap.containsKey(sampleName))
                    continue;
                genotypeMap.put(sampleName, new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).make());
            }
            vcb.genotypes(genotypeMap.values());
            vcw.add(vcb.make());
        }
        r.close();
        r = null;
        sample2samreaders.values().stream().forEach(R -> CloserUtil.close(R));
        LOG.info("done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
    }
}
Also used : HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VariantContext(htsjdk.variant.variantcontext.VariantContext) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) StructuralVariantType(htsjdk.variant.variantcontext.StructuralVariantType)

Example 30 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project gatk by broadinstitute.

the class CompareSAMs method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(samFiles.get(0));
    IOUtil.assertFileIsReadable(samFiles.get(1));
    SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(VALIDATION_STRINGENCY);
    SamReader sam1 = factory.referenceSequence(REFERENCE_SEQUENCE).open(samFiles.get(0));
    SamReader sam2 = factory.referenceSequence(REFERENCE_SEQUENCE).open(samFiles.get(1));
    SamComparison comparison = new SamComparison(sam1, sam2);
    comparison.printReport();
    if (comparison.areEqual()) {
        System.out.println("Files match.");
    } else {
        System.out.println("Files differ.");
    }
    CloserUtil.close(sam1);
    CloserUtil.close(sam2);
    return comparison.areEqual();
}
Also used : SamReader(htsjdk.samtools.SamReader) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SamComparison(org.broadinstitute.hellbender.utils.read.SamComparison)

Aggregations

SamReaderFactory (htsjdk.samtools.SamReaderFactory)57 SamReader (htsjdk.samtools.SamReader)51 File (java.io.File)43 SAMRecord (htsjdk.samtools.SAMRecord)27 IOException (java.io.IOException)26 SAMFileHeader (htsjdk.samtools.SAMFileHeader)18 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)17 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)17 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)14 ArrayList (java.util.ArrayList)13 List (java.util.List)11 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)10 HashSet (java.util.HashSet)10 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)9 SAMFileWriter (htsjdk.samtools.SAMFileWriter)8 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)8 URL (java.net.URL)8 HashMap (java.util.HashMap)8 BufferedReader (java.io.BufferedReader)7 PrintWriter (java.io.PrintWriter)7