Search in sources :

Example 26 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class CompareBamAndBuild method doWork.

@Override
public int doWork(final List<String> args) {
    PrintWriter out = null;
    SortingCollection<Match> database = null;
    if (this.chainFile == null) {
        LOG.error("Chain file is not defined Option");
        return -1;
    }
    if (args.size() != 2) {
        LOG.error("Illegal number of arguments. Expected two indexed BAMS.");
        return -1;
    }
    try {
        LOG.info("load chain file");
        this.liftOver = new LiftOver(this.chainFile);
        database = SortingCollection.newInstance(Match.class, new MatchCodec(), new MatchOrdererInSortingCollection(), this.maxRecordsInRam, this.tmpDir.toPath());
        database.setDestructiveIteration(true);
        for (int currentSamFileIndex = 0; currentSamFileIndex < 2; currentSamFileIndex++) {
            final File samFile = new File(args.get(currentSamFileIndex));
            LOG.info("read " + samFile);
            this.bamFiles[currentSamFileIndex] = samFile;
            SamReader samFileReader = CompareBamAndBuild.this.openSamReader(samFile.getPath());
            final SAMSequenceDictionary dict = samFileReader.getFileHeader().getSequenceDictionary();
            this.sequenceDictionaries[currentSamFileIndex] = dict;
            if (dict.isEmpty()) {
                samFileReader.close();
                LOG.error("Empty Dict  in " + samFile);
                return -1;
            }
            final Interval interval;
            if (REGION != null) {
                IntervalParser intervalParser = new IntervalParser(dict);
                interval = intervalParser.parse(this.REGION);
                if (interval == null) {
                    samFileReader.close();
                    LOG.error("Cannot parse " + REGION + " (bad syntax or not in dictionary");
                    return -1;
                }
            } else {
                interval = null;
            }
            final SAMRecordIterator iter;
            if (interval == null) {
                iter = samFileReader.iterator();
            } else {
                iter = samFileReader.queryOverlapping(interval.getContig(), interval.getStart(), interval.getEnd());
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
            while (iter.hasNext()) {
                final SAMRecord rec = progress.watch(iter.next());
                if (rec.isSecondaryOrSupplementary())
                    continue;
                final Match m = new Match();
                m.flag = rec.getFlags();
                m.readName = rec.getReadName();
                m.firstBamFile = currentSamFileIndex == 0;
                if (rec.getReadUnmappedFlag()) {
                    m.tid = -1;
                    m.pos = -1;
                } else {
                    m.tid = rec.getReferenceIndex();
                    m.pos = rec.getAlignmentStart();
                }
                database.add(m);
            }
            iter.close();
            samFileReader.close();
            samFileReader = null;
            LOG.info("Close " + samFile);
        }
        database.doneAdding();
        LOG.info("Writing results....");
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // compute the differences for each read
        out.print("#READ-Name\tCOMPARE");
        for (final File f : this.bamFiles) {
            out.print("\t" + f);
        }
        out.println();
        /* create an array of set<Match> */
        final MatchComparator match_comparator = new MatchComparator();
        final List<Set<Match>> matches = new ArrayList<Set<CompareBamAndBuild.Match>>(2);
        while (matches.size() < 2) {
            matches.add(new TreeSet<CompareBamAndBuild.Match>(match_comparator));
        }
        CloseableIterator<Match> iter = database.iterator();
        String currReadName = null;
        int curr_num_in_pair = -1;
        for (; ; ) {
            final Match nextMatch;
            if (iter.hasNext()) {
                nextMatch = iter.next();
            } else {
                nextMatch = null;
            }
            if (nextMatch == null || (currReadName != null && !currReadName.equals(nextMatch.readName)) || (curr_num_in_pair != -1 && curr_num_in_pair != nextMatch.indexInPair())) {
                if (currReadName != null) {
                    out.print(currReadName);
                    if (curr_num_in_pair > 0) {
                        out.print("/");
                        out.print(curr_num_in_pair);
                    }
                    out.print("\t");
                    if (same(matches.get(0), matches.get(1))) {
                        out.print("EQ");
                    } else {
                        out.print("NE");
                    }
                    for (int x = 0; x < 2; ++x) {
                        out.print("\t");
                        print(out, matches.get(x));
                    }
                    out.println();
                }
                if (nextMatch == null)
                    break;
                for (final Set<Match> set : matches) set.clear();
            }
            currReadName = nextMatch.readName;
            curr_num_in_pair = nextMatch.indexInPair();
            matches.get(nextMatch.firstBamFile ? 0 : 1).add(nextMatch);
        }
        iter.close();
        out.flush();
        out.close();
        database.cleanup();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        this.liftOver = null;
    }
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) TreeSet(java.util.TreeSet) Set(java.util.Set) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) PrintWriter(java.io.PrintWriter) Interval(htsjdk.samtools.util.Interval)

Example 27 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class CompareBams method doWork.

@Override
public int doWork(final List<String> args) {
    this.IN.addAll(args.stream().map(S -> new File(S)).collect(Collectors.toList()));
    SortingCollection<Match> database = null;
    SamReader samFileReader = null;
    CloseableIterator<Match> iter = null;
    try {
        if (this.IN.size() < 2) {
            LOG.error("Need more bams please");
            return -1;
        }
        database = SortingCollection.newInstance(Match.class, new MatchCodec(), new MatchOrderer(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        this.samSequenceDictAreTheSame = true;
        database.setDestructiveIteration(true);
        for (int currentSamFileIndex = 0; currentSamFileIndex < this.IN.size(); currentSamFileIndex++) {
            final File samFile = this.IN.get(currentSamFileIndex);
            LOG.info("Opening " + samFile);
            samFileReader = super.createSamReaderFactory().open(samFile);
            final SAMSequenceDictionary dict = samFileReader.getFileHeader().getSequenceDictionary();
            if (dict == null || dict.isEmpty()) {
                LOG.error("Empty Dict  in " + samFile);
                return -1;
            }
            if (!this.sequenceDictionaries.isEmpty() && !SequenceUtil.areSequenceDictionariesEqual(this.sequenceDictionaries.get(0), dict)) {
                this.samSequenceDictAreTheSame = false;
                LOG.warn("FOOL !! THE SEQUENCE DICTIONARIES ARE **NOT** THE SAME. I will try to compare anyway but it will be slower.");
            }
            this.sequenceDictionaries.add(dict);
            final Optional<Interval> interval;
            if (REGION != null && !REGION.trim().isEmpty()) {
                final IntervalParser dix = new IntervalParser(dict);
                interval = Optional.ofNullable(dix.parse(REGION));
                if (!interval.isPresent()) {
                    LOG.error("Cannot parse " + REGION + " (bad syntax or not in dictionary)");
                    return -1;
                }
            } else {
                interval = Optional.empty();
            }
            SAMRecordIterator it = null;
            if (!interval.isPresent()) {
                it = samFileReader.iterator();
            } else {
                it = samFileReader.queryOverlapping(interval.get().getContig(), interval.get().getStart(), interval.get().getEnd());
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
            while (it.hasNext()) {
                final SAMRecord rec = progress.watch(it.next());
                if (!rec.getReadUnmappedFlag()) {
                    if (rec.getMappingQuality() < this.min_mapq)
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                }
                final Match m = new Match();
                if (rec.getReadPairedFlag()) {
                    m.num_in_pair = (rec.getFirstOfPairFlag() ? 1 : 2);
                } else {
                    m.num_in_pair = 0;
                }
                m.readName = rec.getReadName();
                m.bamIndex = currentSamFileIndex;
                m.flag = rec.getFlags();
                m.cigar = rec.getCigarString();
                if (m.cigar == null)
                    m.cigar = "";
                if (rec.getReadUnmappedFlag()) {
                    m.tid = -1;
                    m.pos = -1;
                } else {
                    m.tid = rec.getReferenceIndex();
                    m.pos = rec.getAlignmentStart();
                }
                database.add(m);
            }
            it.close();
            samFileReader.close();
            samFileReader = null;
            LOG.info("Close " + samFile);
        }
        database.doneAdding();
        LOG.info("Writing results....");
        this.out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // compute the differences for each read
        this.out.print("#READ-Name\t");
        for (int x = 0; x < this.IN.size(); ++x) {
            for (int y = x + 1; y < this.IN.size(); ++y) {
                if (!(x == 0 && y == 1))
                    this.out.print("|");
                this.out.print(IN.get(x));
                this.out.print(" ");
                this.out.print(IN.get(y));
            }
        }
        for (int x = 0; x < this.IN.size(); ++x) {
            this.out.print("\t" + IN.get(x));
        }
        this.out.println();
        /* create an array of set<Match> */
        final MatchComparator match_comparator = new MatchComparator();
        final List<Set<Match>> matches = new ArrayList<Set<CompareBams.Match>>(this.IN.size());
        while (matches.size() < this.IN.size()) {
            matches.add(new TreeSet<CompareBams.Match>(match_comparator));
        }
        iter = database.iterator();
        String currReadName = null;
        int curr_num_in_pair = -1;
        for (; ; ) {
            Match nextMatch = null;
            if (iter.hasNext()) {
                nextMatch = iter.next();
            }
            if (nextMatch == null || (currReadName != null && !currReadName.equals(nextMatch.readName)) || (curr_num_in_pair != -1 && curr_num_in_pair != nextMatch.num_in_pair)) {
                if (currReadName != null) {
                    this.out.print(currReadName);
                    if (curr_num_in_pair > 0) {
                        this.out.print("/");
                        this.out.print(curr_num_in_pair);
                    }
                    this.out.print("\t");
                    for (int x = 0; x < this.IN.size(); ++x) {
                        final Set<Match> first = matches.get(x);
                        for (int y = x + 1; y < this.IN.size(); ++y) {
                            if (!(x == 0 && y == 1))
                                this.out.print("|");
                            Set<Match> second = matches.get(y);
                            if (same(first, second)) {
                                this.out.print("EQ");
                            } else {
                                this.out.print("NE");
                            }
                        }
                    }
                    for (int x = 0; x < this.IN.size(); ++x) {
                        this.out.print("\t");
                        print(matches.get(x), sequenceDictionaries.get(x));
                    }
                    this.out.println();
                }
                if (nextMatch == null)
                    break;
                for (Set<Match> set : matches) set.clear();
            }
            currReadName = nextMatch.readName;
            curr_num_in_pair = nextMatch.num_in_pair;
            matches.get(nextMatch.bamIndex).add(nextMatch);
            if (this.out.checkError())
                break;
        }
        iter.close();
        this.out.flush();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (database != null)
            database.cleanup();
        CloserUtil.close(samFileReader);
        CloserUtil.close(this.out);
        this.out = null;
    }
}
Also used : IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) TreeSet(java.util.TreeSet) Set(java.util.Set) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Example 28 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class HaloplexParasite method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    SamReader samReader = null;
    final List<Mutation> mutations = new ArrayList<>();
    try {
        final Set<File> bamFiles = Files.lines(this.bamList.toPath()).filter(T -> !(T.isEmpty() || T.startsWith("#"))).map(T -> new File(T)).collect(Collectors.toSet());
        final VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(in.getHeader().getSequenceDictionary());
        final VCFFilterHeaderLine filter = new VCFFilterHeaderLine("HALOPLEXPARASITE", "fails Parasite Haloplex Sequence");
        final VCFInfoHeaderLine infoWord = new VCFInfoHeaderLine(filter.getID(), 1, VCFHeaderLineType.String, "Parasite Haloplex Sequence (Word|Count|Fraction)");
        super.addMetaData(header);
        out.writeHeader(header);
        header.addMetaDataLine(filter);
        header.addMetaDataLine(infoWord);
        while (in.hasNext()) {
            final VariantContext ctx = in.next();
            final VariantContextBuilder vcb = new VariantContextBuilder(inputName, ctx.getContig(), ctx.getStart(), ctx.getEnd(), ctx.getAlleles());
            if (!(ctx.isIndel() || ctx.isMixed())) {
                out.add(vcb.make());
                continue;
            }
            if (!vcb.getAlleles().stream().filter(A -> !(A.isSymbolic() || A.isNoCall() || A.length() < this.minClipSize)).findAny().isPresent()) {
                out.add(vcb.make());
                continue;
            }
            final Mutation mut = new Mutation(ctx);
            mutations.add(mut);
        }
        final Counter<String> words = new Counter<>();
        for (final File bamFile : bamFiles) {
            LOG.info("Opening " + bamFile);
            samReader = createSamReaderFactory().open(bamFile);
            for (final Mutation mut : mutations) {
                // words seen in that mutation : don't use a Counter
                final Set<String> mutWords = new HashSet<>();
                /* loop over reads overlapping that mutation */
                final SAMRecordIterator sri = samReader.queryOverlapping(mut.contig, mut.start, mut.end);
                while (sri.hasNext()) {
                    final SAMRecord rec = sri.next();
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                    if (rec.getDuplicateReadFlag())
                        continue;
                    if (rec.getReadFailsVendorQualityCheckFlag())
                        continue;
                    final Cigar cigar = rec.getCigar();
                    if (cigar.numCigarElements() == 1)
                        continue;
                    final byte[] bases = rec.getReadBases();
                    int refpos = rec.getUnclippedStart();
                    int readpos = 0;
                    /* scan cigar overlapping that mutation */
                    for (final CigarElement ce : cigar) {
                        final CigarOperator op = ce.getOperator();
                        final int ref_end = refpos + (op.consumesReferenceBases() || op.isClipping() ? ce.getLength() : 0);
                        final int read_end = readpos + (op.consumesReadBases() ? ce.getLength() : 0);
                        /* check clip is large enough */
                        if (op.equals(CigarOperator.S) && ce.getLength() >= this.minClipSize) {
                            /* check clip overlap mutation */
                            if (!(ref_end < mut.start || refpos > mut.end)) {
                                /* break read of soft clip into words */
                                for (int x = 0; x + this.minClipSize <= ce.getLength(); ++x) {
                                    final String substr = new String(bases, readpos + x, this.minClipSize);
                                    if (!substr.contains("N")) {
                                        final String revcomp = AcidNucleics.reverseComplement(substr);
                                        mutWords.add(substr);
                                        if (!revcomp.equals(substr))
                                            mutWords.add(revcomp);
                                    }
                                }
                            }
                        }
                        refpos = ref_end;
                        readpos = read_end;
                    }
                }
                sri.close();
                for (final String w : mutWords) {
                    words.incr(w);
                }
            }
            // end of for(mutations)
            samReader.close();
            samReader = null;
        }
        LOG.info("mutations:" + mutations.size());
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        for (final Mutation mut : mutations) {
            progress.watch(mut.contig, mut.start);
            final VariantContextBuilder vcb = new VariantContextBuilder(inputName, mut.contig, mut.start, mut.end, mut.alleles);
            String worstString = null;
            Double worstFraction = null;
            final double totalWords = words.getTotal();
            for (final Allele a : mut.alleles) {
                if (a.isSymbolic() || a.isNoCall() || a.length() < this.minClipSize)
                    continue;
                for (int x = 0; x + this.minClipSize <= a.length(); ++x) {
                    final String substr = new String(a.getBases(), x, this.minClipSize);
                    final long count = words.count(substr);
                    final double fraction = count / totalWords;
                    if (worstFraction == null || worstFraction < fraction) {
                        worstString = substr + "|" + count + "|" + fraction;
                        worstFraction = fraction;
                    }
                }
            }
            if (worstString != null) {
                vcb.attribute(infoWord.getID(), worstString);
            }
            if (worstFraction != null && worstFraction.doubleValue() >= this.tresholdFraction) {
                vcb.filter(filter.getID());
            }
            out.add(vcb.make());
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(samReader);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarElement(htsjdk.samtools.CigarElement) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarOperator(htsjdk.samtools.CigarOperator) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) Counter(com.github.lindenb.jvarkit.util.Counter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Files(java.nio.file.Files) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Example 29 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class Biostar90204 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.suffix_length < 0) {
        LOG.error("Bad value of suffix_length:" + this.suffix_length);
        return -1;
    }
    if (this.record_per_file < 1L) {
        LOG.error("Bad value of record_per_file:" + this.record_per_file);
        return -1;
    }
    SAMFileWriter sfw = null;
    SAMRecordIterator iter = null;
    SamReader samFileReader = null;
    PrintWriter manifest = new PrintWriter(new NullOuputStream());
    try {
        samFileReader = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = samFileReader.getFileHeader();
        int split_file_number = 0;
        long nReads = 0L;
        iter = samFileReader.iterator();
        if (this.manifestFile != null) {
            manifest.close();
            manifest = new PrintWriter(manifestFile);
        }
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            if (this.samRecordFilter.filterOut(rec))
                continue;
            ++nReads;
            if (sfw == null) {
                split_file_number++;
                final String pathname = (this.prefix.isEmpty() ? "" : this.prefix + ".") + String.format("%0" + suffix_length + "d", split_file_number) + ".bam";
                final File out = new File(pathname);
                manifest.write(pathname);
                manifest.write("\t" + (nReads) + "\t");
                final SAMFileHeader header2 = header.clone();
                header2.addComment("SPLIT:" + split_file_number);
                header2.addComment("SPLIT:Starting from Read" + nReads);
                sfw = this.writingBamArgs.openSAMFileWriter(out, header2, true);
            }
            sfw.addAlignment(rec);
            if (nReads % record_per_file == 0) {
                sfw.close();
                manifest.write((nReads) + "\n");
                sfw = null;
            }
        }
        if (sfw != null) {
            sfw.close();
            manifest.write((nReads) + "\n");
        }
        manifest.flush();
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(manifest);
        CloserUtil.close(sfw);
        CloserUtil.close(iter);
        CloserUtil.close(samFileReader);
    }
    return 0;
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 30 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class Biostar59647 method doWork.

@Override
public int doWork(final List<String> args) {
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    SamReader samFileReader = null;
    PrintStream pout;
    try {
        GenomicSequence genomicSequence = null;
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile);
        samFileReader = null;
        final String bamFile = oneFileOrNull(args);
        samFileReader = super.openSamReader(bamFile);
        if (!SequenceUtil.areSequenceDictionariesEqual(indexedFastaSequenceFile.getSequenceDictionary(), samFileReader.getFileHeader().getSequenceDictionary())) {
            LOG.warning("Not the same sequence dictionaries");
        }
        final XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
        pout = (outputFile == null ? stdout() : new PrintStream(this.outputFile));
        final XMLStreamWriter w = xmlfactory.createXMLStreamWriter(pout, "UTF-8");
        w.writeStartDocument("UTF-8", "1.0");
        w.writeStartElement("sam");
        w.writeAttribute("bam", (bamFile == null ? "stdin" : bamFile));
        w.writeAttribute("ref", refFile.getPath());
        w.writeComment(getProgramCommandLine());
        final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(samFileReader.getFileHeader().getSequenceDictionary());
        final SAMRecordIterator iter = samFileReader.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            progess.watch(rec);
            final byte[] readbases = rec.getReadBases();
            w.writeStartElement("read");
            w.writeStartElement("name");
            w.writeCharacters(rec.getReadName());
            w.writeEndElement();
            w.writeStartElement("sequence");
            w.writeCharacters(new String(readbases));
            w.writeEndElement();
            w.writeStartElement("flags");
            for (SAMFlag f : SAMFlag.values()) {
                w.writeAttribute(f.name(), String.valueOf(f.isSet(rec.getFlags())));
            }
            w.writeCharacters(String.valueOf(rec.getFlags()));
            // flags
            w.writeEndElement();
            if (!rec.getReadUnmappedFlag()) {
                w.writeStartElement("qual");
                w.writeCharacters(String.valueOf(rec.getMappingQuality()));
                w.writeEndElement();
                w.writeStartElement("chrom");
                w.writeAttribute("index", String.valueOf(rec.getReferenceIndex()));
                w.writeCharacters(rec.getReferenceName());
                w.writeEndElement();
                w.writeStartElement("pos");
                w.writeCharacters(String.valueOf(rec.getAlignmentStart()));
                w.writeEndElement();
                w.writeStartElement("cigar");
                w.writeCharacters(rec.getCigarString());
                w.writeEndElement();
            }
            if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                w.writeStartElement("mate-chrom");
                w.writeAttribute("index", String.valueOf(rec.getMateReferenceIndex()));
                w.writeCharacters(rec.getMateReferenceName());
                w.writeEndElement();
                w.writeStartElement("mate-pos");
                w.writeCharacters(String.valueOf(rec.getMateAlignmentStart()));
                w.writeEndElement();
            }
            if (!rec.getReadUnmappedFlag() && rec.getCigar() != null) {
                if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
                    genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
                }
                w.writeStartElement("align");
                int readIndex = 0;
                int refIndex = rec.getAlignmentStart();
                for (final CigarElement e : rec.getCigar().getCigarElements()) {
                    switch(e.getOperator()) {
                        // ignore hard clips
                        case H:
                            break;
                        // ignore pads
                        case P:
                            break;
                        // cont.
                        case I:
                        case S:
                            {
                                final int length = e.getLength();
                                for (int i = 0; i < length; ++i) {
                                    w.writeEmptyElement(e.getOperator().name());
                                    w.writeAttribute("read-index", String.valueOf(readIndex + 1));
                                    if (readIndex >= 0 && readIndex < readbases.length) {
                                        w.writeAttribute("read-base", String.valueOf((char) (readbases[readIndex])));
                                    }
                                    readIndex++;
                                }
                                break;
                            }
                        // cont. -- reference skip
                        case N:
                        case D:
                            {
                                final int length = e.getLength();
                                for (int i = 0; i < length; ++i) {
                                    w.writeEmptyElement(e.getOperator().name());
                                    w.writeAttribute("ref-index", String.valueOf(refIndex));
                                    if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
                                        w.writeAttribute("ref-base", String.valueOf(genomicSequence.charAt(refIndex - 1)));
                                    }
                                    refIndex++;
                                }
                                break;
                            }
                        case M:
                        case EQ:
                        case X:
                            {
                                final int length = e.getLength();
                                for (int i = 0; i < length; ++i) {
                                    w.writeEmptyElement(e.getOperator().name());
                                    char baseRead = '\0';
                                    if (readIndex >= 0 && readIndex < readbases.length) {
                                        baseRead = (char) (rec.getReadBases()[readIndex]);
                                        w.writeAttribute("read-index", String.valueOf(readIndex + 1));
                                        w.writeAttribute("read-base", String.valueOf(baseRead));
                                    }
                                    w.writeAttribute("ref-index", String.valueOf(refIndex));
                                    if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
                                        char baseRef = genomicSequence.charAt(refIndex - 1);
                                        w.writeAttribute("ref-base", String.valueOf(baseRef));
                                        if (Character.toUpperCase(baseRef) != Character.toUpperCase(baseRead)) {
                                            w.writeAttribute("mismatch", "true");
                                        }
                                    }
                                    refIndex++;
                                    readIndex++;
                                }
                                break;
                            }
                        default:
                            throw new IllegalStateException("Case statement didn't deal with cigar op: " + e.getOperator());
                    }
                }
                w.writeEndElement();
            }
            w.writeEndElement();
        }
        iter.close();
        w.writeEndElement();
        w.writeEndDocument();
        w.flush();
        pout.flush();
        CloserUtil.close(w);
        CloserUtil.close(pout);
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(samFileReader);
        CloserUtil.close(indexedFastaSequenceFile);
    }
    return 0;
}
Also used : PrintStream(java.io.PrintStream) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFlag(htsjdk.samtools.SAMFlag) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) SAMRecord(htsjdk.samtools.SAMRecord)

Aggregations

SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)107 SAMRecord (htsjdk.samtools.SAMRecord)92 SamReader (htsjdk.samtools.SamReader)83 SAMFileHeader (htsjdk.samtools.SAMFileHeader)49 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)47 File (java.io.File)47 SAMFileWriter (htsjdk.samtools.SAMFileWriter)45 IOException (java.io.IOException)41 ArrayList (java.util.ArrayList)34 CigarElement (htsjdk.samtools.CigarElement)30 Cigar (htsjdk.samtools.Cigar)26 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 SamReaderFactory (htsjdk.samtools.SamReaderFactory)21 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)18 CigarOperator (htsjdk.samtools.CigarOperator)16 Interval (htsjdk.samtools.util.Interval)16 PrintWriter (java.io.PrintWriter)15 HashMap (java.util.HashMap)15 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)14 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)14