Search in sources :

Example 66 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class CommBams method doWork.

@Override
public int doWork(final List<String> args) {
    if (args.size() != 2) {
        LOG.info("Expected two and only two bams please, but got " + args.size());
        return -1;
    }
    final Comparator<SAMRecord> comparator = this.sortMethod.get();
    final SamReader[] samFileReaders = { null, null };
    @SuppressWarnings("unchecked") final PeekableIterator<SAMRecord>[] iters = new PeekableIterator[] { null, null };
    PrintWriter out = null;
    Optional<SAMRecord> rec0 = Optional.empty();
    Optional<SAMRecord> rec1 = Optional.empty();
    if (whatPrint.equals(WhatToPrint.name) && hide1 && hide2 && hide3) {
        LOG.error("all flags hide** are on");
        return -1;
    }
    try {
        if (this.delim.equals("\\n"))
            this.delim = "\n";
        for (int i = 0; i < args.size() && i < samFileReaders.length; ++i) {
            final String samFile = args.get(i);
            LOG.info("opening " + samFile);
            samFileReaders[i] = super.openSamReader(samFile);
            final SAMFileHeader header = samFileReaders[i].getFileHeader();
            if (header.getSortOrder() != SAMFileHeader.SortOrder.queryname) {
                LOG.error("Expected " + samFile + " to be sorted on " + SAMFileHeader.SortOrder.queryname + " but got " + header.getSortOrder());
                return -1;
            }
            iters[i] = new PeekableIterator<>(samFileReaders[i].iterator());
        }
        out = super.openFileOrStdoutAsPrintWriter(outputFile);
        for (; ; ) {
            for (int i = 0; i < 2; ++i) {
                Optional<SAMRecord> optRec = (i == 0 ? rec0 : rec1);
                if (!optRec.isPresent()) {
                    while (iters[i].hasNext()) {
                        final SAMRecord rec = iters[i].peek();
                        if (optRec.isPresent() && comparator.compare(optRec.get(), rec) > 0) {
                            LOG.error("Something is wrong in sort order of " + args.get(i) + " : got\n\t" + rec + " " + side(rec) + "\nafter\n\t" + optRec.get() + " " + side(optRec.get()) + "\nSee also option (samtools querysort)");
                            return -1;
                        } else // equals
                        if (optRec.isPresent() && comparator.compare(optRec.get(), rec) == 0) {
                            // consumme
                            iters[i].next();
                        } else // it's a new
                        if (!optRec.isPresent()) {
                            // consumme
                            optRec = Optional.of(iters[i].next());
                            if (i == 0) {
                                rec0 = optRec;
                            } else {
                                rec1 = optRec;
                            }
                        } else // compare <0
                        {
                            break;
                        }
                    }
                }
            }
            if (!rec0.isPresent() && !rec1.isPresent())
                break;
            if ((!rec0.isPresent() && rec1.isPresent()) || (rec0.isPresent() && rec1.isPresent() && comparator.compare(rec0.get(), rec1.get()) > 0)) {
                dump(out, null, rec1.get());
                rec1 = Optional.empty();
            } else if ((rec0.isPresent() && !rec1.isPresent()) || (rec0.isPresent() && rec1.isPresent() && comparator.compare(rec0.get(), rec1.get()) < 0)) {
                dump(out, rec0.get(), null);
                rec0 = Optional.empty();
            } else {
                dump(out, rec0.get(), rec1.get());
                rec0 = Optional.empty();
                rec1 = Optional.empty();
            }
        }
        out.flush();
        out.close();
        out = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        for (int i = 0; i < samFileReaders.length; ++i) {
            CloserUtil.close(iters[i]);
            CloserUtil.close(samFileReaders[i]);
        }
        CloserUtil.close(out);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) PeekableIterator(htsjdk.samtools.util.PeekableIterator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) PrintWriter(java.io.PrintWriter)

Example 67 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class CompareBamAndBuild method doWork.

@Override
public int doWork(final List<String> args) {
    PrintWriter out = null;
    SortingCollection<Match> database = null;
    if (this.chainFile == null) {
        LOG.error("Chain file is not defined Option");
        return -1;
    }
    if (args.size() != 2) {
        LOG.error("Illegal number of arguments. Expected two indexed BAMS.");
        return -1;
    }
    try {
        LOG.info("load chain file");
        this.liftOver = new LiftOver(this.chainFile);
        database = SortingCollection.newInstance(Match.class, new MatchCodec(), new MatchOrdererInSortingCollection(), this.maxRecordsInRam, this.tmpDir.toPath());
        database.setDestructiveIteration(true);
        for (int currentSamFileIndex = 0; currentSamFileIndex < 2; currentSamFileIndex++) {
            final File samFile = new File(args.get(currentSamFileIndex));
            LOG.info("read " + samFile);
            this.bamFiles[currentSamFileIndex] = samFile;
            SamReader samFileReader = CompareBamAndBuild.this.openSamReader(samFile.getPath());
            final SAMSequenceDictionary dict = samFileReader.getFileHeader().getSequenceDictionary();
            this.sequenceDictionaries[currentSamFileIndex] = dict;
            if (dict.isEmpty()) {
                samFileReader.close();
                LOG.error("Empty Dict  in " + samFile);
                return -1;
            }
            final Interval interval;
            if (REGION != null) {
                IntervalParser intervalParser = new IntervalParser(dict);
                interval = intervalParser.parse(this.REGION);
                if (interval == null) {
                    samFileReader.close();
                    LOG.error("Cannot parse " + REGION + " (bad syntax or not in dictionary");
                    return -1;
                }
            } else {
                interval = null;
            }
            final SAMRecordIterator iter;
            if (interval == null) {
                iter = samFileReader.iterator();
            } else {
                iter = samFileReader.queryOverlapping(interval.getContig(), interval.getStart(), interval.getEnd());
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
            while (iter.hasNext()) {
                final SAMRecord rec = progress.watch(iter.next());
                if (rec.isSecondaryOrSupplementary())
                    continue;
                final Match m = new Match();
                m.flag = rec.getFlags();
                m.readName = rec.getReadName();
                m.firstBamFile = currentSamFileIndex == 0;
                if (rec.getReadUnmappedFlag()) {
                    m.tid = -1;
                    m.pos = -1;
                } else {
                    m.tid = rec.getReferenceIndex();
                    m.pos = rec.getAlignmentStart();
                }
                database.add(m);
            }
            iter.close();
            samFileReader.close();
            samFileReader = null;
            LOG.info("Close " + samFile);
        }
        database.doneAdding();
        LOG.info("Writing results....");
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // compute the differences for each read
        out.print("#READ-Name\tCOMPARE");
        for (final File f : this.bamFiles) {
            out.print("\t" + f);
        }
        out.println();
        /* create an array of set<Match> */
        final MatchComparator match_comparator = new MatchComparator();
        final List<Set<Match>> matches = new ArrayList<Set<CompareBamAndBuild.Match>>(2);
        while (matches.size() < 2) {
            matches.add(new TreeSet<CompareBamAndBuild.Match>(match_comparator));
        }
        CloseableIterator<Match> iter = database.iterator();
        String currReadName = null;
        int curr_num_in_pair = -1;
        for (; ; ) {
            final Match nextMatch;
            if (iter.hasNext()) {
                nextMatch = iter.next();
            } else {
                nextMatch = null;
            }
            if (nextMatch == null || (currReadName != null && !currReadName.equals(nextMatch.readName)) || (curr_num_in_pair != -1 && curr_num_in_pair != nextMatch.indexInPair())) {
                if (currReadName != null) {
                    out.print(currReadName);
                    if (curr_num_in_pair > 0) {
                        out.print("/");
                        out.print(curr_num_in_pair);
                    }
                    out.print("\t");
                    if (same(matches.get(0), matches.get(1))) {
                        out.print("EQ");
                    } else {
                        out.print("NE");
                    }
                    for (int x = 0; x < 2; ++x) {
                        out.print("\t");
                        print(out, matches.get(x));
                    }
                    out.println();
                }
                if (nextMatch == null)
                    break;
                for (final Set<Match> set : matches) set.clear();
            }
            currReadName = nextMatch.readName;
            curr_num_in_pair = nextMatch.indexInPair();
            matches.get(nextMatch.firstBamFile ? 0 : 1).add(nextMatch);
        }
        iter.close();
        out.flush();
        out.close();
        database.cleanup();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        this.liftOver = null;
    }
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) TreeSet(java.util.TreeSet) Set(java.util.Set) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) PrintWriter(java.io.PrintWriter) Interval(htsjdk.samtools.util.Interval)

Example 68 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class CompareBams method doWork.

@Override
public int doWork(final List<String> args) {
    this.IN.addAll(args.stream().map(S -> new File(S)).collect(Collectors.toList()));
    SortingCollection<Match> database = null;
    SamReader samFileReader = null;
    CloseableIterator<Match> iter = null;
    try {
        if (this.IN.size() < 2) {
            LOG.error("Need more bams please");
            return -1;
        }
        database = SortingCollection.newInstance(Match.class, new MatchCodec(), new MatchOrderer(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        this.samSequenceDictAreTheSame = true;
        database.setDestructiveIteration(true);
        for (int currentSamFileIndex = 0; currentSamFileIndex < this.IN.size(); currentSamFileIndex++) {
            final File samFile = this.IN.get(currentSamFileIndex);
            LOG.info("Opening " + samFile);
            samFileReader = super.createSamReaderFactory().open(samFile);
            final SAMSequenceDictionary dict = samFileReader.getFileHeader().getSequenceDictionary();
            if (dict == null || dict.isEmpty()) {
                LOG.error("Empty Dict  in " + samFile);
                return -1;
            }
            if (!this.sequenceDictionaries.isEmpty() && !SequenceUtil.areSequenceDictionariesEqual(this.sequenceDictionaries.get(0), dict)) {
                this.samSequenceDictAreTheSame = false;
                LOG.warn("FOOL !! THE SEQUENCE DICTIONARIES ARE **NOT** THE SAME. I will try to compare anyway but it will be slower.");
            }
            this.sequenceDictionaries.add(dict);
            final Optional<Interval> interval;
            if (REGION != null && !REGION.trim().isEmpty()) {
                final IntervalParser dix = new IntervalParser(dict);
                interval = Optional.ofNullable(dix.parse(REGION));
                if (!interval.isPresent()) {
                    LOG.error("Cannot parse " + REGION + " (bad syntax or not in dictionary)");
                    return -1;
                }
            } else {
                interval = Optional.empty();
            }
            SAMRecordIterator it = null;
            if (!interval.isPresent()) {
                it = samFileReader.iterator();
            } else {
                it = samFileReader.queryOverlapping(interval.get().getContig(), interval.get().getStart(), interval.get().getEnd());
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
            while (it.hasNext()) {
                final SAMRecord rec = progress.watch(it.next());
                if (!rec.getReadUnmappedFlag()) {
                    if (rec.getMappingQuality() < this.min_mapq)
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                }
                final Match m = new Match();
                if (rec.getReadPairedFlag()) {
                    m.num_in_pair = (rec.getFirstOfPairFlag() ? 1 : 2);
                } else {
                    m.num_in_pair = 0;
                }
                m.readName = rec.getReadName();
                m.bamIndex = currentSamFileIndex;
                m.flag = rec.getFlags();
                m.cigar = rec.getCigarString();
                if (m.cigar == null)
                    m.cigar = "";
                if (rec.getReadUnmappedFlag()) {
                    m.tid = -1;
                    m.pos = -1;
                } else {
                    m.tid = rec.getReferenceIndex();
                    m.pos = rec.getAlignmentStart();
                }
                database.add(m);
            }
            it.close();
            samFileReader.close();
            samFileReader = null;
            LOG.info("Close " + samFile);
        }
        database.doneAdding();
        LOG.info("Writing results....");
        this.out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // compute the differences for each read
        this.out.print("#READ-Name\t");
        for (int x = 0; x < this.IN.size(); ++x) {
            for (int y = x + 1; y < this.IN.size(); ++y) {
                if (!(x == 0 && y == 1))
                    this.out.print("|");
                this.out.print(IN.get(x));
                this.out.print(" ");
                this.out.print(IN.get(y));
            }
        }
        for (int x = 0; x < this.IN.size(); ++x) {
            this.out.print("\t" + IN.get(x));
        }
        this.out.println();
        /* create an array of set<Match> */
        final MatchComparator match_comparator = new MatchComparator();
        final List<Set<Match>> matches = new ArrayList<Set<CompareBams.Match>>(this.IN.size());
        while (matches.size() < this.IN.size()) {
            matches.add(new TreeSet<CompareBams.Match>(match_comparator));
        }
        iter = database.iterator();
        String currReadName = null;
        int curr_num_in_pair = -1;
        for (; ; ) {
            Match nextMatch = null;
            if (iter.hasNext()) {
                nextMatch = iter.next();
            }
            if (nextMatch == null || (currReadName != null && !currReadName.equals(nextMatch.readName)) || (curr_num_in_pair != -1 && curr_num_in_pair != nextMatch.num_in_pair)) {
                if (currReadName != null) {
                    this.out.print(currReadName);
                    if (curr_num_in_pair > 0) {
                        this.out.print("/");
                        this.out.print(curr_num_in_pair);
                    }
                    this.out.print("\t");
                    for (int x = 0; x < this.IN.size(); ++x) {
                        final Set<Match> first = matches.get(x);
                        for (int y = x + 1; y < this.IN.size(); ++y) {
                            if (!(x == 0 && y == 1))
                                this.out.print("|");
                            Set<Match> second = matches.get(y);
                            if (same(first, second)) {
                                this.out.print("EQ");
                            } else {
                                this.out.print("NE");
                            }
                        }
                    }
                    for (int x = 0; x < this.IN.size(); ++x) {
                        this.out.print("\t");
                        print(matches.get(x), sequenceDictionaries.get(x));
                    }
                    this.out.println();
                }
                if (nextMatch == null)
                    break;
                for (Set<Match> set : matches) set.clear();
            }
            currReadName = nextMatch.readName;
            curr_num_in_pair = nextMatch.num_in_pair;
            matches.get(nextMatch.bamIndex).add(nextMatch);
            if (this.out.checkError())
                break;
        }
        iter.close();
        this.out.flush();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (database != null)
            database.cleanup();
        CloserUtil.close(samFileReader);
        CloserUtil.close(this.out);
        this.out = null;
    }
}
Also used : IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) TreeSet(java.util.TreeSet) Set(java.util.Set) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Example 69 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class HaloplexParasite method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    SamReader samReader = null;
    final List<Mutation> mutations = new ArrayList<>();
    try {
        final Set<File> bamFiles = Files.lines(this.bamList.toPath()).filter(T -> !(T.isEmpty() || T.startsWith("#"))).map(T -> new File(T)).collect(Collectors.toSet());
        final VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(in.getHeader().getSequenceDictionary());
        final VCFFilterHeaderLine filter = new VCFFilterHeaderLine("HALOPLEXPARASITE", "fails Parasite Haloplex Sequence");
        final VCFInfoHeaderLine infoWord = new VCFInfoHeaderLine(filter.getID(), 1, VCFHeaderLineType.String, "Parasite Haloplex Sequence (Word|Count|Fraction)");
        super.addMetaData(header);
        out.writeHeader(header);
        header.addMetaDataLine(filter);
        header.addMetaDataLine(infoWord);
        while (in.hasNext()) {
            final VariantContext ctx = in.next();
            final VariantContextBuilder vcb = new VariantContextBuilder(inputName, ctx.getContig(), ctx.getStart(), ctx.getEnd(), ctx.getAlleles());
            if (!(ctx.isIndel() || ctx.isMixed())) {
                out.add(vcb.make());
                continue;
            }
            if (!vcb.getAlleles().stream().filter(A -> !(A.isSymbolic() || A.isNoCall() || A.length() < this.minClipSize)).findAny().isPresent()) {
                out.add(vcb.make());
                continue;
            }
            final Mutation mut = new Mutation(ctx);
            mutations.add(mut);
        }
        final Counter<String> words = new Counter<>();
        for (final File bamFile : bamFiles) {
            LOG.info("Opening " + bamFile);
            samReader = createSamReaderFactory().open(bamFile);
            for (final Mutation mut : mutations) {
                // words seen in that mutation : don't use a Counter
                final Set<String> mutWords = new HashSet<>();
                /* loop over reads overlapping that mutation */
                final SAMRecordIterator sri = samReader.queryOverlapping(mut.contig, mut.start, mut.end);
                while (sri.hasNext()) {
                    final SAMRecord rec = sri.next();
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                    if (rec.getDuplicateReadFlag())
                        continue;
                    if (rec.getReadFailsVendorQualityCheckFlag())
                        continue;
                    final Cigar cigar = rec.getCigar();
                    if (cigar.numCigarElements() == 1)
                        continue;
                    final byte[] bases = rec.getReadBases();
                    int refpos = rec.getUnclippedStart();
                    int readpos = 0;
                    /* scan cigar overlapping that mutation */
                    for (final CigarElement ce : cigar) {
                        final CigarOperator op = ce.getOperator();
                        final int ref_end = refpos + (op.consumesReferenceBases() || op.isClipping() ? ce.getLength() : 0);
                        final int read_end = readpos + (op.consumesReadBases() ? ce.getLength() : 0);
                        /* check clip is large enough */
                        if (op.equals(CigarOperator.S) && ce.getLength() >= this.minClipSize) {
                            /* check clip overlap mutation */
                            if (!(ref_end < mut.start || refpos > mut.end)) {
                                /* break read of soft clip into words */
                                for (int x = 0; x + this.minClipSize <= ce.getLength(); ++x) {
                                    final String substr = new String(bases, readpos + x, this.minClipSize);
                                    if (!substr.contains("N")) {
                                        final String revcomp = AcidNucleics.reverseComplement(substr);
                                        mutWords.add(substr);
                                        if (!revcomp.equals(substr))
                                            mutWords.add(revcomp);
                                    }
                                }
                            }
                        }
                        refpos = ref_end;
                        readpos = read_end;
                    }
                }
                sri.close();
                for (final String w : mutWords) {
                    words.incr(w);
                }
            }
            // end of for(mutations)
            samReader.close();
            samReader = null;
        }
        LOG.info("mutations:" + mutations.size());
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        for (final Mutation mut : mutations) {
            progress.watch(mut.contig, mut.start);
            final VariantContextBuilder vcb = new VariantContextBuilder(inputName, mut.contig, mut.start, mut.end, mut.alleles);
            String worstString = null;
            Double worstFraction = null;
            final double totalWords = words.getTotal();
            for (final Allele a : mut.alleles) {
                if (a.isSymbolic() || a.isNoCall() || a.length() < this.minClipSize)
                    continue;
                for (int x = 0; x + this.minClipSize <= a.length(); ++x) {
                    final String substr = new String(a.getBases(), x, this.minClipSize);
                    final long count = words.count(substr);
                    final double fraction = count / totalWords;
                    if (worstFraction == null || worstFraction < fraction) {
                        worstString = substr + "|" + count + "|" + fraction;
                        worstFraction = fraction;
                    }
                }
            }
            if (worstString != null) {
                vcb.attribute(infoWord.getID(), worstString);
            }
            if (worstFraction != null && worstFraction.doubleValue() >= this.tresholdFraction) {
                vcb.filter(filter.getID());
            }
            out.add(vcb.make());
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(samReader);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarElement(htsjdk.samtools.CigarElement) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarOperator(htsjdk.samtools.CigarOperator) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) Counter(com.github.lindenb.jvarkit.util.Counter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Files(java.nio.file.Files) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Example 70 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class IgvReview method start.

@Override
public void start(final Stage stage) throws Exception {
    stage.setTitle(getClass().getSimpleName());
    Predicate<VariantContext> ctxFilter;
    Map<String, String> params = super.getParameters().getNamed();
    if (params.containsKey("--filter")) {
        ctxFilter = JexlVariantPredicate.create(params.get("--filter"));
    } else {
        ctxFilter = V -> true;
    }
    final List<String> args = super.getParameters().getUnnamed();
    final File configFile;
    if (args.isEmpty()) {
        final FileChooser fc = new FileChooser();
        final String lastDirStr = preferences.get(LAST_USED_DIR_KEY, null);
        if (lastDirStr != null && !lastDirStr.isEmpty()) {
            fc.setInitialDirectory(new File(lastDirStr));
        }
        fc.getExtensionFilters().addAll(Collections.singletonList(new FileChooser.ExtensionFilter("Config file", "*.config", "*.cfg", "*.list")));
        configFile = fc.showOpenDialog(stage);
    } else if (args.size() == 1) {
        configFile = new File(args.get(0));
    } else {
        configFile = null;
    }
    if (configFile == null || !configFile.exists()) {
        JfxUtils.dialog().cause("Illegal number of arguments or file doesn't exists.").show(stage);
        Platform.exit();
        return;
    }
    if (configFile.isFile() && configFile.getParentFile() != null) {
        this.preferences.put(LAST_USED_DIR_KEY, configFile.getParentFile().getPath());
    }
    final List<String> configLines = Files.readAllLines(configFile.toPath());
    final Predicate<String> ignoreComment = (S) -> !S.startsWith("#");
    final Predicate<String> predVcf = S -> S.endsWith(".vcf") || S.endsWith(".vcf.gz");
    if (configLines.stream().filter(ignoreComment).filter(predVcf).count() != 1) {
        JfxUtils.dialog().cause("Found more than one vcf file in " + configFile).show(stage);
        Platform.exit();
        return;
    }
    final File vcfFile = configLines.stream().filter(ignoreComment).filter(predVcf).map(S -> new File(S)).findFirst().get();
    LOG.info("Opening vcf file and loading in memory");
    VCFFileReader vfr = null;
    CloseableIterator<VariantContext> iter = null;
    final Set<String> sampleNames;
    try {
        this.variants.clear();
        vfr = new VCFFileReader(vcfFile, false);
        this.vcfHeader = vfr.getFileHeader();
        sampleNames = new HashSet<>(this.vcfHeader.getSampleNamesInOrder());
        if (sampleNames.isEmpty()) {
            JfxUtils.dialog().cause("No Genotypes in " + vcfFile).show(stage);
            Platform.exit();
            return;
        }
        iter = vfr.iterator();
        this.variants.addAll(iter.stream().filter(ctxFilter).filter(CTX -> CTX.isVariant()).collect(Collectors.toList()));
    } catch (final Exception err) {
        JfxUtils.dialog().cause(err).show(stage);
        Platform.exit();
        return;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(vfr);
    }
    if (this.variants.isEmpty()) {
        JfxUtils.dialog().cause("No Variants").show(stage);
        Platform.exit();
        return;
    }
    final SAMSequenceDictionary dict = this.vcfHeader.getSequenceDictionary();
    if (dict == null || dict.isEmpty()) {
        JfxUtils.dialog().cause(JvarkitException.VcfDictionaryMissing.getMessage(vcfFile.getPath())).show(stage);
        Platform.exit();
        return;
    }
    final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
    configLines.stream().filter(ignoreComment).filter(S -> S.endsWith(".bam")).map(S -> new File(S)).forEach(F -> {
        final SamReader samIn = srf.open(F);
        final SAMFileHeader header = samIn.getFileHeader();
        CloserUtil.close(samIn);
        String sample = null;
        for (final SAMReadGroupRecord rg : header.getReadGroups()) {
            String s = rg.getSample();
            if (s == null)
                continue;
            if (sample == null) {
                sample = s;
            } else if (!sample.equals(s)) {
                JfxUtils.dialog().cause("Two samples in " + F).show(stage);
                Platform.exit();
                return;
            }
        }
        if (sample == null) {
            JfxUtils.dialog().cause("No sample in " + F + ". Ignoring").show(stage);
            return;
        }
        if (!sampleNames.contains(sample)) {
            JfxUtils.dialog().cause("Not in VCF header " + sample + " / " + F + ". Ignoring").show(stage);
            return;
        }
        this.sample2bamFile.put(sample, F);
    });
    if (this.sample2bamFile.isEmpty()) {
        JfxUtils.dialog().cause("No valid bam file in " + configFile).show(stage);
        return;
    }
    sampleNames.retainAll(this.sample2bamFile.keySet());
    if (sampleNames.isEmpty()) {
        JfxUtils.dialog().cause("No Sample associated to bam").show(stage);
        return;
    }
    ObservableList<VariantAndGenotype> genotypes = FXCollections.observableArrayList(this.variants.stream().flatMap(CTX -> CTX.getGenotypes().stream().filter(G -> sampleNames.contains(G.getSampleName())).map(G -> new VariantAndGenotype(CTX, G))).collect(Collectors.toList()));
    if (genotypes.isEmpty()) {
        JfxUtils.dialog().cause("No Genotype to show").show(stage);
        return;
    }
    Menu menu = new Menu("File");
    MenuItem menuItem = new MenuItem("Save as...");
    menuItem.setOnAction(AE -> {
        saveVariantsAs(stage);
    });
    menu.getItems().add(menuItem);
    menuItem = new MenuItem("Save");
    menuItem.setOnAction(AE -> {
        if (this.saveAsFile != null) {
            saveVariants(stage, this.saveAsFile);
        } else {
            saveVariantsAs(stage);
        }
    });
    menu.getItems().add(menuItem);
    menu.getItems().add(new SeparatorMenuItem());
    menuItem = new MenuItem("Quit");
    menuItem.setOnAction(AE -> {
        Platform.exit();
    });
    menu.getItems().add(menuItem);
    MenuBar bar = new MenuBar(menu);
    this.genotypeTable = new TableView<>(genotypes);
    this.genotypeTable.getColumns().add(makeColumn("CHROM", G -> G.ctx.getContig()));
    this.genotypeTable.getColumns().add(makeColumn("POS", G -> G.ctx.getStart()));
    this.genotypeTable.getColumns().add(makeColumn("ID", G -> G.ctx.getID()));
    this.genotypeTable.getColumns().add(makeColumn("REF", G -> G.ctx.getReference().getDisplayString()));
    this.genotypeTable.getColumns().add(makeColumn("ALT", G -> G.ctx.getAlternateAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
    this.genotypeTable.getColumns().add(makeColumn("Sample", G -> G.g.getSampleName()));
    this.genotypeTable.getColumns().add(makeColumn("Type", G -> G.g.getType().name()));
    this.genotypeTable.getColumns().add(makeColumn("Alleles", G -> G.g.getAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
    TableColumn<VariantAndGenotype, String> reviewCol = new TableColumn<>("Review");
    reviewCol.setCellValueFactory(C -> C.getValue().getReviewProperty());
    reviewCol.setCellFactory(TextFieldTableCell.forTableColumn());
    reviewCol.setOnEditCommit((E) -> {
        int y = E.getTablePosition().getRow();
        this.genotypeTable.getItems().get(y).setReview(E.getNewValue());
    });
    reviewCol.setEditable(true);
    this.genotypeTable.getColumns().add(reviewCol);
    this.genotypeTable.getSelectionModel().cellSelectionEnabledProperty().set(true);
    this.genotypeTable.setEditable(true);
    final ContextMenu cm = new ContextMenu();
    MenuItem mi1 = new MenuItem("Menu 1");
    cm.getItems().add(mi1);
    MenuItem mi2 = new MenuItem("Menu 2");
    cm.getItems().add(mi2);
    this.genotypeTable.setOnMousePressed(event -> {
        if (event.isPrimaryButtonDown() && (event.getClickCount() == 3 || event.isShiftDown())) {
            moveIgvTo(stage, genotypeTable.getSelectionModel().getSelectedItem());
        } else if (event.isSecondaryButtonDown()) {
            cm.show(genotypeTable, event.getScreenX(), event.getScreenY());
        }
    });
    final BorderPane pane2 = new BorderPane(this.genotypeTable);
    pane2.setPadding(new Insets(10, 10, 10, 10));
    VBox vbox1 = new VBox(bar, pane2);
    final Scene scene = new Scene(vbox1, 500, 300);
    stage.setScene(scene);
    stage.show();
}
Also used : JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VBox(javafx.scene.layout.VBox) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Application(javafx.application.Application) Vector(java.util.Vector) ContextMenu(javafx.scene.control.ContextMenu) IgvConstants(com.github.lindenb.jvarkit.util.igv.IgvConstants) Map(java.util.Map) TableView(javafx.scene.control.TableView) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) MenuItem(javafx.scene.control.MenuItem) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Platform(javafx.application.Platform) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) ObservableList(javafx.collections.ObservableList) BorderPane(javafx.scene.layout.BorderPane) SamReaderFactory(htsjdk.samtools.SamReaderFactory) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Scene(javafx.scene.Scene) Socket(java.net.Socket) SimpleStringProperty(javafx.beans.property.SimpleStringProperty) FXCollections(javafx.collections.FXCollections) HashMap(java.util.HashMap) BackingStoreException(java.util.prefs.BackingStoreException) TextFieldTableCell(javafx.scene.control.cell.TextFieldTableCell) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) TableColumn(javafx.scene.control.TableColumn) HashSet(java.util.HashSet) Insets(javafx.geometry.Insets) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) MenuBar(javafx.scene.control.MenuBar) Files(java.nio.file.Files) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) InputStreamReader(java.io.InputStreamReader) File(java.io.File) Preferences(java.util.prefs.Preferences) Menu(javafx.scene.control.Menu) FileChooser(javafx.stage.FileChooser) Stage(javafx.stage.Stage) Closeable(java.io.Closeable) JfxUtils(com.github.lindenb.jvarkit.jfx.components.JfxUtils) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Window(javafx.stage.Window) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) BorderPane(javafx.scene.layout.BorderPane) Insets(javafx.geometry.Insets) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) MenuBar(javafx.scene.control.MenuBar) ContextMenu(javafx.scene.control.ContextMenu) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) FileChooser(javafx.stage.FileChooser) ContextMenu(javafx.scene.control.ContextMenu) Menu(javafx.scene.control.Menu) SamReaderFactory(htsjdk.samtools.SamReaderFactory) MenuItem(javafx.scene.control.MenuItem) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) Scene(javafx.scene.Scene) TableColumn(javafx.scene.control.TableColumn) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) BackingStoreException(java.util.prefs.BackingStoreException) IOException(java.io.IOException) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) VBox(javafx.scene.layout.VBox)

Aggregations

SamReader (htsjdk.samtools.SamReader)211 SAMRecord (htsjdk.samtools.SAMRecord)137 File (java.io.File)111 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)89 SAMFileHeader (htsjdk.samtools.SAMFileHeader)83 IOException (java.io.IOException)71 SamReaderFactory (htsjdk.samtools.SamReaderFactory)65 ArrayList (java.util.ArrayList)63 SAMFileWriter (htsjdk.samtools.SAMFileWriter)58 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)44 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)42 List (java.util.List)39 CigarElement (htsjdk.samtools.CigarElement)32 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)32 HashMap (java.util.HashMap)31 Cigar (htsjdk.samtools.Cigar)30 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)30 PrintWriter (java.io.PrintWriter)27 Interval (htsjdk.samtools.util.Interval)26 HashSet (java.util.HashSet)26