Search in sources :

Example 1 with IntervalParser

use of com.github.lindenb.jvarkit.util.bio.IntervalParser in project jvarkit by lindenb.

the class Bam2Wig method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.win_shift <= 0) {
        LOG.error("window shift<=0");
        return -1;
    }
    if (this.window_span <= 0) {
        LOG.error("window size<=0");
        return -1;
    }
    final Interval interval;
    PrintWriter pw = null;
    CloseableIterator<SAMRecord> samRecordIterator = null;
    final List<SamReader> samReaders = new ArrayList<>();
    final List<CloseableIterator<SAMRecord>> merginIterators = new ArrayList<>();
    try {
        final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(htsjdk.samtools.ValidationStringency.LENIENT);
        if (args.isEmpty()) {
            if (!StringUtil.isBlank(region_str)) {
                LOG.error("cannot specify region for stdin");
                return -1;
            }
            interval = null;
            samReaders.add(srf.open(SamInputResource.of(stdin())));
            samRecordIterator = samReaders.get(0).iterator();
        } else if (args.size() == 1 && !args.get(0).endsWith(".list")) {
            samReaders.add(srf.open(SamInputResource.of(new File(args.get(0)))));
            if (StringUtil.isBlank(this.region_str)) {
                samRecordIterator = samReaders.get(0).iterator();
                interval = null;
            } else {
                interval = new IntervalParser(samReaders.get(0).getFileHeader().getSequenceDictionary()).setContigNameIsWholeContig(true).parse(region_str);
                if (interval == null) {
                    LOG.error("Cannot parse interval " + this.region_str);
                    return -1;
                }
                LOG.debug("interval " + interval);
                samRecordIterator = samReaders.get(0).query(interval.getContig(), interval.getStart(), interval.getEnd(), false);
            }
        } else {
            final List<File> samFiles;
            if (args.size() == 1 && args.get(0).endsWith(".list")) {
                samFiles = IOUtils.unrollFile(new File(args.get(0)));
            } else {
                samFiles = args.stream().map(S -> new File(S)).collect(Collectors.toList());
            }
            if (samFiles.isEmpty()) {
                LOG.error("No Input SAM file");
                return -1;
            }
            final SAMSequenceDictionary dict0 = SAMSequenceDictionaryExtractor.extractDictionary(samFiles.get(0));
            if (dict0 == null)
                throw new JvarkitException.DictionaryMissing(samFiles.get(0).getPath());
            samFiles.stream().forEach(F -> {
                final SAMSequenceDictionary dicti = SAMSequenceDictionaryExtractor.extractDictionary(F);
                if (dicti == null)
                    throw new JvarkitException.DictionaryMissing(F.getPath());
                if (!SequenceUtil.areSequenceDictionariesEqual(dicti, dict0)) {
                    throw new JvarkitException.DictionariesAreNotTheSame(dict0, dicti);
                }
            });
            for (final File bamFile : samFiles) {
                LOG.info("opening " + bamFile);
                samReaders.add(srf.open(bamFile));
            }
            final SamFileHeaderMerger mergedheader = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, samReaders.stream().map(SR -> SR.getFileHeader()).collect(Collectors.toList()), false);
            final Map<SamReader, CloseableIterator<SAMRecord>> reader2iter = new HashMap<>();
            if (StringUtil.isBlank(this.region_str)) {
                for (final SamReader sr : samReaders) {
                    reader2iter.put(sr, sr.iterator());
                }
                interval = null;
            } else {
                interval = new IntervalParser(dict0).setContigNameIsWholeContig(true).parse(region_str);
                if (interval == null) {
                    LOG.error("Cannot parse interval " + this.region_str);
                    return -1;
                }
                LOG.info("interval :" + interval);
                for (final SamReader sr : samReaders) {
                    reader2iter.put(sr, sr.query(interval.getContig(), interval.getStart(), interval.getEnd(), false));
                }
            }
            merginIterators.addAll(reader2iter.values());
            samRecordIterator = new MergingSamRecordIterator(mergedheader, reader2iter, true);
        }
        for (final SamReader sr : samReaders) {
            if (sr.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
                LOG.error("one of your bam input is not sorted on coordinate");
                return -1;
            }
        }
        pw = openFileOrStdoutAsPrintWriter(this.outputFile);
        run(pw, samRecordIterator, samReaders.get(0).getFileHeader().getSequenceDictionary(), interval);
        samRecordIterator.close();
        samRecordIterator = null;
        CloserUtil.close(samReaders);
        samReaders.clear();
        pw.flush();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(merginIterators);
        CloserUtil.close(samRecordIterator);
        CloserUtil.close(samReaders);
        CloserUtil.close(pw);
        pw = null;
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Arrays(java.util.Arrays) SequenceUtil(htsjdk.samtools.util.SequenceUtil) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) Parameter(com.beust.jcommander.Parameter) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) SAMFileHeader(htsjdk.samtools.SAMFileHeader) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) SAMSequenceDictionaryExtractor(htsjdk.variant.utils.SAMSequenceDictionaryExtractor) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Counter(com.github.lindenb.jvarkit.util.Counter) Predicate(java.util.function.Predicate) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Logger(com.github.lindenb.jvarkit.util.log.Logger) SamFilterParser(com.github.lindenb.jvarkit.util.bio.samfilter.SamFilterParser) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) Objects(java.util.Objects) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) CloseableIterator(htsjdk.samtools.util.CloseableIterator) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) SamReaderFactory(htsjdk.samtools.SamReaderFactory) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) Interval(htsjdk.samtools.util.Interval) PrintWriter(java.io.PrintWriter)

Example 2 with IntervalParser

use of com.github.lindenb.jvarkit.util.bio.IntervalParser in project jvarkit by lindenb.

the class CompareBamAndBuild method doWork.

@Override
public int doWork(final List<String> args) {
    PrintWriter out = null;
    SortingCollection<Match> database = null;
    if (this.chainFile == null) {
        LOG.error("Chain file is not defined Option");
        return -1;
    }
    if (args.size() != 2) {
        LOG.error("Illegal number of arguments. Expected two indexed BAMS.");
        return -1;
    }
    try {
        LOG.info("load chain file");
        this.liftOver = new LiftOver(this.chainFile);
        database = SortingCollection.newInstance(Match.class, new MatchCodec(), new MatchOrdererInSortingCollection(), this.maxRecordsInRam, this.tmpDir.toPath());
        database.setDestructiveIteration(true);
        for (int currentSamFileIndex = 0; currentSamFileIndex < 2; currentSamFileIndex++) {
            final File samFile = new File(args.get(currentSamFileIndex));
            LOG.info("read " + samFile);
            this.bamFiles[currentSamFileIndex] = samFile;
            SamReader samFileReader = CompareBamAndBuild.this.openSamReader(samFile.getPath());
            final SAMSequenceDictionary dict = samFileReader.getFileHeader().getSequenceDictionary();
            this.sequenceDictionaries[currentSamFileIndex] = dict;
            if (dict.isEmpty()) {
                samFileReader.close();
                LOG.error("Empty Dict  in " + samFile);
                return -1;
            }
            final Interval interval;
            if (REGION != null) {
                IntervalParser intervalParser = new IntervalParser(dict);
                interval = intervalParser.parse(this.REGION);
                if (interval == null) {
                    samFileReader.close();
                    LOG.error("Cannot parse " + REGION + " (bad syntax or not in dictionary");
                    return -1;
                }
            } else {
                interval = null;
            }
            final SAMRecordIterator iter;
            if (interval == null) {
                iter = samFileReader.iterator();
            } else {
                iter = samFileReader.queryOverlapping(interval.getContig(), interval.getStart(), interval.getEnd());
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
            while (iter.hasNext()) {
                final SAMRecord rec = progress.watch(iter.next());
                if (rec.isSecondaryOrSupplementary())
                    continue;
                final Match m = new Match();
                m.flag = rec.getFlags();
                m.readName = rec.getReadName();
                m.firstBamFile = currentSamFileIndex == 0;
                if (rec.getReadUnmappedFlag()) {
                    m.tid = -1;
                    m.pos = -1;
                } else {
                    m.tid = rec.getReferenceIndex();
                    m.pos = rec.getAlignmentStart();
                }
                database.add(m);
            }
            iter.close();
            samFileReader.close();
            samFileReader = null;
            LOG.info("Close " + samFile);
        }
        database.doneAdding();
        LOG.info("Writing results....");
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // compute the differences for each read
        out.print("#READ-Name\tCOMPARE");
        for (final File f : this.bamFiles) {
            out.print("\t" + f);
        }
        out.println();
        /* create an array of set<Match> */
        final MatchComparator match_comparator = new MatchComparator();
        final List<Set<Match>> matches = new ArrayList<Set<CompareBamAndBuild.Match>>(2);
        while (matches.size() < 2) {
            matches.add(new TreeSet<CompareBamAndBuild.Match>(match_comparator));
        }
        CloseableIterator<Match> iter = database.iterator();
        String currReadName = null;
        int curr_num_in_pair = -1;
        for (; ; ) {
            final Match nextMatch;
            if (iter.hasNext()) {
                nextMatch = iter.next();
            } else {
                nextMatch = null;
            }
            if (nextMatch == null || (currReadName != null && !currReadName.equals(nextMatch.readName)) || (curr_num_in_pair != -1 && curr_num_in_pair != nextMatch.indexInPair())) {
                if (currReadName != null) {
                    out.print(currReadName);
                    if (curr_num_in_pair > 0) {
                        out.print("/");
                        out.print(curr_num_in_pair);
                    }
                    out.print("\t");
                    if (same(matches.get(0), matches.get(1))) {
                        out.print("EQ");
                    } else {
                        out.print("NE");
                    }
                    for (int x = 0; x < 2; ++x) {
                        out.print("\t");
                        print(out, matches.get(x));
                    }
                    out.println();
                }
                if (nextMatch == null)
                    break;
                for (final Set<Match> set : matches) set.clear();
            }
            currReadName = nextMatch.readName;
            curr_num_in_pair = nextMatch.indexInPair();
            matches.get(nextMatch.firstBamFile ? 0 : 1).add(nextMatch);
        }
        iter.close();
        out.flush();
        out.close();
        database.cleanup();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        this.liftOver = null;
    }
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) TreeSet(java.util.TreeSet) Set(java.util.Set) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) PrintWriter(java.io.PrintWriter) Interval(htsjdk.samtools.util.Interval)

Example 3 with IntervalParser

use of com.github.lindenb.jvarkit.util.bio.IntervalParser in project jvarkit by lindenb.

the class CompareBams method doWork.

@Override
public int doWork(final List<String> args) {
    this.IN.addAll(args.stream().map(S -> new File(S)).collect(Collectors.toList()));
    SortingCollection<Match> database = null;
    SamReader samFileReader = null;
    CloseableIterator<Match> iter = null;
    try {
        if (this.IN.size() < 2) {
            LOG.error("Need more bams please");
            return -1;
        }
        database = SortingCollection.newInstance(Match.class, new MatchCodec(), new MatchOrderer(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        this.samSequenceDictAreTheSame = true;
        database.setDestructiveIteration(true);
        for (int currentSamFileIndex = 0; currentSamFileIndex < this.IN.size(); currentSamFileIndex++) {
            final File samFile = this.IN.get(currentSamFileIndex);
            LOG.info("Opening " + samFile);
            samFileReader = super.createSamReaderFactory().open(samFile);
            final SAMSequenceDictionary dict = samFileReader.getFileHeader().getSequenceDictionary();
            if (dict == null || dict.isEmpty()) {
                LOG.error("Empty Dict  in " + samFile);
                return -1;
            }
            if (!this.sequenceDictionaries.isEmpty() && !SequenceUtil.areSequenceDictionariesEqual(this.sequenceDictionaries.get(0), dict)) {
                this.samSequenceDictAreTheSame = false;
                LOG.warn("FOOL !! THE SEQUENCE DICTIONARIES ARE **NOT** THE SAME. I will try to compare anyway but it will be slower.");
            }
            this.sequenceDictionaries.add(dict);
            final Optional<Interval> interval;
            if (REGION != null && !REGION.trim().isEmpty()) {
                final IntervalParser dix = new IntervalParser(dict);
                interval = Optional.ofNullable(dix.parse(REGION));
                if (!interval.isPresent()) {
                    LOG.error("Cannot parse " + REGION + " (bad syntax or not in dictionary)");
                    return -1;
                }
            } else {
                interval = Optional.empty();
            }
            SAMRecordIterator it = null;
            if (!interval.isPresent()) {
                it = samFileReader.iterator();
            } else {
                it = samFileReader.queryOverlapping(interval.get().getContig(), interval.get().getStart(), interval.get().getEnd());
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
            while (it.hasNext()) {
                final SAMRecord rec = progress.watch(it.next());
                if (!rec.getReadUnmappedFlag()) {
                    if (rec.getMappingQuality() < this.min_mapq)
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                }
                final Match m = new Match();
                if (rec.getReadPairedFlag()) {
                    m.num_in_pair = (rec.getFirstOfPairFlag() ? 1 : 2);
                } else {
                    m.num_in_pair = 0;
                }
                m.readName = rec.getReadName();
                m.bamIndex = currentSamFileIndex;
                m.flag = rec.getFlags();
                m.cigar = rec.getCigarString();
                if (m.cigar == null)
                    m.cigar = "";
                if (rec.getReadUnmappedFlag()) {
                    m.tid = -1;
                    m.pos = -1;
                } else {
                    m.tid = rec.getReferenceIndex();
                    m.pos = rec.getAlignmentStart();
                }
                database.add(m);
            }
            it.close();
            samFileReader.close();
            samFileReader = null;
            LOG.info("Close " + samFile);
        }
        database.doneAdding();
        LOG.info("Writing results....");
        this.out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // compute the differences for each read
        this.out.print("#READ-Name\t");
        for (int x = 0; x < this.IN.size(); ++x) {
            for (int y = x + 1; y < this.IN.size(); ++y) {
                if (!(x == 0 && y == 1))
                    this.out.print("|");
                this.out.print(IN.get(x));
                this.out.print(" ");
                this.out.print(IN.get(y));
            }
        }
        for (int x = 0; x < this.IN.size(); ++x) {
            this.out.print("\t" + IN.get(x));
        }
        this.out.println();
        /* create an array of set<Match> */
        final MatchComparator match_comparator = new MatchComparator();
        final List<Set<Match>> matches = new ArrayList<Set<CompareBams.Match>>(this.IN.size());
        while (matches.size() < this.IN.size()) {
            matches.add(new TreeSet<CompareBams.Match>(match_comparator));
        }
        iter = database.iterator();
        String currReadName = null;
        int curr_num_in_pair = -1;
        for (; ; ) {
            Match nextMatch = null;
            if (iter.hasNext()) {
                nextMatch = iter.next();
            }
            if (nextMatch == null || (currReadName != null && !currReadName.equals(nextMatch.readName)) || (curr_num_in_pair != -1 && curr_num_in_pair != nextMatch.num_in_pair)) {
                if (currReadName != null) {
                    this.out.print(currReadName);
                    if (curr_num_in_pair > 0) {
                        this.out.print("/");
                        this.out.print(curr_num_in_pair);
                    }
                    this.out.print("\t");
                    for (int x = 0; x < this.IN.size(); ++x) {
                        final Set<Match> first = matches.get(x);
                        for (int y = x + 1; y < this.IN.size(); ++y) {
                            if (!(x == 0 && y == 1))
                                this.out.print("|");
                            Set<Match> second = matches.get(y);
                            if (same(first, second)) {
                                this.out.print("EQ");
                            } else {
                                this.out.print("NE");
                            }
                        }
                    }
                    for (int x = 0; x < this.IN.size(); ++x) {
                        this.out.print("\t");
                        print(matches.get(x), sequenceDictionaries.get(x));
                    }
                    this.out.println();
                }
                if (nextMatch == null)
                    break;
                for (Set<Match> set : matches) set.clear();
            }
            currReadName = nextMatch.readName;
            curr_num_in_pair = nextMatch.num_in_pair;
            matches.get(nextMatch.bamIndex).add(nextMatch);
            if (this.out.checkError())
                break;
        }
        iter.close();
        this.out.flush();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (database != null)
            database.cleanup();
        CloserUtil.close(samFileReader);
        CloserUtil.close(this.out);
        this.out = null;
    }
}
Also used : IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) TreeSet(java.util.TreeSet) Set(java.util.Set) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Example 4 with IntervalParser

use of com.github.lindenb.jvarkit.util.bio.IntervalParser in project jvarkit by lindenb.

the class BamToSql method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidxFile == null) {
        LOG.error("ref sequence faidx not defined");
        return -1;
    }
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    PrintWriter out = null;
    GenomicSequence genomicSequence = null;
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    args = new ArrayList<String>(IOUtils.unrollFiles(args));
    try {
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidxFile);
        out.println("CREATE TABLE IF NOT EXISTS SamFile");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("filename TEXT");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Dictionary");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("name TEXT NOT NULL,");
        out.println("length INT NOT NULL,");
        out.println("tid INT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS ReadGroup");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("groupId TEXT NOT NULL,");
        out.println("sample TEXT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Read");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("name TEXT NOT NULL,");
        out.println("flag INTEGER NOT NULL,");
        if (this.printflag) {
            for (final SAMFlag flg : SAMFlag.values()) {
                out.println(flg.name() + " INTEGER NOT NULL,");
            }
        }
        out.println("rname TEXT,");
        out.println("pos INTEGER,");
        out.println("mapq INTEGER NOT NULL,");
        out.println("cigar TEXT,");
        out.println("rnext TEXT,");
        out.println("pnext INTEGER,");
        out.println("tlen INTEGER,");
        out.println("sequence TEXT NOT NULL,");
        out.println("qualities TEXT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("group_id INT,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id),");
        out.println("FOREIGN KEY(group_id) REFERENCES ReadGroup(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Cigar");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("read_pos INT ,");
        out.println("read_base TEXT,");
        out.println("read_qual INT ,");
        out.println("ref_pos INT ,");
        out.println("ref_base TEXT,");
        out.println("operator TEXT NOT NULL,");
        out.println("read_id INT NOT NULL,");
        out.println("FOREIGN KEY(read_id) REFERENCES Read(id)");
        out.println(");");
        out.println("begin transaction;");
        int samIndex = 0;
        do {
            final String inputName;
            if (samIndex == 0 && args.isEmpty()) {
                sfr = openSamReader(null);
                inputName = "<stdin>";
            } else {
                inputName = args.get(samIndex);
                sfr = openSamReader(inputName);
            }
            final SAMFileHeader header1 = sfr.getFileHeader();
            if (header1 == null) {
                throw new JvarkitException.FileFormatError("File header missing");
            }
            final SAMSequenceDictionary dict = header1.getSequenceDictionary();
            if (dict == null) {
                throw new JvarkitException.DictionaryMissing("No Dictionary in input");
            }
            final IntervalParser intervalParser = new IntervalParser(dict);
            final Interval userInterval;
            iter = null;
            if (this.regionStr == null || this.regionStr.isEmpty()) {
                LOG.warn("You're currently scanning the whole BAM ???!!!");
                iter = sfr.iterator();
                userInterval = null;
            } else {
                userInterval = intervalParser.parse(this.regionStr);
                if (userInterval == null) {
                    throw new JvarkitException.UserError("cannot parse interval " + this.regionStr);
                }
                iter = sfr.query(userInterval.getContig(), userInterval.getStart(), userInterval.getEnd(), false);
            }
            out.println(String.join(" ", "insert into SamFile(filename) values(", quote(inputName), ");"));
            for (int i = 0; i < dict.size(); ++i) {
                final SAMSequenceRecord ssr = dict.getSequence(i);
                out.println("insert into Dictionary(name,length,tid,samfile_id) select " + quote(inputName) + "," + ssr.getSequenceLength() + "," + i + ",max(id) from SamFile;");
            }
            for (final SAMReadGroupRecord g : header1.getReadGroups()) {
                out.println("insert into ReadGroup(groupId,sample,samfile_id) select " + quote(g.getId()) + "," + quote(g.getSample()) + "," + "max(id) from SamFile;");
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
            while (iter.hasNext()) {
                final SAMRecord rec = progress.watch(iter.next());
                final StringBuilder sql = new StringBuilder();
                sql.append("insert into Read(" + "name,flag,");
                if (this.printflag) {
                    for (final SAMFlag flg : SAMFlag.values()) {
                        sql.append(flg.name()).append(",");
                    }
                }
                sql.append("rname,pos,mapq,cigar,rnext,pnext,tlen,sequence,qualities,group_id,samfile_id) select ");
                sql.append(quote(rec.getReadName())).append(",");
                sql.append(rec.getFlags()).append(",");
                if (this.printflag) {
                    for (final SAMFlag flg : SAMFlag.values()) {
                        sql.append(flg.isSet(rec.getFlags()) ? 1 : 0);
                        sql.append(",");
                    }
                }
                if (rec.getReferenceName() == null || rec.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
                    sql.append("NULL,NULL");
                } else {
                    sql.append(quote(rec.getReferenceName()));
                    sql.append(",");
                    sql.append(rec.getAlignmentStart());
                }
                sql.append(",");
                sql.append(rec.getMappingQuality());
                sql.append(",");
                // cigar
                if (rec.getCigarString() == null || rec.getCigarString().equals(SAMRecord.NO_ALIGNMENT_CIGAR)) {
                    sql.append("NULL");
                } else {
                    sql.append(quote(rec.getCigarString()));
                }
                sql.append(",");
                // rnext
                if (rec.getMateReferenceName() == null || rec.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
                    sql.append("NULL,NULL");
                } else {
                    sql.append(quote(rec.getMateReferenceName()));
                    sql.append(",");
                    sql.append(rec.getMateAlignmentStart());
                }
                sql.append(",");
                // tlen
                sql.append(rec.getInferredInsertSize());
                sql.append(",");
                // sequence
                sql.append(quote(rec.getReadString()));
                sql.append(",");
                // qualities
                sql.append(quote(rec.getBaseQualityString()));
                sql.append(",");
                if (rec.getReadGroup() == null) {
                    sql.append("NULL");
                } else {
                    sql.append("G.id");
                }
                sql.append(",F.id FROM SamFile as F");
                if (rec.getReadGroup() != null) {
                    sql.append(" , ReadGroup as G where G.groupId=").append(quote(rec.getReadGroup().getId())).append(" and F.id = G.samfile_id ");
                }
                sql.append("  ORDER BY F.id DESC LIMIT 1;");
                out.println(sql.toString());
                if (this.printcigar && !rec.getReadUnmappedFlag() && rec.getCigar() != null) {
                    if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
                        genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
                    }
                    int ref = rec.getUnclippedStart();
                    final byte[] bases = rec.getReadBases();
                    final byte[] quals = rec.getBaseQualities();
                    int read = 0;
                    for (final CigarElement ce : rec.getCigar()) {
                        final CigarOperator op = ce.getOperator();
                        if (op.equals(CigarOperator.P))
                            continue;
                        for (int i = 0; i < ce.getLength(); ++i) {
                            sql.setLength(0);
                            boolean in_user_interval = true;
                            sql.append("insert into Cigar(operator,read_pos,read_base,read_qual,ref_pos,ref_base,read_id) ");
                            sql.append("select '");
                            sql.append(op.name());
                            sql.append("',");
                            if (userInterval != null && !(rec.getReferenceName().equals(userInterval.getContig()) && ref >= userInterval.getStart() && ref <= userInterval.getEnd())) {
                                in_user_interval = false;
                            }
                            switch(op) {
                                case I:
                                    {
                                        sql.append(read);
                                        sql.append(",");
                                        sql.append("'" + (char) bases[read] + "',");
                                        sql.append("" + quals[read] + "");
                                        sql.append(",");
                                        sql.append("NULL,NULL");
                                        read++;
                                        break;
                                    }
                                case D:
                                case N:
                                case // yes H (hard clip)
                                H:
                                    {
                                        sql.append("NULL,NULL,NULL,");
                                        sql.append(ref);
                                        sql.append(",'");
                                        sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
                                        sql.append("'");
                                        ref++;
                                        break;
                                    }
                                case M:
                                case X:
                                case EQ:
                                case // yes S, soft clip
                                S:
                                    {
                                        sql.append(read);
                                        sql.append(",");
                                        sql.append("'" + (char) bases[read] + "',");
                                        sql.append("" + quals[read] + "");
                                        sql.append(",");
                                        sql.append(ref);
                                        sql.append(",'");
                                        sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
                                        sql.append("'");
                                        ref++;
                                        read++;
                                        break;
                                    }
                                default:
                                    throw new IllegalStateException();
                            }
                            sql.append(", id from Read ORDER BY id DESC LIMIT 1;");
                            if (in_user_interval)
                                out.println(sql.toString());
                        }
                    }
                }
            }
            iter.close();
            iter = null;
            sfr.close();
            sfr = null;
            progress.finish();
            samIndex++;
        } while (samIndex < args.size());
        out.println("COMMIT;");
        out.flush();
        out.close();
        LOG.info("done");
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(out);
        CloserUtil.close(indexedFastaSequenceFile);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) PrintWriter(java.io.PrintWriter) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFlag(htsjdk.samtools.SAMFlag) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval)

Example 5 with IntervalParser

use of com.github.lindenb.jvarkit.util.bio.IntervalParser in project jvarkit by lindenb.

the class SAM4WebLogo method doWork.

@Override
public int doWork(final List<String> args) {
    if (StringUtil.isBlank(this.regionStr)) {
        LOG.error("Undefined interval.");
        return -1;
    }
    final IntervalParser intervalParser = new IntervalParser().setFixContigName(true);
    PrintWriter out = null;
    SamReader samReader = null;
    SAMRecordIterator iter = null;
    try {
        out = super.openFileOrStdoutAsPrintWriter(outputFile);
        for (final String inputName : IOUtils.unrollFiles(args)) {
            samReader = openSamReader(inputName);
            final Interval interval = intervalParser.setDictionary(samReader.getFileHeader().getSequenceDictionary()).parse(this.regionStr);
            if (interval == null) {
                LOG.error("Bad interval " + this.regionStr);
                return -1;
            }
            iter = samReader.query(interval.getContig(), interval.getStart(), interval.getEnd(), false);
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                if (rec.getReadUnmappedFlag())
                    continue;
                if (this.SamRecordFilter.filterOut(rec))
                    continue;
                final Cigar cigar = rec.getCigar();
                if (cigar == null || cigar.isEmpty())
                    continue;
                if (!rec.getReferenceName().equals(interval.getContig()))
                    continue;
                if (this.readEnd.apply(rec) < interval.getStart())
                    continue;
                if (this.readStart.apply(rec) > interval.getEnd())
                    continue;
                printRead(out, rec, interval);
            }
            iter.close();
            iter = null;
            samReader.close();
            samReader = null;
        }
        out.flush();
        out.close();
        out = null;
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
        CloserUtil.close(out);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) Cigar(htsjdk.samtools.Cigar) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMRecord(htsjdk.samtools.SAMRecord) PrintWriter(java.io.PrintWriter) Interval(htsjdk.samtools.util.Interval)

Aggregations

IntervalParser (com.github.lindenb.jvarkit.util.bio.IntervalParser)10 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)9 Interval (htsjdk.samtools.util.Interval)8 SamReader (htsjdk.samtools.SamReader)7 ArrayList (java.util.ArrayList)7 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)6 IOException (java.io.IOException)6 File (java.io.File)5 PrintWriter (java.io.PrintWriter)5 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)4 SAMFileHeader (htsjdk.samtools.SAMFileHeader)4 SAMRecord (htsjdk.samtools.SAMRecord)4 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)3 Program (com.github.lindenb.jvarkit.util.jcommander.Program)3 Logger (com.github.lindenb.jvarkit.util.log.Logger)3 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)3 SamReaderFactory (htsjdk.samtools.SamReaderFactory)3 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)3 CloseableIterator (htsjdk.samtools.util.CloseableIterator)3 VariantContext (htsjdk.variant.variantcontext.VariantContext)3