Search in sources :

Example 16 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class BedLiftOver method doWork.

@Override
public int doWork(List<String> args) {
    if (liftOverFile == null) {
        LOG.error("LiftOver file is undefined.");
        return -1;
    }
    this.liftOver = new LiftOver(liftOverFile);
    this.liftOver.setLiftOverMinMatch(this.userMinMatch);
    PrintWriter out = null;
    PrintWriter failed = null;
    try {
        if (!this.ignoreLiftOverValidation) {
            IndexedFastaSequenceFile ref = new IndexedFastaSequenceFile(faidx);
            this.liftOver.validateToSequences(ref.getSequenceDictionary());
            ref.close();
        }
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        if (this.failedFile != null) {
            failed = super.openFileOrStdoutAsPrintWriter(failedFile);
        }
        if (args.isEmpty()) {
            BufferedReader r = openBufferedReader(null);
            scan(r, out, failed);
            CloserUtil.close(r);
        } else {
            for (final String filename : args) {
                BufferedReader r = openBufferedReader(filename);
                scan(r, out, failed);
                CloserUtil.close(r);
            }
        }
        out.flush();
        out.close();
        out = null;
        if (failed != null) {
            failed.flush();
            failed.close();
            failed = null;
        }
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        CloserUtil.close(failed);
    }
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) BufferedReader(java.io.BufferedReader) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter)

Example 17 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class VcfLiftOver method doWork.

@Override
public int doWork(List<String> args) {
    if (this.liftOverFile == null) {
        LOG.error("LiftOver file is undefined.");
        return -1;
    }
    if (this.faidx == null) {
        LOG.error("reference file is undefined.");
        return -1;
    }
    try {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidx);
        this.liftOver = new LiftOver(this.liftOverFile);
        this.liftOver.setLiftOverMinMatch(this.userMinMatch);
        if (!this.ignoreLiftOverValidation) {
            this.liftOver.validateToSequences(this.indexedFastaSequenceFile.getSequenceDictionary());
        }
        return doVcfToVcf(args, outputFile);
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
    }
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 18 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class AddLinearIndexToBed method doWork.

@Override
public int doWork(final List<String> args) {
    if (refFile == null) {
        LOG.error("Reference file undefined");
        return -1;
    }
    PrintStream out = null;
    try {
        htsjdk.samtools.reference.IndexedFastaSequenceFile ref = new IndexedFastaSequenceFile(this.refFile);
        this.dictionary = ref.getSequenceDictionary();
        ref.close();
        if (this.dictionary == null) {
            throw new JvarkitException.FastaDictionaryMissing(this.refFile);
        }
        this.tid2offset = new long[this.dictionary.size()];
        Arrays.fill(this.tid2offset, 0L);
        for (int i = 1; i < this.dictionary.size(); ++i) {
            this.tid2offset[i] = this.tid2offset[i - 1] + this.dictionary.getSequence(i - 1).getSequenceLength();
        }
        out = openFileOrStdoutAsPrintStream(this.outputFile);
        if (args.isEmpty()) {
            LOG.info("reading stdin");
            doWork(stdin(), out);
        } else {
            for (final String arg : args) {
                LOG.info("opening " + arg);
                InputStream in = IOUtils.openURIForReading(arg);
                doWork(in, out);
                CloserUtil.close(out);
            }
        }
        out.flush();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        dictionary = null;
        tid2offset = null;
    }
}
Also used : PrintStream(java.io.PrintStream) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) InputStream(java.io.InputStream) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException)

Example 19 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class BamToSql method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidxFile == null) {
        LOG.error("ref sequence faidx not defined");
        return -1;
    }
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    PrintWriter out = null;
    GenomicSequence genomicSequence = null;
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    args = new ArrayList<String>(IOUtils.unrollFiles(args));
    try {
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidxFile);
        out.println("CREATE TABLE IF NOT EXISTS SamFile");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("filename TEXT");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Dictionary");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("name TEXT NOT NULL,");
        out.println("length INT NOT NULL,");
        out.println("tid INT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS ReadGroup");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("groupId TEXT NOT NULL,");
        out.println("sample TEXT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Read");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("name TEXT NOT NULL,");
        out.println("flag INTEGER NOT NULL,");
        if (this.printflag) {
            for (final SAMFlag flg : SAMFlag.values()) {
                out.println(flg.name() + " INTEGER NOT NULL,");
            }
        }
        out.println("rname TEXT,");
        out.println("pos INTEGER,");
        out.println("mapq INTEGER NOT NULL,");
        out.println("cigar TEXT,");
        out.println("rnext TEXT,");
        out.println("pnext INTEGER,");
        out.println("tlen INTEGER,");
        out.println("sequence TEXT NOT NULL,");
        out.println("qualities TEXT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("group_id INT,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id),");
        out.println("FOREIGN KEY(group_id) REFERENCES ReadGroup(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Cigar");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("read_pos INT ,");
        out.println("read_base TEXT,");
        out.println("read_qual INT ,");
        out.println("ref_pos INT ,");
        out.println("ref_base TEXT,");
        out.println("operator TEXT NOT NULL,");
        out.println("read_id INT NOT NULL,");
        out.println("FOREIGN KEY(read_id) REFERENCES Read(id)");
        out.println(");");
        out.println("begin transaction;");
        int samIndex = 0;
        do {
            final String inputName;
            if (samIndex == 0 && args.isEmpty()) {
                sfr = openSamReader(null);
                inputName = "<stdin>";
            } else {
                inputName = args.get(samIndex);
                sfr = openSamReader(inputName);
            }
            final SAMFileHeader header1 = sfr.getFileHeader();
            if (header1 == null) {
                throw new JvarkitException.FileFormatError("File header missing");
            }
            final SAMSequenceDictionary dict = header1.getSequenceDictionary();
            if (dict == null) {
                throw new JvarkitException.DictionaryMissing("No Dictionary in input");
            }
            final IntervalParser intervalParser = new IntervalParser(dict);
            final Interval userInterval;
            iter = null;
            if (this.regionStr == null || this.regionStr.isEmpty()) {
                LOG.warn("You're currently scanning the whole BAM ???!!!");
                iter = sfr.iterator();
                userInterval = null;
            } else {
                userInterval = intervalParser.parse(this.regionStr);
                if (userInterval == null) {
                    throw new JvarkitException.UserError("cannot parse interval " + this.regionStr);
                }
                iter = sfr.query(userInterval.getContig(), userInterval.getStart(), userInterval.getEnd(), false);
            }
            out.println(String.join(" ", "insert into SamFile(filename) values(", quote(inputName), ");"));
            for (int i = 0; i < dict.size(); ++i) {
                final SAMSequenceRecord ssr = dict.getSequence(i);
                out.println("insert into Dictionary(name,length,tid,samfile_id) select " + quote(inputName) + "," + ssr.getSequenceLength() + "," + i + ",max(id) from SamFile;");
            }
            for (final SAMReadGroupRecord g : header1.getReadGroups()) {
                out.println("insert into ReadGroup(groupId,sample,samfile_id) select " + quote(g.getId()) + "," + quote(g.getSample()) + "," + "max(id) from SamFile;");
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
            while (iter.hasNext()) {
                final SAMRecord rec = progress.watch(iter.next());
                final StringBuilder sql = new StringBuilder();
                sql.append("insert into Read(" + "name,flag,");
                if (this.printflag) {
                    for (final SAMFlag flg : SAMFlag.values()) {
                        sql.append(flg.name()).append(",");
                    }
                }
                sql.append("rname,pos,mapq,cigar,rnext,pnext,tlen,sequence,qualities,group_id,samfile_id) select ");
                sql.append(quote(rec.getReadName())).append(",");
                sql.append(rec.getFlags()).append(",");
                if (this.printflag) {
                    for (final SAMFlag flg : SAMFlag.values()) {
                        sql.append(flg.isSet(rec.getFlags()) ? 1 : 0);
                        sql.append(",");
                    }
                }
                if (rec.getReferenceName() == null || rec.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
                    sql.append("NULL,NULL");
                } else {
                    sql.append(quote(rec.getReferenceName()));
                    sql.append(",");
                    sql.append(rec.getAlignmentStart());
                }
                sql.append(",");
                sql.append(rec.getMappingQuality());
                sql.append(",");
                // cigar
                if (rec.getCigarString() == null || rec.getCigarString().equals(SAMRecord.NO_ALIGNMENT_CIGAR)) {
                    sql.append("NULL");
                } else {
                    sql.append(quote(rec.getCigarString()));
                }
                sql.append(",");
                // rnext
                if (rec.getMateReferenceName() == null || rec.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
                    sql.append("NULL,NULL");
                } else {
                    sql.append(quote(rec.getMateReferenceName()));
                    sql.append(",");
                    sql.append(rec.getMateAlignmentStart());
                }
                sql.append(",");
                // tlen
                sql.append(rec.getInferredInsertSize());
                sql.append(",");
                // sequence
                sql.append(quote(rec.getReadString()));
                sql.append(",");
                // qualities
                sql.append(quote(rec.getBaseQualityString()));
                sql.append(",");
                if (rec.getReadGroup() == null) {
                    sql.append("NULL");
                } else {
                    sql.append("G.id");
                }
                sql.append(",F.id FROM SamFile as F");
                if (rec.getReadGroup() != null) {
                    sql.append(" , ReadGroup as G where G.groupId=").append(quote(rec.getReadGroup().getId())).append(" and F.id = G.samfile_id ");
                }
                sql.append("  ORDER BY F.id DESC LIMIT 1;");
                out.println(sql.toString());
                if (this.printcigar && !rec.getReadUnmappedFlag() && rec.getCigar() != null) {
                    if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
                        genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
                    }
                    int ref = rec.getUnclippedStart();
                    final byte[] bases = rec.getReadBases();
                    final byte[] quals = rec.getBaseQualities();
                    int read = 0;
                    for (final CigarElement ce : rec.getCigar()) {
                        final CigarOperator op = ce.getOperator();
                        if (op.equals(CigarOperator.P))
                            continue;
                        for (int i = 0; i < ce.getLength(); ++i) {
                            sql.setLength(0);
                            boolean in_user_interval = true;
                            sql.append("insert into Cigar(operator,read_pos,read_base,read_qual,ref_pos,ref_base,read_id) ");
                            sql.append("select '");
                            sql.append(op.name());
                            sql.append("',");
                            if (userInterval != null && !(rec.getReferenceName().equals(userInterval.getContig()) && ref >= userInterval.getStart() && ref <= userInterval.getEnd())) {
                                in_user_interval = false;
                            }
                            switch(op) {
                                case I:
                                    {
                                        sql.append(read);
                                        sql.append(",");
                                        sql.append("'" + (char) bases[read] + "',");
                                        sql.append("" + quals[read] + "");
                                        sql.append(",");
                                        sql.append("NULL,NULL");
                                        read++;
                                        break;
                                    }
                                case D:
                                case N:
                                case // yes H (hard clip)
                                H:
                                    {
                                        sql.append("NULL,NULL,NULL,");
                                        sql.append(ref);
                                        sql.append(",'");
                                        sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
                                        sql.append("'");
                                        ref++;
                                        break;
                                    }
                                case M:
                                case X:
                                case EQ:
                                case // yes S, soft clip
                                S:
                                    {
                                        sql.append(read);
                                        sql.append(",");
                                        sql.append("'" + (char) bases[read] + "',");
                                        sql.append("" + quals[read] + "");
                                        sql.append(",");
                                        sql.append(ref);
                                        sql.append(",'");
                                        sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
                                        sql.append("'");
                                        ref++;
                                        read++;
                                        break;
                                    }
                                default:
                                    throw new IllegalStateException();
                            }
                            sql.append(", id from Read ORDER BY id DESC LIMIT 1;");
                            if (in_user_interval)
                                out.println(sql.toString());
                        }
                    }
                }
            }
            iter.close();
            iter = null;
            sfr.close();
            sfr = null;
            progress.finish();
            samIndex++;
        } while (samIndex < args.size());
        out.println("COMMIT;");
        out.flush();
        out.close();
        LOG.info("done");
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(out);
        CloserUtil.close(indexedFastaSequenceFile);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) PrintWriter(java.io.PrintWriter) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFlag(htsjdk.samtools.SAMFlag) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval)

Example 20 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class VcfJaspar method doWork.

@Override
public int doWork(List<String> args) {
    if (this.jasparUri == null) {
        LOG.error("Undefined jaspar-uri");
        return -1;
    }
    if (this.fasta == null) {
        LOG.error("Undefined fasta sequence");
        return -1;
    }
    try {
        LOG.info("Reading JASPAR: " + jasparUri);
        LineIterator liter = IOUtils.openURIForLineIterator(this.jasparUri);
        Iterator<Matrix> miter = Matrix.iterator(liter);
        while (miter.hasNext()) {
            Matrix matrix = miter.next();
            this.jasparDb.add(matrix.convertToPWM());
        }
        CloserUtil.close(liter);
        LOG.info("JASPAR size: " + this.jasparDb.size());
        if (jasparDb.isEmpty()) {
            LOG.warn("JASPAR IS EMPTY");
        }
        LOG.info("opening " + fasta);
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(fasta);
        return doVcfToVcf(oneFileOrNull(args), outputFile);
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        this.indexedFastaSequenceFile = null;
    }
}
Also used : LineIterator(htsjdk.tribble.readers.LineIterator) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Aggregations

IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)57 File (java.io.File)34 SamReader (htsjdk.samtools.SamReader)22 SAMRecord (htsjdk.samtools.SAMRecord)20 GenomicSequence (com.github.lindenb.jvarkit.util.picard.GenomicSequence)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)16 ArrayList (java.util.ArrayList)16 IOException (java.io.IOException)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)13 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)11 CigarElement (htsjdk.samtools.CigarElement)11 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)11 List (java.util.List)11 FileNotFoundException (java.io.FileNotFoundException)10 BufferedReader (java.io.BufferedReader)9 Collectors (java.util.stream.Collectors)9 Cigar (htsjdk.samtools.Cigar)8 CigarOperator (htsjdk.samtools.CigarOperator)7