Search in sources :

Example 31 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.

the class GenomicCoords method setGenome.

/* Methods */
/**
 * Set the genome dictionary and, if available, the fasta reference file. See
 * GenomicCoords.getSamSeqDictFromAnyFile() for available inputs.
 *
 * <p>Null or blank entries of {@code input} are ignored; the remaining entries are
 * passed through {@code Utils.tildeToHomeDir}. If nothing is left the method returns
 * without changing anything. The first entry that has an existing {@code .fai} index,
 * or that can be indexed on the fly, is set as the fasta file.
 *
 * @param input candidate sources for the dictionary and the fasta sequence.
 * @param includeGenomeFile Should the input data be treated as a genome file?
 * Set to true only if the input can be a genome file. Other files (bed, vcf, gff)
 * look like valid genome files and this can result in a wrong dictionary.
 * @throws IOException propagated from setting the dictionary or the fasta file.
 */
public void setGenome(List<String> input, boolean includeGenomeFile) throws IOException {
    List<String> sources = new ArrayList<String>();
    for (String entry : input) {
        if (entry == null || entry.trim().isEmpty()) {
            continue;
        }
        sources.add(Utils.tildeToHomeDir(entry));
    }
    if (sources.isEmpty()) {
        return;
    }

    // Set Dictionary
    this.setSamSeqDictFromAnySource(sources, includeGenomeFile);

    // Try to set the fasta sequence from the first usable source
    for (String candidate : sources) {
        boolean fastaSet = true;
        try {
            if (!new File(candidate + ".fai").exists()) {
                // No index next to the file: jump to the indexing attempt below.
                throw new FileNotFoundException();
            }
            this.setFastaFile(candidate);
        } catch (FileNotFoundException e) {
            try {
                // Build an index on the fly; the generated .fai is removed at JVM exit.
                new Faidx(new File(candidate));
                (new File(candidate + ".fai")).deleteOnExit();
                this.setFastaFile(candidate);
            } catch (Exception e1) {
                // This source cannot be used as fasta: move on to the next one.
                fastaSet = false;
            }
        }
        if (fastaSet) {
            break;
        }
    }
}
Also used : ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Faidx(faidx.Faidx) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) InvalidCommandLineException(exceptions.InvalidCommandLineException) InvalidColourException(exceptions.InvalidColourException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException)

Example 32 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.

the class Main method initRegion.

/**
 * Return a suitable region to start. If a region is already given, do nothing.
 * This method is a mess and should be cleaned up together with GenomicCoords class.
 *
 * <p>Sources are tried in this order: input files that are not TDF, the fasta file,
 * the genome file and finally the TDF files skipped in the first pass.
 *
 * @param inputFileList files to probe for an initial region.
 * @param fasta path to a fasta file; ignored if null or blank.
 * @param genome genome source passed to {@code GenomicCoords.setGenome}; ignored if null or blank.
 * @param debug if greater than 0, stack traces of failed attempts are printed.
 * @return the initial region, or an empty string if nothing could provide one.
 * @throws IOException
 * @throws InvalidGenomicCoordsException
 */
public static String initRegion(List<String> inputFileList, String fasta, String genome, int debug) throws IOException, InvalidGenomicCoordsException {
    // Preferably we start from a position that has a feature rather than from the start of a
    // random chrom.
    System.err.print("Initializing coordinates... ");
    // First search for files that can init chrom and position
    List<String> skipped = new ArrayList<String>();
    for (String x : inputFileList) {
        TrackFormat fmt = Utils.getFileTypeFromName(x);
        if (fmt.equals(TrackFormat.TDF)) {
            // TDF files are only used as a last resort, see the final loop.
            skipped.add(x);
            continue;
        }
        try {
            String region = Utils.initRegionFromFile(x);
            System.err.println("Done from: " + x);
            return region;
        } catch (Exception e) {
            System.err.println("\nCould not initilize from file " + x);
            if (debug > 0) {
                e.printStackTrace();
            }
        }
    }
    // Try to initialize from fasta
    if (fasta != null && !fasta.trim().isEmpty()) {
        // try-with-resources: the reader is closed even if reading the first sequence fails.
        try (IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(fasta))) {
            return faSeqFile.nextSequence().getName();
        }
    }
    // Try genome file
    if (genome != null && !genome.trim().isEmpty()) {
        GenomicCoords gc = new GenomicCoords(Utils.getTerminalWidth());
        gc.setGenome(Arrays.asList(genome), false);
        SAMSequenceDictionary samSeqDict = gc.getSamSeqDict();
        return samSeqDict.getSequence(0).getSequenceName();
    }
    // Failing that, look for any file that gives at least chrom
    for (String x : skipped) {
        try {
            String region = Utils.initRegionFromFile(x);
            System.err.println("Done from: " + x);
            return region;
        } catch (Exception e) {
            System.err.println("\nCould not initilize from file " + x);
            if (debug > 0) {
                e.printStackTrace();
            }
        }
    }
    // It appears everything failed to initialise...
    return "";
}
Also used : TrackFormat(tracks.TrackFormat) ArrayList(java.util.ArrayList) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) InvalidCommandLineException(exceptions.InvalidCommandLineException) InvalidColourException(exceptions.InvalidColourException) InvalidRecordException(exceptions.InvalidRecordException) SQLException(java.sql.SQLException) InvalidConfigException(exceptions.InvalidConfigException) UnindexableFastaFileException(faidx.UnindexableFastaFileException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) DocumentException(com.itextpdf.text.DocumentException) BamIndexNotFoundException(exceptions.BamIndexNotFoundException) InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 33 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.

the class Utils method checkFasta.

/**
 * Check that the fasta file exists, is readable and is indexed. If the index is
 * missing, one is created and the resulting {@code .fai} file is marked for deletion
 * at JVM exit.
 *
 * @param fasta path to the fasta file; if null the method returns immediately.
 * @param debug if 0 or 1 a missing or unreadable file terminates the JVM with exit
 * code 1; for any other value an IOException is thrown instead.
 * @throws IOException if the file is missing or unreadable and {@code debug} is not 0 or 1.
 * @throws UnindexableFastaFileException declared for the on-the-fly indexing step.
 */
public static void checkFasta(String fasta, int debug) throws IOException, UnindexableFastaFileException {
    if (fasta == null) {
        return;
    }
    File fafile = new File(fasta);
    if (!fafile.isFile()) {
        rejectFasta("Fasta file '" + fasta + "' not found.", debug);
    }
    if (!fafile.canRead()) {
        rejectFasta("Fasta file '" + fasta + "' is not readable.", debug);
    }
    try {
        // Opening the file is only a probe for the index: close it straight away.
        new IndexedFastaSequenceFile(fafile).close();
    } catch (FileNotFoundException e) {
        System.err.println("\nIndexing '" + fasta + "'.");
        new Faidx(fafile);
        (new File(fasta + ".fai")).deleteOnExit();
    }
}

/**
 * Report an unusable fasta file: print the message, then exit (debug 0 or 1) or throw.
 * Never returns normally, so callers cannot carry on with a bad file.
 */
private static void rejectFasta(String message, int debug) throws IOException {
    System.err.println(message);
    if (debug == 0 || debug == 1) {
        System.exit(1);
    }
    throw new IOException(message);
}
Also used : FileNotFoundException(java.io.FileNotFoundException) Faidx(faidx.Faidx) IOException(java.io.IOException) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 34 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.

the class Utils method prepareRefSeq.

/**
 * Get sequence as byte[] for the given genomic coords.
 *
 * @param fasta path to the indexed fasta file; if null no sequence is read.
 * @param gc genomic coordinates providing chromosome, start and end of the interval.
 * @return the bases in the interval, or null if {@code fasta} is null, the file is
 * not found or the sequence cannot be fetched.
 * @throws IOException
 */
public static byte[] prepareRefSeq(String fasta, GenomicCoords gc) throws IOException {
    if (fasta == null) {
        return null;
    }
    byte[] faSeq = null;
    // try-with-resources: the reader is closed whatever the fetch below throws.
    try (IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(fasta))) {
        try {
            faSeq = faSeqFile.getSubsequenceAt(gc.getChrom(), gc.getFrom(), gc.getTo()).getBases();
        } catch (NullPointerException e) {
            System.err.println("Cannot fetch sequence " + gc.toString());
            e.printStackTrace();
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
    return faSeq;
}
Also used : FileNotFoundException(java.io.FileNotFoundException) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 35 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.

the class Track method setVariantReadInInterval.

/**
 * Set filter to extract reads containing variant at the given interval.
 * from, to: 1-based coordinates (first base of chr1 is `chr1:1-1`).
 *
 * <p>If {@code chrom} equals {@code Filter.DEFAULT_VARIANT_CHROM} the filter is set
 * without a reference sequence. Otherwise the reference bases for the interval are
 * read from the fasta file of this track's GenomicCoords.
 *
 * @param chrom chromosome of the interval.
 * @param from start of the interval, 1-based.
 * @param to end of the interval, 1-based; must not be smaller than {@code from}.
 * @param variantOnly passed to the feature filter's {@code setVariantOnly}.
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws InvalidGenomicCoordsException if {@code from > to}.
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws MalformedURLException
 */
public void setVariantReadInInterval(String chrom, int from, int to, boolean variantOnly) throws MalformedURLException, ClassNotFoundException, IOException, InvalidGenomicCoordsException, InvalidRecordException, SQLException {
    this.getFeatureFilter().setVariantOnly(variantOnly);
    if (chrom.equals(Filter.DEFAULT_VARIANT_CHROM.getValue())) {
        this.getFeatureFilter().setVariantReadInInterval(chrom, from, to, null);
        this.update();
        return;
    }
    if (from > to) {
        System.err.println("Invalid coordinates for filter from > to: " + from + ", " + to);
        throw new InvalidGenomicCoordsException();
    }
    byte[] faSeq;
    // try-with-resources: the reader is closed even if the interval cannot be fetched.
    try (IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(this.getGc().getFastaFile()))) {
        faSeq = faSeqFile.getSubsequenceAt(chrom, from, to).getBases();
    }
    this.getFeatureFilter().setVariantReadInInterval(chrom, from, to, faSeq);
    this.update();
}
Also used : InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Aggregations

IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)57 File (java.io.File)34 SamReader (htsjdk.samtools.SamReader)22 SAMRecord (htsjdk.samtools.SAMRecord)20 GenomicSequence (com.github.lindenb.jvarkit.util.picard.GenomicSequence)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)16 ArrayList (java.util.ArrayList)16 IOException (java.io.IOException)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)13 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)11 CigarElement (htsjdk.samtools.CigarElement)11 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)11 List (java.util.List)11 FileNotFoundException (java.io.FileNotFoundException)10 BufferedReader (java.io.BufferedReader)9 Collectors (java.util.stream.Collectors)9 Cigar (htsjdk.samtools.Cigar)8 CigarOperator (htsjdk.samtools.CigarOperator)7