use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.
the class GenomicCoords method setGenome.
/* Methods */
/**
 * Set the sequence dictionary and, when one of the inputs can serve as an
 * indexed fasta file, the fasta reference as well.
 *
 * <p>Null or blank entries of {@code input} are dropped and the remaining ones are
 * passed through {@code Utils.tildeToHomeDir}. If nothing is left the method
 * returns without touching any state. The dictionary is then set via
 * {@code setSamSeqDictFromAnySource}. Finally each input is tried in order as a
 * fasta file: if {@code <input>.fai} exists the input is used directly, otherwise
 * an index is built with {@code Faidx} (the resulting {@code <input>.fai} is marked
 * for deletion on JVM exit). The first input for which this succeeds wins; inputs
 * that cannot be indexed are silently skipped.
 *
 * @param input candidate sources for the dictionary and fasta reference
 * @param includeGenomeFile whether the input data should be treated as a genome
 *     file. Set to true only if the input can be a genome file: other files
 *     (bed, vcf, gff) look like valid genome files and this can result in a
 *     wrong dictionary.
 * @throws IOException propagated from the dictionary or fasta setters
 */
public void setGenome(List<String> input, boolean includeGenomeFile) throws IOException {
    List<String> sources = new ArrayList<String>();
    for (String raw : input) {
        if (raw == null || raw.trim().isEmpty()) {
            continue;
        }
        sources.add(Utils.tildeToHomeDir(raw));
    }
    if (sources.isEmpty()) {
        return;
    }

    // Dictionary first; the fasta reference below is optional.
    this.setSamSeqDictFromAnySource(sources, includeGenomeFile);

    for (String source : sources) {
        boolean fastaSet;
        try {
            if (!new File(source + ".fai").exists()) {
                // No index on disk: hand over to the indexing fallback below.
                throw new FileNotFoundException();
            }
            this.setFastaFile(source);
            fastaSet = true;
        } catch (FileNotFoundException missingIndex) {
            try {
                new Faidx(new File(source));
                new File(source + ".fai").deleteOnExit();
                this.setFastaFile(source);
                fastaSet = true;
            } catch (Exception notIndexable) {
                // Best effort: this input is not usable as fasta, try the next one.
                fastaSet = false;
            }
        }
        if (fastaSet) {
            break;
        }
    }
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.
the class Main method initRegion.
/**
 * Return a suitable region to start from.
 *
 * <p>Sources are tried in this order and the first one that yields a region wins:
 * <ol>
 *   <li>every input file that is not in TDF format, via
 *       {@code Utils.initRegionFromFile};</li>
 *   <li>the name of the first sequence in {@code fasta};</li>
 *   <li>the name of the first sequence in the dictionary built from
 *       {@code genome};</li>
 *   <li>the TDF files skipped in step 1.</li>
 * </ol>
 * Failures in steps 1 and 4 are reported on stderr (with a stack trace when
 * {@code debug > 0}) and the next candidate is tried.
 *
 * <p>This method is a mess and should be cleaned up together with the
 * GenomicCoords class.
 *
 * @param inputFileList input track files to probe
 * @param fasta path to an indexed fasta file; ignored when null or blank
 * @param genome genome source handed to {@code GenomicCoords.setGenome}; ignored
 *     when null or blank
 * @param debug when greater than 0, stack traces of failed attempts are printed
 * @return the initial region, or an empty string if nothing could provide one
 * @throws IOException if the fasta file or genome source cannot be read
 * @throws InvalidGenomicCoordsException
 */
public static String initRegion(List<String> inputFileList, String fasta, String genome, int debug) throws IOException, InvalidGenomicCoordsException {
    // Preferably we start from a position that has a feature rather than from the start of a
    // random chrom.
    System.err.print("Initializing coordinates... ");

    // First search for files that can init chrom and position
    List<String> skipped = new ArrayList<String>();
    for (String x : inputFileList) {
        TrackFormat fmt = Utils.getFileTypeFromName(x);
        if (fmt.equals(TrackFormat.TDF)) {
            skipped.add(x);
            continue;
        }
        try {
            String region = Utils.initRegionFromFile(x);
            System.err.println("Done from: " + x);
            return region;
        } catch (Exception e) {
            System.err.println("\nCould not initilize from file " + x);
            if (debug > 0) {
                e.printStackTrace();
            }
        }
    }

    // Try to initialize from fasta
    if (fasta != null && !fasta.trim().isEmpty()) {
        IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(fasta));
        try {
            return faSeqFile.nextSequence().getName();
        } finally {
            // Release the file handle even if reading the first sequence fails.
            faSeqFile.close();
        }
    }

    // Try genome file
    if (genome != null && !genome.trim().isEmpty()) {
        GenomicCoords gc = new GenomicCoords(Utils.getTerminalWidth());
        gc.setGenome(Arrays.asList(genome), false);
        SAMSequenceDictionary samSeqDict = gc.getSamSeqDict();
        return samSeqDict.getSequence(0).getSequenceName();
    }

    // Failing that, look for any file that gives at least chrom
    for (String x : skipped) {
        try {
            String region = Utils.initRegionFromFile(x);
            System.err.println("Done from: " + x);
            return region;
        } catch (Exception e) {
            System.err.println("\nCould not initilize from file " + x);
            if (debug > 0) {
                e.printStackTrace();
            }
        }
    }

    // It appears everything failed to initialise...
    return "";
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.
the class Utils method checkFasta.
/**
 * Validate that {@code fasta} is an existing, readable file and make sure it can
 * be opened as an indexed fasta file.
 *
 * <p>A null {@code fasta} is accepted and nothing is done. If the file is missing
 * or unreadable a message is printed on stderr; the JVM then exits with status 1
 * when {@code debug} is 0 or 1, otherwise an {@link IOException} carrying the same
 * message is thrown. If opening the file as indexed fasta fails with
 * {@link FileNotFoundException}, an index is built with {@code Faidx} and
 * {@code <fasta>.fai} is marked for deletion on JVM exit.
 *
 * @param fasta path to the fasta file, may be null
 * @param debug 0 or 1: exit the JVM on an invalid file; any other value: throw
 * @throws IOException if the file is missing or unreadable and {@code debug} is
 *     not 0 or 1, or if reading/indexing fails
 * @throws UnindexableFastaFileException if the file cannot be indexed
 */
public static void checkFasta(String fasta, int debug) throws IOException, UnindexableFastaFileException {
    if (fasta == null) {
        return;
    }
    File fafile = new File(fasta);

    if (!fafile.isFile()) {
        String msg = "Fasta file '" + fasta + "' not found.";
        System.err.println(msg);
        if (debug == 0 || debug == 1) {
            System.exit(1);
        }
        // Never continue with an invalid path, whatever the debug level.
        throw new IOException(msg);
    }

    if (!fafile.canRead()) {
        String msg = "Fasta file '" + fasta + "' is not readable.";
        System.err.println(msg);
        if (debug == 0 || debug == 1) {
            System.exit(1);
        }
        throw new IOException(msg);
    }

    try {
        // Opening is enough to detect a missing index; nothing is read.
        new IndexedFastaSequenceFile(fafile).close();
    } catch (FileNotFoundException e) {
        System.err.println("\nIndexing '" + fasta + "'.");
        new Faidx(fafile);
        new File(fasta + ".fai").deleteOnExit();
    }
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.
the class Utils method prepareRefSeq.
/**
 * Get the reference sequence as byte[] for the given genomic coords.
 *
 * <p>Failures are reported rather than propagated in two cases: a
 * {@link FileNotFoundException} while opening the fasta file and a
 * {@link NullPointerException} while fetching the sequence both print a stack
 * trace and result in a null return value.
 *
 * @param fasta path to the indexed fasta file; when null, null is returned
 * @param gc coordinates providing chromosome, start and end of the interval
 * @return the bases in the interval, or null if {@code fasta} is null or the
 *     sequence could not be fetched
 * @throws IOException if reading or closing the fasta file fails
 */
public static byte[] prepareRefSeq(String fasta, GenomicCoords gc) throws IOException {
    if (fasta == null) {
        return null;
    }
    byte[] faSeq = null;
    try {
        IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(fasta));
        try {
            faSeq = faSeqFile.getSubsequenceAt(gc.getChrom(), gc.getFrom(), gc.getTo()).getBases();
        } catch (NullPointerException e) {
            // String concatenation is null-safe, so a null gc cannot fail again here.
            System.err.println("Cannot fetch sequence " + gc);
            e.printStackTrace();
        } finally {
            // Close on every path, including exceptions other than the NPE above.
            faSeqFile.close();
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
    return faSeq;
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.
the class Track method setVariantReadInInterval.
/**
 * Set filter to extract reads containing variant at the given interval.
 *
 * <p>The {@code variantOnly} flag is stored on the feature filter first. When
 * {@code chrom} equals {@code Filter.DEFAULT_VARIANT_CHROM}, the filter is set
 * without a reference sequence and the track is updated. Otherwise the reference
 * bases for the interval are read from the fasta file of this track's genomic
 * coordinates and passed to the filter before the track is updated.
 *
 * @param chrom chromosome of the interval
 * @param from start of the interval, 1-based (first base of chr1 is {@code chr1:1-1})
 * @param to end of the interval, 1-based; must not be smaller than {@code from}
 * @param variantOnly value forwarded to the feature filter's variant-only setting
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws InvalidGenomicCoordsException if {@code from > to}
 * @throws IOException if the fasta file cannot be read or closed
 * @throws ClassNotFoundException
 * @throws MalformedURLException
 */
public void setVariantReadInInterval(String chrom, int from, int to, boolean variantOnly) throws MalformedURLException, ClassNotFoundException, IOException, InvalidGenomicCoordsException, InvalidRecordException, SQLException {
    this.getFeatureFilter().setVariantOnly(variantOnly);

    if (chrom.equals(Filter.DEFAULT_VARIANT_CHROM.getValue())) {
        this.getFeatureFilter().setVariantReadInInterval(chrom, from, to, null);
        this.update();
        return;
    }

    if (from > to) {
        System.err.println("Invalid coordinates for filter from > to: " + from + ", " + to);
        throw new InvalidGenomicCoordsException();
    }

    IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(this.getGc().getFastaFile()));
    byte[] faSeq;
    try {
        faSeq = faSeqFile.getSubsequenceAt(chrom, from, to).getBases();
    } finally {
        // Release the file handle even if the interval cannot be fetched.
        faSeqFile.close();
    }

    this.getFeatureFilter().setVariantReadInInterval(chrom, from, to, faSeq);
    this.update();
}
Aggregations