use of exceptions.InvalidGenomicCoordsException in project ASCIIGenome by dariober.
the class Utils method initRegionFromFile.
/**
 * Guess an initial region string (a contig name, or {@code contig:position}) from the content of an input file.
 * As you add support for more filetypes you should update this function. The detection is heuristic and
 * shouldn't be trusted 100%.
 *
 * <ul>
 * <li>BAM: {@code contig:alignmentStart} of the first record, or the first contig in the header dictionary
 *     when the file holds no records.</li>
 * <li>bigWig / bigBed: delegated to {@code initRegionFromBigWig} / {@code initRegionFromBigBed} for local
 *     files; remote files are refused.</li>
 * <li>TDF: the first chromosome name that is not {@code "All"}.</li>
 * <li>Anything else is read as (optionally gzipped) text: the first line that is not blank, not a
 *     {@code #} comment and not a {@code track } line gives the region. If there is no such line the result
 *     is {@code "Undefined_contig"} or, for VCF, the first contig of the header's sequence dictionary.</li>
 * </ul>
 *
 * Every reader opened here is closed before returning, including on exceptions.
 *
 * @param x path or URL of the input file
 * @return the region to start from
 * @throws IOException if the input cannot be opened or read
 * @throws InvalidGenomicCoordsException if asked to initialize from a remote bigWig or bigBed file
 * @throws RuntimeException if a TDF file lists no usable chromosome
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws InvalidCommandLineException
 * @throws ClassNotFoundException
 */
@SuppressWarnings("unused")
public static String initRegionFromFile(String x) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidCommandLineException, InvalidRecordException, SQLException {
    UrlValidator urlValidator = new UrlValidator();
    TrackFormat fmt = Utils.getFileTypeFromName(x);
    // Validate once instead of re-validating the same string in every branch.
    boolean isUrl = urlValidator.isValid(x);

    if (fmt.equals(TrackFormat.BAM)) {
        SamReader samReader;
        if (isUrl) {
            samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(x)));
        } else {
            SamReaderFactory srf = SamReaderFactory.make();
            srf.validationStringency(ValidationStringency.SILENT);
            samReader = srf.open(new File(x));
        }
        // try-with-resources: the reader used to be closed only when the BAM had at least one record.
        try (SamReader reader = samReader) {
            // Default: Start from the first contig in dictionary
            String region = reader.getFileHeader().getSequence(0).getSequenceName();
            try (SAMRecordIterator iter = reader.iterator()) {
                if (iter.hasNext()) {
                    // If there are records in this BAM, init from first record
                    SAMRecord rec = iter.next();
                    region = rec.getContig() + ":" + rec.getAlignmentStart();
                }
            }
            return region;
        }
    }

    if (fmt.equals(TrackFormat.BIGWIG) || fmt.equals(TrackFormat.BIGBED)) {
        if (isUrl) {
            // Loading from URL is painfully slow so do not initialize from URL
            System.err.println("Refusing to initialize from URL");
            throw new InvalidGenomicCoordsException();
        }
        return fmt.equals(TrackFormat.BIGWIG) ? initRegionFromBigWig(x) : initRegionFromBigBed(x);
    }

    if (fmt.equals(TrackFormat.TDF)) {
        Iterator<String> iter = TDFReader.getReader(x).getChromosomeNames().iterator();
        while (iter.hasNext()) {
            String chrom = iter.next();
            if (!chrom.equals("All")) {
                return chrom;
            }
        }
        System.err.println("Cannot initialize from " + x);
        throw new RuntimeException("Cannot initialize from " + x);
    }

    // Input file appears to be a generic interval file. We expect chrom to be in column 1
    // VCF files are also included here since they are either gzip or plain ASCII.
    String lowerName = x.toLowerCase();
    boolean isGzipped = lowerName.endsWith(".gz") || lowerName.endsWith(".bgz");

    // The three resources wrap one another; declaring each one guarantees the underlying stream is
    // released even if a wrapper's constructor fails (e.g. a ".gz" file that is not really gzipped).
    // Local uncompressed files keep the platform default charset, as FileReader did; every other
    // source is decoded as UTF-8.
    try (InputStream raw = isUrl ? new URL(x).openStream() : new FileInputStream(x);
         InputStream in = isGzipped ? new GZIPInputStream(raw) : raw;
         BufferedReader br = new BufferedReader(
                 (isGzipped || isUrl) ? new InputStreamReader(in, "UTF-8") : new InputStreamReader(in))) {
        String line;
        while ((line = br.readLine()) != null) {
            line = line.trim();
            if (line.startsWith("#") || line.isEmpty() || line.startsWith("track ")) {
                continue;
            }
            // First data line found: it alone decides the region.
            if (fmt.equals(TrackFormat.VCF)) {
                String[] fields = line.split("\t");
                return fields[0] + ":" + fields[1];
            }
            IntervalFeature feature = new IntervalFeature(line, fmt, null);
            return feature.getChrom() + ":" + feature.getFrom();
        }
    }

    // Reaching here means the input has no records.
    String region = "Undefined_contig";
    if (fmt.equals(TrackFormat.VCF)) {
        SAMSequenceDictionary seqdict = getVCFHeader(x).getSequenceDictionary();
        if (seqdict != null) {
            Iterator<SAMSequenceRecord> iter = seqdict.getSequences().iterator();
            if (iter.hasNext()) {
                region = iter.next().getSequenceName();
            }
        }
    }
    return region;
}
use of exceptions.InvalidGenomicCoordsException in project ASCIIGenome by dariober.
the class TrackIntervalFeatureTest method canApplyAwk_getFeaturesInInterval.
/**
 * Exercises awk filtering on an interval track: a valid script, a script with its own -F option,
 * removal of the filter, and two scripts that setAwk is expected to reject.
 */
@Test
public void canApplyAwk_getFeaturesInInterval() throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidRecordException, SQLException {
    // Query interval used by every lookup below.
    final String chrom = "chr1";
    final int start = 1;
    final int end = 500000000;

    GenomicCoords coords = new GenomicCoords("chr1:10000-100000", 80, null, null);
    TrackIntervalFeature track = new TrackIntervalFeature("test_data/hg19_genes_head.gtf.gz", coords);

    // Script is wrapped in single quotes; setAwk should prepend the -F argument.
    String script = "'$3 == \"start_codon\" && $9 !~ \"OR4F\"'";
    track.setAwk(script);
    assertEquals("-F '\\t' " + script, track.getAwk());
    assertEquals(40, track.getFeaturesInInterval(chrom, start, end).size());

    // Keep only features longer than 1000
    track.setAwk("-F \\t '($5 - $4) > 1000'");
    assertEquals(23, track.getFeaturesInInterval(chrom, start, end).size());

    // A blank script removes the filter.
    track.setAwk(" ");
    assertEquals(1000, track.getFeaturesInInterval(chrom, start, end).size());

    // Invalid script: an ugly stack trace is printed and setAwk must throw.
    boolean rejected;
    try {
        track.setAwk("$foo");
        rejected = false;
    } catch (InvalidGenomicCoordsException e) {
        rejected = true;
    }
    assertTrue(rejected);

    // The faulty script has been dropped, so all records come back.
    assertEquals("", track.getAwk());
    assertEquals(1000, track.getFeaturesInInterval(chrom, start, end).size());

    // awk output that is neither empty nor equal to its input must be rejected as well.
    try {
        track.setAwk("'{print 999}'");
        rejected = false;
    } catch (InvalidGenomicCoordsException e) {
        rejected = true;
    }
    assertTrue(rejected);
}
use of exceptions.InvalidGenomicCoordsException in project ASCIIGenome by dariober.
the class Track method setVariantReadInInterval.
/**
 * Set filter to extract reads containing variant at the given interval, then update the track.
 * from, to: 1-based coordinates (first base of chr1 is `chr1:1-1`).
 *
 * <p>If {@code chrom} equals {@code Filter.DEFAULT_VARIANT_CHROM}, the interval is handed to the
 * feature filter without a reference sequence and the coordinates are not validated. Otherwise the
 * reference bases for {@code chrom:from-to} are read from the fasta file of this track's genomic
 * coordinates and passed to the filter.
 *
 * <p>Note that {@code variantOnly} is applied to the filter before the coordinates are checked, so
 * it is set even when this method throws.
 *
 * @param chrom contig name
 * @param from first position of the interval, 1-based
 * @param to last position of the interval, 1-based; must not be smaller than {@code from}
 * @param variantOnly forwarded to the feature filter's {@code setVariantOnly}
 * @throws InvalidGenomicCoordsException if {@code from > to}
 * @throws IOException if the fasta file cannot be read or closed
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws ClassNotFoundException
 * @throws MalformedURLException
 */
public void setVariantReadInInterval(String chrom, int from, int to, boolean variantOnly) throws MalformedURLException, ClassNotFoundException, IOException, InvalidGenomicCoordsException, InvalidRecordException, SQLException {
    this.getFeatureFilter().setVariantOnly(variantOnly);
    if (chrom.equals(Filter.DEFAULT_VARIANT_CHROM.getValue())) {
        this.getFeatureFilter().setVariantReadInInterval(chrom, from, to, null);
        this.update();
        return;
    }
    if (from > to) {
        System.err.println("Invalid coordinates for filter from > to: " + from + ", " + to);
        throw new InvalidGenomicCoordsException();
    }
    byte[] faSeq;
    // try-with-resources: the fasta handle is released even if the lookup throws
    // (e.g. unknown contig or coordinates past the end of the sequence).
    try (IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(this.getGc().getFastaFile()))) {
        faSeq = faSeqFile.getSubsequenceAt(chrom, from, to).getBases();
    }
    this.getFeatureFilter().setVariantReadInInterval(chrom, from, to, faSeq);
    this.update();
}
Aggregations