Search in sources :

Example 11 with InvalidGenomicCoordsException

use of exceptions.InvalidGenomicCoordsException in project ASCIIGenome by dariober.

the class Utils method initRegionFromFile.

/**
 * Guess a starting region from the content of an input file.
 *
 * <p>The strategy depends on the format returned by {@code Utils.getFileTypeFromName(x)}:
 * <ul>
 *   <li>BAM: {@code contig:start} of the first record or, if there are no records, the name of the
 *       first sequence in the header.</li>
 *   <li>BIGWIG / BIGBED: delegated to {@code initRegionFromBigWig} / {@code initRegionFromBigBed};
 *       URLs are refused for these formats.</li>
 *   <li>TDF: the first chromosome name that is not {@code "All"}.</li>
 *   <li>Anything else is read as (optionally gzipped) text: {@code chrom:pos} of the first line
 *       that is not empty, not starting with {@code #} and not starting with {@code "track "}.
 *       If there is no such line the result is {@code "Undefined_contig"} or, for VCF, the first
 *       sequence of the header's sequence dictionary when one is available.</li>
 * </ul>
 *
 * <p>As support for more file types is added this method should be updated. It is a heuristic and
 * shouldn't be trusted 100%.
 *
 * @param x path or URL of the input file
 * @return a region string, either {@code chrom} or {@code chrom:position}
 * @throws IOException if the input cannot be opened or read
 * @throws InvalidGenomicCoordsException if {@code x} is a URL to a BIGWIG or BIGBED file
 * @throws RuntimeException if a TDF file lists no chromosome other than {@code "All"}
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws InvalidCommandLineException
 * @throws ClassNotFoundException
 */
@SuppressWarnings("unused")
public static String initRegionFromFile(String x) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidCommandLineException, InvalidRecordException, SQLException {
    UrlValidator urlValidator = new UrlValidator();
    // Evaluated once: every branch below needs to know whether x is remote.
    boolean isUrl = urlValidator.isValid(x);
    TrackFormat fmt = Utils.getFileTypeFromName(x);

    if (fmt.equals(TrackFormat.BAM)) {
        SamReader samReader;
        if (isUrl) {
            samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(x)));
        } else {
            SamReaderFactory srf = SamReaderFactory.make();
            srf.validationStringency(ValidationStringency.SILENT);
            samReader = srf.open(new File(x));
        }
        try {
            // Default: Start from the first contig in dictionary
            String region = samReader.getFileHeader().getSequence(0).getSequenceName();
            SAMRecordIterator iter = samReader.iterator();
            if (iter.hasNext()) {
                // If there are records in this BAM, init from first record
                SAMRecord rec = iter.next();
                region = rec.getContig() + ":" + rec.getAlignmentStart();
            }
            return region;
        } finally {
            // Close on every path, including BAMs without records and failures above.
            samReader.close();
        }
    }

    if (fmt.equals(TrackFormat.BIGWIG) || fmt.equals(TrackFormat.BIGBED)) {
        if (isUrl) {
            // Loading from URL is painfully slow so do not initialize from URL
            System.err.println("Refusing to initialize from URL");
            throw new InvalidGenomicCoordsException();
        }
        return fmt.equals(TrackFormat.BIGWIG) ? initRegionFromBigWig(x) : initRegionFromBigBed(x);
    }

    if (fmt.equals(TrackFormat.TDF)) {
        Iterator<String> chroms = TDFReader.getReader(x).getChromosomeNames().iterator();
        while (chroms.hasNext()) {
            String chrom = chroms.next();
            if (!chrom.equals("All")) {
                return chrom;
            }
        }
        System.err.println("Cannot initialize from " + x);
        throw new RuntimeException("Cannot initialize from " + x);
    }

    // Input file appears to be a generic interval file. We expect chrom to be in column 1
    // VCF files are also included here since they are either gzip or plain ASCII.
    String lowerName = x.toLowerCase();
    boolean gzipped = lowerName.endsWith(".gz") || lowerName.endsWith(".bgz");

    InputStream rawStream = isUrl ? new URL(x).openStream() : new FileInputStream(x);
    BufferedReader br;
    try {
        InputStream in = gzipped ? new GZIPInputStream(rawStream) : rawStream;
        // Always decode as UTF-8 so local and remote inputs are treated alike.
        br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
    } catch (IOException e) {
        // E.g. the file is named *.gz but is not gzip: do not leak the underlying stream.
        rawStream.close();
        throw e;
    }

    try {
        String line;
        while ((line = br.readLine()) != null) {
            line = line.trim();
            if (line.startsWith("#") || line.isEmpty() || line.startsWith("track ")) {
                continue;
            }
            // First data line found: the region comes from it, no need to read further.
            if (fmt.equals(TrackFormat.VCF)) {
                String[] fields = line.split("\t");
                return fields[0] + ":" + fields[1];
            }
            IntervalFeature feature = new IntervalFeature(line, fmt, null);
            return feature.getChrom() + ":" + feature.getFrom();
        }
    } finally {
        br.close();
    }

    // Reaching this point means the input has no records
    String region = "Undefined_contig";
    if (fmt.equals(TrackFormat.VCF)) {
        SAMSequenceDictionary seqdict = getVCFHeader(x).getSequenceDictionary();
        if (seqdict != null) {
            Iterator<SAMSequenceRecord> iter = seqdict.getSequences().iterator();
            if (iter.hasNext()) {
                region = iter.next().getSequenceName();
            }
        }
    }
    return region;
}
Also used : TrackFormat(tracks.TrackFormat) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) InputStreamReader(java.io.InputStreamReader) GZIPInputStream(java.util.zip.GZIPInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) Reader(java.io.Reader) AbstractFeatureReader(htsjdk.tribble.AbstractFeatureReader) TabixReader(htsjdk.tribble.readers.TabixReader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) TDFReader(org.broad.igv.tdf.TDFReader) BBFileReader(org.broad.igv.bbfile.BBFileReader) SamReader(htsjdk.samtools.SamReader) FileReader(java.io.FileReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) URL(java.net.URL) FileInputStream(java.io.FileInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) UrlValidator(org.apache.commons.validator.routines.UrlValidator) InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException) BufferedReader(java.io.BufferedReader) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) BBFileReader(org.broad.igv.bbfile.BBFileReader) FileReader(java.io.FileReader) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IntervalFeature(tracks.IntervalFeature)

Example 12 with InvalidGenomicCoordsException

use of exceptions.InvalidGenomicCoordsException in project ASCIIGenome by dariober.

the class TrackIntervalFeatureTest method canApplyAwk_getFeaturesInInterval.

/**
 * Exercises awk filtering on a gzipped GTF track: a valid script, a script carrying its own
 * {@code -F} option, clearing the script, and two scripts that are expected to be rejected with
 * {@link InvalidGenomicCoordsException}.
 */
@Test
public void canApplyAwk_getFeaturesInInterval() throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidRecordException, SQLException {
    GenomicCoords coords = new GenomicCoords("chr1:10000-100000", 80, null, null);
    TrackIntervalFeature track = new TrackIntervalFeature("test_data/hg19_genes_head.gtf.gz", coords);

    // Note the single quotes wrapping the script.
    String script = "'$3 == \"start_codon\" && $9 !~ \"OR4F\"'";
    track.setAwk(script);
    // The -F argument must have been prepended to the stored script.
    assertEquals("-F '\\t' " + script, track.getAwk());
    assertEquals(40, track.getFeaturesInInterval("chr1", 1, 500000000).size());

    // Filter on feature size, with the field separator given explicitly.
    track.setAwk("-F \\t '($5 - $4) > 1000'");
    assertEquals(23, track.getFeaturesInInterval("chr1", 1, 500000000).size());

    // A blank script removes the filter.
    track.setAwk("  ");
    assertEquals(1000, track.getFeaturesInInterval("chr1", 1, 500000000).size());

    // Invalid script: an exception is expected (an ugly stack trace may be printed).
    boolean invalidScriptRejected;
    try {
        track.setAwk("$foo");
        invalidScriptRejected = false;
    } catch (InvalidGenomicCoordsException e) {
        invalidScriptRejected = true;
    }
    assertTrue(invalidScriptRejected);
    // The faulty script has been removed, so all records are returned again.
    assertEquals("", track.getAwk());
    assertEquals(1000, track.getFeaturesInInterval("chr1", 1, 500000000).size());

    // awk output that is neither empty nor equal to the input must also be rejected.
    boolean alteredOutputRejected;
    try {
        track.setAwk("'{print 999}'");
        alteredOutputRejected = false;
    } catch (InvalidGenomicCoordsException e) {
        alteredOutputRejected = true;
    }
    assertTrue(alteredOutputRejected);
}
Also used : GenomicCoords(samTextViewer.GenomicCoords) InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException) Test(org.junit.Test)

Example 13 with InvalidGenomicCoordsException

use of exceptions.InvalidGenomicCoordsException in project ASCIIGenome by dariober.

the class Track method setVariantReadInInterval.

/**
 * Set the filter to extract reads containing a variant in the given interval, then update the track.
 *
 * <p>If {@code chrom} equals {@code Filter.DEFAULT_VARIANT_CHROM.getValue()} the filter is set
 * without a reference sequence and the coordinates are not validated. Otherwise the reference
 * bases for {@code chrom:from-to} are read from the fasta file of this track's genomic coordinates
 * and handed to the filter.
 *
 * @param chrom contig name
 * @param from start of the interval, 1-based (first base of chr1 is `chr1:1-1`)
 * @param to end of the interval, 1-based; must not be smaller than {@code from}
 * @param variantOnly flag forwarded to the feature filter via {@code setVariantOnly}
 * @throws InvalidGenomicCoordsException if {@code from > to}
 * @throws IOException
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws ClassNotFoundException
 * @throws MalformedURLException
 */
public void setVariantReadInInterval(String chrom, int from, int to, boolean variantOnly) throws MalformedURLException, ClassNotFoundException, IOException, InvalidGenomicCoordsException, InvalidRecordException, SQLException {
    this.getFeatureFilter().setVariantOnly(variantOnly);
    if (chrom.equals(Filter.DEFAULT_VARIANT_CHROM.getValue())) {
        // Default chrom: no reference sequence is passed to the filter.
        this.getFeatureFilter().setVariantReadInInterval(chrom, from, to, null);
        this.update();
        return;
    }
    if (from > to) {
        System.err.println("Invalid coordinates for filter from > to: " + from + ", " + to);
        throw new InvalidGenomicCoordsException();
    }
    IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(this.getGc().getFastaFile()));
    byte[] faSeq;
    try {
        faSeq = faSeqFile.getSubsequenceAt(chrom, from, to).getBases();
    } finally {
        // Release the fasta handle even if the requested interval cannot be fetched.
        faSeqFile.close();
    }
    this.getFeatureFilter().setVariantReadInInterval(chrom, from, to, faSeq);
    this.update();
}
Also used : InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Aggregations

InvalidGenomicCoordsException (exceptions.InvalidGenomicCoordsException)13 ArrayList (java.util.ArrayList)7 InvalidColourException (exceptions.InvalidColourException)5 InvalidCommandLineException (exceptions.InvalidCommandLineException)5 InvalidRecordException (exceptions.InvalidRecordException)5 IOException (java.io.IOException)5 SQLException (java.sql.SQLException)5 InvalidConfigException (exceptions.InvalidConfigException)4 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)4 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)3 File (java.io.File)3 PatternSyntaxException (java.util.regex.PatternSyntaxException)3 ArgumentParserException (net.sourceforge.argparse4j.inf.ArgumentParserException)3 List (java.util.List)2 TrackFormat (tracks.TrackFormat)2 DocumentException (com.itextpdf.text.DocumentException)1 CommandList (commandHelp.CommandList)1 BamIndexNotFoundException (exceptions.BamIndexNotFoundException)1 UnindexableFastaFileException (faidx.UnindexableFastaFileException)1 SAMRecord (htsjdk.samtools.SAMRecord)1