Search in sources :

Example 1 with TrackFormat

use of tracks.TrackFormat in project ASCIIGenome by dariober.

the class Main method initRegion.

/**
 * Chooses a region to start from, trying the available sources in this order:
 * <ol>
 *   <li>each input file that is not a TDF track, in the order given;</li>
 *   <li>the name of the first sequence in the fasta file, if one is given;</li>
 *   <li>the name of the first sequence in the genome dictionary, if a genome is given;</li>
 *   <li>the TDF files skipped in step 1.</li>
 * </ol>
 * A file that fails to initialize is reported on stderr and the next candidate is tried.
 *
 * @param inputFileList input track files or URLs to try
 * @param fasta path to a fasta file; ignored if null or blank
 * @param genome genome identifier or file passed to {@code GenomicCoords.setGenome}; ignored if null or blank
 * @param debug if greater than 0, print the stack trace of each failed attempt
 * @return the region string from the first source that succeeds, or an empty string if all fail
 * @throws IOException if the fasta file cannot be read or closed
 * @throws InvalidGenomicCoordsException declared for the genome initialization calls
 */
public static String initRegion(List<String> inputFileList, String fasta, String genome, int debug) throws IOException, InvalidGenomicCoordsException {
    // Preferably we start from a position that has a feature rather than from the start of a
    // random chrom.
    System.err.print("Initializing coordinates... ");
    // First search for files that can init chrom and position. TDF files are set aside
    // and only used as a last resort (see the final loop).
    List<String> skipped = new ArrayList<String>();
    for (String x : inputFileList) {
        TrackFormat fmt = Utils.getFileTypeFromName(x);
        if (fmt.equals(TrackFormat.TDF)) {
            skipped.add(x);
            continue;
        }
        try {
            String region = Utils.initRegionFromFile(x);
            System.err.println("Done from: " + x);
            return region;
        } catch (Exception e) {
            // Best effort: report and move on to the next candidate file.
            System.err.println("\nCould not initilize from file " + x);
            if (debug > 0) {
                e.printStackTrace();
            }
        }
    }
    // Try to initialize from fasta
    if (fasta != null && !fasta.trim().isEmpty()) {
        IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(fasta));
        try {
            return faSeqFile.nextSequence().getName();
        } finally {
            // Close even when reading the first sequence fails, so the file handle is not leaked.
            faSeqFile.close();
        }
    }
    // Try genome file
    if (genome != null && !genome.trim().isEmpty()) {
        GenomicCoords gc = new GenomicCoords(Utils.getTerminalWidth());
        gc.setGenome(Arrays.asList(genome), false);
        SAMSequenceDictionary samSeqDict = gc.getSamSeqDict();
        return samSeqDict.getSequence(0).getSequenceName();
    }
    // Failing that, look for any file that gives at least chrom
    for (String x : skipped) {
        try {
            String region = Utils.initRegionFromFile(x);
            System.err.println("Done from: " + x);
            return region;
        } catch (Exception e) {
            System.err.println("\nCould not initilize from file " + x);
            if (debug > 0) {
                e.printStackTrace();
            }
        }
    }
    // It appears everything failed to initialise...
    return "";
}
Also used : TrackFormat(tracks.TrackFormat) ArrayList(java.util.ArrayList) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) InvalidCommandLineException(exceptions.InvalidCommandLineException) InvalidColourException(exceptions.InvalidColourException) InvalidRecordException(exceptions.InvalidRecordException) SQLException(java.sql.SQLException) InvalidConfigException(exceptions.InvalidConfigException) UnindexableFastaFileException(faidx.UnindexableFastaFileException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) DocumentException(com.itextpdf.text.DocumentException) BamIndexNotFoundException(exceptions.BamIndexNotFoundException) InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 2 with TrackFormat

use of tracks.TrackFormat in project ASCIIGenome by dariober.

the class Utils method prepareStdinFile.

/**
 * Copies stdin line by line to a temporary file, sniffs the track format of
 * that file and renames it so its extension matches the detected format
 * (.sam, .vcf, .bedGraph, .bed, .gff3 or .gtf).
 * Every line is written with a trailing "\n", whatever the input line terminator.
 * This method should be private. Set to protected only for testing.
 *
 * @return the renamed temporary file, registered for deletion on JVM exit
 * @throws IOException if stdin cannot be read, the temporary file cannot be
 *         written or renamed, or the format of the input is not recognized
 */
protected static File prepareStdinFile() throws IOException {
    File tmp = createTempFile("stdin.", "");
    tmp.deleteOnExit();

    // The Scanner is intentionally not closed: closing it would close System.in.
    Scanner sc = new Scanner(System.in);
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(tmp))) {
        while (sc.hasNextLine()) {
            bw.write(sc.nextLine());
            bw.write("\n");
        }
    }
    // Scanner swallows read errors and just stops; surface them instead of
    // silently working on a truncated copy of the input.
    IOException readError = sc.ioException();
    if (readError != null) {
        throw readError;
    }

    TrackFormat fmt = sniffFile(tmp);
    String extension;
    if (fmt.equals(TrackFormat.BAM)) {
        // Text on stdin sniffed as alignment data gets the .sam extension.
        extension = ".sam";
    } else if (fmt.equals(TrackFormat.VCF)) {
        extension = ".vcf";
    } else if (fmt.equals(TrackFormat.BEDGRAPH)) {
        extension = ".bedGraph";
    } else if (fmt.equals(TrackFormat.BED)) {
        extension = ".bed";
    } else if (fmt.equals(TrackFormat.GFF)) {
        extension = ".gff3";
    } else if (fmt.equals(TrackFormat.GTF)) {
        extension = ".gtf";
    } else {
        throw new IOException("Cannot determine track format of stdin.");
    }

    File fmtFile = new File(tmp.getAbsolutePath() + extension);
    fmtFile.deleteOnExit();
    if (!tmp.renameTo(fmtFile)) {
        // Without this check a failed rename would hand the caller a file that does not exist.
        throw new IOException("Cannot rename " + tmp.getAbsolutePath() + " to " + fmtFile.getAbsolutePath());
    }
    return fmtFile;
}
Also used : Scanner(java.util.Scanner) TrackFormat(tracks.TrackFormat) SAMFileWriter(htsjdk.samtools.SAMFileWriter) FileWriter(java.io.FileWriter) IOException(java.io.IOException) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BufferedWriter(java.io.BufferedWriter)

Example 3 with TrackFormat

use of tracks.TrackFormat in project ASCIIGenome by dariober.

the class Utils method initRegionFromFile.

/**
 * Returns a starting region taken from the beginning of the input file.
 * As you add support for more filetypes you should update this function.
 * This method is very dirty and shouldn't be trusted 100%.
 *
 * <p>The result depends on the format derived from the file name:
 * <ul>
 *   <li>BAM: "contig:start" of the first record, or the name of the first
 *       sequence in the header if there are no records;</li>
 *   <li>bigWig / bigBed: delegated to {@code initRegionFromBigWig} /
 *       {@code initRegionFromBigBed}; URLs are refused;</li>
 *   <li>TDF: the first chromosome name that is not "All";</li>
 *   <li>anything else: treated as tab-separated text (optionally gzipped),
 *       see {@code initRegionFromTextFile}.</li>
 * </ul>
 *
 * @param x path or URL of the input file
 * @return a region string, either "chrom" or "chrom:position"
 * @throws IOException if the input cannot be opened, read or closed
 * @throws InvalidGenomicCoordsException if {@code x} is a URL to a bigWig or bigBed file
 * @throws RuntimeException if a TDF file lists no chromosome other than "All"
 * @throws SQLException declared for the helpers called here
 * @throws InvalidRecordException declared for the helpers called here
 * @throws InvalidCommandLineException declared for the helpers called here
 * @throws ClassNotFoundException declared for the helpers called here
 */
public static String initRegionFromFile(String x) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidCommandLineException, InvalidRecordException, SQLException {
    boolean isUrl = new UrlValidator().isValid(x);
    TrackFormat fmt = Utils.getFileTypeFromName(x);

    if (fmt.equals(TrackFormat.BAM)) {
        return initRegionFromAlignmentFile(x, isUrl);
    }
    if (fmt.equals(TrackFormat.BIGWIG) || fmt.equals(TrackFormat.BIGBED)) {
        if (isUrl) {
            // Loading from URL is painfully slow so do not initialize from URL
            System.err.println("Refusing to initialize from URL");
            throw new InvalidGenomicCoordsException();
        }
        return fmt.equals(TrackFormat.BIGWIG) ? initRegionFromBigWig(x) : initRegionFromBigBed(x);
    }
    if (fmt.equals(TrackFormat.TDF)) {
        Iterator<String> iter = TDFReader.getReader(x).getChromosomeNames().iterator();
        while (iter.hasNext()) {
            String chrom = iter.next();
            // "All" is skipped: it is not usable as a starting chromosome.
            if (!chrom.equals("All")) {
                return chrom;
            }
        }
        System.err.println("Cannot initialize from " + x);
        throw new RuntimeException("Cannot initialize from " + x);
    }
    // Input file appears to be a generic interval file. We expect chrom to be in column 1
    // VCF files are also included here since they are either gzip or plain ASCII.
    return initRegionFromTextFile(x, fmt, isUrl);
}

/**
 * Initializes the region from a SAM/BAM source; the reader is always closed.
 *
 * @param x path or URL of the alignment file
 * @param isUrl whether {@code x} was recognized as a URL
 * @return "contig:start" of the first record, or the first header sequence name if there are no records
 * @throws IOException if the URL is malformed or the reader cannot be closed
 */
private static String initRegionFromAlignmentFile(String x, boolean isUrl) throws IOException {
    SamReader samReader;
    if (isUrl) {
        samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(x)));
    } else {
        SamReaderFactory srf = SamReaderFactory.make();
        srf.validationStringency(ValidationStringency.SILENT);
        samReader = srf.open(new File(x));
    }
    try {
        // Default: Start from the first contig in dictionary
        String region = samReader.getFileHeader().getSequence(0).getSequenceName();
        SAMRecordIterator iter = samReader.iterator();
        if (iter.hasNext()) {
            // If there are records in this BAM, init from first record
            SAMRecord rec = iter.next();
            region = rec.getContig() + ":" + rec.getAlignmentStart();
        }
        return region;
    } finally {
        // Close on every path, including empty files and failures while reading.
        samReader.close();
    }
}

/**
 * Initializes the region from the first data line of a tab-separated text
 * file, plain or gzip-compressed (.gz / .bgz), local or remote. Blank lines
 * and lines starting with "#" or "track " are skipped. Input is decoded as UTF-8.
 *
 * @param x path or URL of the input file
 * @param fmt track format of {@code x}
 * @param isUrl whether {@code x} was recognized as a URL
 * @return "chrom:position" from the first data line. If there are no data
 *         lines: for VCF the first sequence name in the header dictionary
 *         when available, otherwise "Undefined_contig"
 */
private static String initRegionFromTextFile(String x, TrackFormat fmt, boolean isUrl) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidCommandLineException, InvalidRecordException, SQLException {
    String lowerName = x.toLowerCase();
    boolean gzipped = lowerName.endsWith(".gz") || lowerName.endsWith(".bgz");

    // Declaring the raw stream as its own resource guarantees it is closed even
    // when wrapping it in GZIPInputStream fails (e.g. the file is not really gzip).
    try (InputStream raw = isUrl ? new URL(x).openStream() : new FileInputStream(x);
            InputStream in = gzipped ? new GZIPInputStream(raw) : raw;
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
        String line;
        while ((line = br.readLine()) != null) {
            line = line.trim();
            if (line.startsWith("#") || line.isEmpty() || line.startsWith("track ")) {
                continue;
            }
            if (fmt.equals(TrackFormat.VCF)) {
                String[] fields = line.split("\t");
                return fields[0] + ":" + fields[1];
            }
            IntervalFeature feature = new IntervalFeature(line, fmt, null);
            return feature.getChrom() + ":" + feature.getFrom();
        }
    }

    // Reaching this point means the input has no records
    String region = "Undefined_contig";
    if (fmt.equals(TrackFormat.VCF)) {
        SAMSequenceDictionary seqdict = getVCFHeader(x).getSequenceDictionary();
        if (seqdict != null) {
            Iterator<SAMSequenceRecord> iter = seqdict.getSequences().iterator();
            if (iter.hasNext()) {
                region = iter.next().getSequenceName();
            }
        }
    }
    return region;
}
Also used : TrackFormat(tracks.TrackFormat) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) InputStreamReader(java.io.InputStreamReader) GZIPInputStream(java.util.zip.GZIPInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) Reader(java.io.Reader) AbstractFeatureReader(htsjdk.tribble.AbstractFeatureReader) TabixReader(htsjdk.tribble.readers.TabixReader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) TDFReader(org.broad.igv.tdf.TDFReader) BBFileReader(org.broad.igv.bbfile.BBFileReader) SamReader(htsjdk.samtools.SamReader) FileReader(java.io.FileReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) URL(java.net.URL) FileInputStream(java.io.FileInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) UrlValidator(org.apache.commons.validator.routines.UrlValidator) InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException) BufferedReader(java.io.BufferedReader) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) BBFileReader(org.broad.igv.bbfile.BBFileReader) FileReader(java.io.FileReader) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IntervalFeature(tracks.IntervalFeature)

Aggregations

IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)3 File (java.io.File)3 TrackFormat (tracks.TrackFormat)3 InvalidGenomicCoordsException (exceptions.InvalidGenomicCoordsException)2 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)2 IOException (java.io.IOException)2 DocumentException (com.itextpdf.text.DocumentException)1 BamIndexNotFoundException (exceptions.BamIndexNotFoundException)1 InvalidColourException (exceptions.InvalidColourException)1 InvalidCommandLineException (exceptions.InvalidCommandLineException)1 InvalidConfigException (exceptions.InvalidConfigException)1 InvalidRecordException (exceptions.InvalidRecordException)1 UnindexableFastaFileException (faidx.UnindexableFastaFileException)1 SAMFileWriter (htsjdk.samtools.SAMFileWriter)1 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 SamReader (htsjdk.samtools.SamReader)1 SamReaderFactory (htsjdk.samtools.SamReaderFactory)1 AbstractFeatureReader (htsjdk.tribble.AbstractFeatureReader)1