use of tracks.TrackFormat in project ASCIIGenome by dariober.
the class Main method initRegion.
/**
 * Return a suitable region to start from.
 *
 * <p>Sources are tried in the following order and the first one that yields a
 * region wins:
 * <ol>
 * <li>every input file whose format is not TDF, via {@code Utils.initRegionFromFile};</li>
 * <li>the name of the first sequence in {@code fasta}, if a fasta file is given;</li>
 * <li>the name of the first sequence in the dictionary built from {@code genome},
 *     if a genome is given;</li>
 * <li>the TDF files skipped in step 1, again via {@code Utils.initRegionFromFile}.</li>
 * </ol>
 * Preferring step 1 means we start from a position that has a feature rather
 * than from the start of an arbitrary chromosome.
 *
 * <p>Progress and failures are reported on stderr. A failure to initialize from
 * an individual input file is not fatal: it is reported and the next source is tried.
 *
 * <p>This method is a mess and should be cleaned up together with GenomicCoords class.
 *
 * @param inputFileList files (or URLs) to probe for a starting region
 * @param fasta path to an indexed fasta file; ignored if null or blank
 * @param genome genome identifier or file handed to {@code GenomicCoords.setGenome};
 *        ignored if null or blank
 * @param debug if greater than 0, stack traces of per-file failures are printed
 * @return the starting region, or an empty string if no source could provide one
 * @throws IOException if the fasta file cannot be opened, read or closed
 * @throws InvalidGenomicCoordsException
 */
public static String initRegion(List<String> inputFileList, String fasta, String genome, int debug) throws IOException, InvalidGenomicCoordsException {
    System.err.print("Initializing coordinates... ");

    // First search for files that can init chrom and position. TDF files are
    // set aside and only used as a last resort.
    List<String> skipped = new ArrayList<String>();
    for (String x : inputFileList) {
        if (TrackFormat.TDF.equals(Utils.getFileTypeFromName(x))) {
            skipped.add(x);
            continue;
        }
        String region = tryInitRegionFromFile(x, debug);
        if (region != null) {
            return region;
        }
    }

    // Try to initialize from fasta
    if (fasta != null && !fasta.trim().isEmpty()) {
        IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(fasta));
        try {
            return faSeqFile.nextSequence().getName();
        } finally {
            // Close even if reading the first sequence fails.
            faSeqFile.close();
        }
    }

    // Try genome file
    if (genome != null && !genome.trim().isEmpty()) {
        GenomicCoords gc = new GenomicCoords(Utils.getTerminalWidth());
        gc.setGenome(Arrays.asList(genome), false);
        SAMSequenceDictionary samSeqDict = gc.getSamSeqDict();
        return samSeqDict.getSequence(0).getSequenceName();
    }

    // Failing that, look for any file that gives at least chrom
    for (String x : skipped) {
        String region = tryInitRegionFromFile(x, debug);
        if (region != null) {
            return region;
        }
    }

    // It appears everything failed to initialise...
    return "";
}

/**
 * Try to get a starting region from a single file, reporting the outcome on stderr.
 *
 * @return the region, or null if the file could not be used
 */
private static String tryInitRegionFromFile(String x, int debug) {
    try {
        String region = Utils.initRegionFromFile(x);
        if (region != null) {
            System.err.println("Done from: " + x);
            return region;
        }
        System.err.println("\nCould not initilize from file " + x);
    } catch (Exception e) {
        // Best effort: any failure on this file just means we move on to the next source.
        System.err.println("\nCould not initilize from file " + x);
        if (debug > 0) {
            e.printStackTrace();
        }
    }
    return null;
}
use of tracks.TrackFormat in project ASCIIGenome by dariober.
the class Utils method prepareStdinFile.
/**
 * Read everything from stdin and write it to a temporary file whose name
 * extension is consistent with the input format.
 *
 * <p>The data is first copied line by line to a temporary file, the format of
 * that file is detected with {@code sniffFile}, and the file is then renamed by
 * appending the matching extension ({@code .sam}, {@code .vcf},
 * {@code .bedGraph}, {@code .bed}, {@code .gff3} or {@code .gtf}). Both the
 * intermediate and the final file are registered for deletion on JVM exit.
 *
 * <p>This method should be private. Set to protected only for testing.
 *
 * @return the temporary file holding the content of stdin, named with the
 *         extension matching the detected format
 * @throws IOException if stdin cannot be read, the temporary file cannot be
 *         written or renamed, or the track format cannot be determined
 */
protected static File prepareStdinFile() throws IOException {
    // Deliberately not closed: closing the Scanner would also close System.in.
    Scanner sc = new Scanner(System.in);
    File tmp = createTempFile("stdin.", "");
    tmp.deleteOnExit();

    BufferedWriter bw = new BufferedWriter(new FileWriter(tmp));
    try {
        while (sc.hasNextLine()) {
            bw.write(sc.nextLine());
            bw.write("\n");
        }
    } finally {
        bw.close();
    }
    // Scanner hides read errors behind hasNextLine() == false; surface them
    // instead of silently continuing with a truncated copy of stdin.
    IOException readError = sc.ioException();
    if (readError != null) {
        throw readError;
    }

    TrackFormat fmt = sniffFile(tmp);
    String extension;
    if (TrackFormat.BAM.equals(fmt)) {
        // The temporary file is given the .sam extension for this format.
        extension = ".sam";
    } else if (TrackFormat.VCF.equals(fmt)) {
        extension = ".vcf";
    } else if (TrackFormat.BEDGRAPH.equals(fmt)) {
        extension = ".bedGraph";
    } else if (TrackFormat.BED.equals(fmt)) {
        extension = ".bed";
    } else if (TrackFormat.GFF.equals(fmt)) {
        extension = ".gff3";
    } else if (TrackFormat.GTF.equals(fmt)) {
        extension = ".gtf";
    } else {
        throw new IOException("Cannot determine track format of stdin.");
    }

    File fmtFile = new File(tmp.getAbsolutePath() + extension);
    fmtFile.deleteOnExit();
    if (!tmp.renameTo(fmtFile)) {
        // Do not hand back a File that does not exist on disk.
        tmp.delete();
        throw new IOException("Cannot rename " + tmp.getAbsolutePath() + " to " + fmtFile.getAbsolutePath());
    }
    return fmtFile;
}
use of tracks.TrackFormat in project ASCIIGenome by dariober.
the class Utils method initRegionFromFile.
/**
 * Get a starting region from an input file or URL. As you add support for
 * more filetypes you should update this function. This method is very dirty and
 * shouldn't be trusted 100%.
 *
 * <p>What is returned depends on the format detected from the name of {@code x}:
 * <ul>
 * <li>BAM: {@code contig:start} of the first record, or the name of the first
 *     contig in the header if there are no records.</li>
 * <li>BIGWIG / BIGBED: delegated to {@code initRegionFromBigWig} /
 *     {@code initRegionFromBigBed} for local files. URLs are refused because
 *     loading from URL is painfully slow.</li>
 * <li>TDF: the first chromosome name that is not {@code "All"}.</li>
 * <li>Anything else is treated as a tab-separated interval file, plain or
 *     gzip-compressed ({@code .gz} / {@code .bgz}), local or remote, decoded as
 *     UTF-8. The region is {@code chrom:position} of the first line that is not
 *     blank, not a {@code #} comment and not a {@code track } line. If there is
 *     no such line the result is the first contig of the VCF header dictionary
 *     (VCF only, when available) or {@code "Undefined_contig"}.</li>
 * </ul>
 *
 * @param x path or URL of the file to initialize from
 * @return the starting region, either {@code chrom} or {@code chrom:position}
 * @throws IOException if the file cannot be opened, read or closed
 * @throws InvalidGenomicCoordsException if {@code x} is a bigWig or bigBed URL
 * @throws RuntimeException if {@code x} is a TDF file with no usable chromosome name
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws InvalidCommandLineException
 * @throws ClassNotFoundException
 */
@SuppressWarnings("unused")
public static String initRegionFromFile(String x) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidCommandLineException, InvalidRecordException, SQLException {
    UrlValidator urlValidator = new UrlValidator();
    TrackFormat fmt = Utils.getFileTypeFromName(x);
    boolean isUrl = urlValidator.isValid(x);

    if (fmt.equals(TrackFormat.BAM)) {
        SamReader samReader;
        if (isUrl) {
            samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(x)));
        } else {
            SamReaderFactory srf = SamReaderFactory.make();
            srf.validationStringency(ValidationStringency.SILENT);
            samReader = srf.open(new File(x));
        }
        try {
            // Default: Start from the first contig in dictionary
            String region = samReader.getFileHeader().getSequence(0).getSequenceName();
            SAMRecordIterator iter = samReader.iterator();
            if (iter.hasNext()) {
                // If there are records in this BAM, init from first record
                SAMRecord rec = iter.next();
                region = rec.getContig() + ":" + rec.getAlignmentStart();
            }
            return region;
        } finally {
            // Always release the reader, also when the file has no records.
            samReader.close();
        }
    } else if (fmt.equals(TrackFormat.BIGWIG) || fmt.equals(TrackFormat.BIGBED)) {
        if (isUrl) {
            // Loading from URL is painfully slow so do not initialize from URL
            System.err.println("Refusing to initialize from URL");
            throw new InvalidGenomicCoordsException();
        }
        if (fmt.equals(TrackFormat.BIGWIG)) {
            return initRegionFromBigWig(x);
        }
        return initRegionFromBigBed(x);
    } else if (fmt.equals(TrackFormat.TDF)) {
        Iterator<String> iter = TDFReader.getReader(x).getChromosomeNames().iterator();
        while (iter.hasNext()) {
            String chrom = iter.next();
            // "All" is skipped; only other names are accepted as a starting chromosome.
            if (!chrom.equals("All")) {
                return chrom;
            }
        }
        System.err.println("Cannot initialize from " + x);
        throw new RuntimeException("Cannot initialize from " + x);
    } else {
        // Input file appears to be a generic interval file. We expect chrom to be in column 1
        // VCF files are also included here since they are either gzip or plain ASCII.
        InputStream in = isUrl ? new URL(x).openStream() : new FileInputStream(x);
        BufferedReader br;
        try {
            String lowerName = x.toLowerCase();
            if (lowerName.endsWith(".gz") || lowerName.endsWith(".bgz")) {
                in = new GZIPInputStream(in);
            }
            br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
        } catch (IOException e) {
            // E.g. not actually gzip data: do not leak the underlying stream.
            in.close();
            throw e;
        }

        try {
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.startsWith("#") || line.isEmpty() || line.startsWith("track ")) {
                    continue;
                }
                // First data line found: it alone decides the region.
                if (fmt.equals(TrackFormat.VCF)) {
                    String[] fields = line.split("\t");
                    return fields[0] + ":" + fields[1];
                }
                IntervalFeature feature = new IntervalFeature(line, fmt, null);
                return feature.getChrom() + ":" + feature.getFrom();
            }
        } finally {
            br.close();
        }

        // Getting here means the input has no records
        String region = "Undefined_contig";
        if (fmt.equals(TrackFormat.VCF)) {
            SAMSequenceDictionary seqdict = getVCFHeader(x).getSequenceDictionary();
            if (seqdict != null) {
                Iterator<SAMSequenceRecord> iter = seqdict.getSequences().iterator();
                if (iter.hasNext()) {
                    region = iter.next().getSequenceName();
                }
            }
        }
        return region;
    }
}
Aggregations