use of tracks.IntervalFeature in project ASCIIGenome by dariober.
the class UcscGenePred method getTranscript.
/**
 * Builds the GTF "transcript" feature for a single genePred record.
 *
 * <p>Elements read from {@code genePredList} (0-based): 0 becomes the transcript_id,
 * 1 the GTF sequence name, 2 the strand, 3 the start (written out incremented by one),
 * 4 the end, and 11 both the gene_id and the gene_name.
 *
 * @param genePredList Fields of one genePred line, with the bin column already removed.
 * @param source Value written to the GTF source column.
 * @return The transcript as a GTF-formatted IntervalFeature.
 * @throws InvalidGenomicCoordsException
 */
private IntervalFeature getTranscript(List<String> genePredList, String source) throws InvalidGenomicCoordsException {
    final String seqname = genePredList.get(1);
    // The start read from the record is shifted by +1 before being written to GTF;
    // the end is passed through untouched.
    final String start = String.valueOf(Integer.parseInt(genePredList.get(3)) + 1);
    final String end = genePredList.get(4);
    final String strand = genePredList.get(2);

    // Attribute column: gene_id and gene_name both come from element 11.
    final StringBuilder attributes = new StringBuilder();
    attributes.append("gene_id \"").append(genePredList.get(11)).append('"');
    attributes.append("; transcript_id \"").append(genePredList.get(0)).append('"');
    attributes.append("; gene_name \"").append(genePredList.get(11)).append("\";");

    // Score (column 6) and frame (column 8) are not available: use the "." placeholder.
    final String[] columns = {
        seqname, source, "transcript", start, end, ".", strand, ".", attributes.toString()
    };
    return new IntervalFeature(Joiner.on("\t").join(columns), TrackFormat.GTF, null);
}
use of tracks.IntervalFeature in project ASCIIGenome by dariober.
the class UcscGenePred method getLeftUTR.
// private List<IntervalFeature> getStartCodonRev(List<IntervalFeature> cds){
//
// }
/**
 * Collects the untranslated portions of the exons lying to the left of {@code cdsStart}.
 *
 * <p>Each returned feature starts at the exon start and ends at the exon end or, for the
 * exon spanning {@code cdsStart}, at {@code cdsStart - 1}. The feature type is "5UTR" when
 * the exon strand is '+', "3UTR" otherwise. The scan stops at the first exon starting at or
 * after {@code cdsStart} (presumably exons are sorted by start position; confirm with caller).
 *
 * @param exons Exons of one transcript.
 * @param cdsStart Leftmost coding position.
 * @param cdsEnd Rightmost coding position; if smaller than cdsStart the result is empty.
 * @return UTR features as GTF records, possibly an empty list.
 * @throws InvalidGenomicCoordsException
 */
private List<IntervalFeature> getLeftUTR(List<IntervalFeature> exons, final int cdsStart, final int cdsEnd) throws InvalidGenomicCoordsException {
    final List<IntervalFeature> leftUtr = new ArrayList<IntervalFeature>();
    if (cdsStart <= cdsEnd) {
        // When cdsStart > cdsEnd there is no UTR in this transcript and the loop is skipped.
        for (final IntervalFeature exon : exons) {
            if (exon.getFrom() >= cdsStart) {
                // This exon begins inside or after the coding region: no more UTR.
                break;
            }
            // The exon starts before cdsStart. If it also reaches cdsStart, only the part
            // up to cdsStart - 1 is UTR; otherwise the whole exon is.
            final int utrEnd = exon.getTo() >= cdsStart ? cdsStart - 1 : exon.getTo();
            final String featureType = exon.getStrand() == '+' ? "5UTR" : "3UTR";

            // Reuse the attribute column (9th field) of the exon's raw record.
            final Iterable<String> rawColumns = Splitter.on("\t").omitEmptyStrings().split(exon.getRaw());
            final String attributes = Lists.newArrayList(rawColumns).get(8);

            final String[] columns = {
                exon.getChrom(),
                exon.getSource(),
                featureType,
                Integer.toString(exon.getFrom()),
                Integer.toString(utrEnd),
                ".",
                String.valueOf(exons.get(0).getStrand()),
                ".",
                attributes
            };
            leftUtr.add(new IntervalFeature(Joiner.on("\t").join(columns), TrackFormat.GTF, null));
        }
    }
    return leftUtr;
}
use of tracks.IntervalFeature in project ASCIIGenome by dariober.
the class UcscGenePred method getCDS.
/**
* Builds the CDS features of a transcript by clipping its exons to the interval
* cdsStart:cdsEnd and then trimming three bases (the stop codon, per the comments below)
* from the appropriate end of the coding region.
*
* Chromosome, source and strand of every CDS are taken from the first exon; the attribute
* column is copied from the raw record of the exon each CDS derives from.
*
* Trimming is applied to the last CDS when the first exon is on '+' and cdsEndStat is "cmpl",
* or to the first CDS when the first exon is on '-' and cdsStartStat is "cmpl".
*
* @param exons Exons of one transcript; exons.get(0) is read unconditionally once
* cdsStart <= cdsEnd, so the list must not be empty in that case.
* @param cdsStart Leftmost coding position.
* @param cdsEnd Rightmost coding position; if smaller than cdsStart an empty list is returned.
* @param cdsStartStat Compared against "cmpl" for '-' strand transcripts.
* @param cdsEndStat Compared against "cmpl" for '+' strand transcripts.
* @return CDS features as GTF records, possibly an empty list.
* @throws InvalidGenomicCoordsException If the trimmed coordinate is <= 0 or falls outside the CDS being trimmed.
*/
private List<IntervalFeature> getCDS(List<IntervalFeature> exons, final int cdsStart, final int cdsEnd, String cdsStartStat, String cdsEndStat) throws InvalidGenomicCoordsException {
if (cdsStart > cdsEnd) {
// There are no CDS in this transcript
return new ArrayList<IntervalFeature>();
}
// Iterate through exons checking whether at least part of it is contained in the interval cdsStart:cdsEnd.
// If so, take the exon slice inside the interval cdsStart:cdsEnd and add it to the list of CDSs
List<IntervalFeature> cds = new ArrayList<IntervalFeature>();
for (int i = 0; i < exons.size(); i++) {
IntervalFeature exon = exons.get(i);
if (exon.getTo() < cdsStart || exon.getFrom() > cdsEnd) {
// Exon is not in interval cdsStart:cdsEnd
continue;
}
int cdsFrom = exon.getFrom();
if (cdsFrom < cdsStart) {
// If only part of exon is CDS
cdsFrom = cdsStart;
}
int cdsTo = exon.getTo();
if (cdsTo > cdsEnd) {
// If only part of exon is CDS
cdsTo = cdsEnd;
}
// Attribute column (9th tab-separated field) of the exon's raw record
String attr = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().split(exon.getRaw())).get(8);
// Build the interval feature object
String[] gff = new String[9];
gff[0] = exons.get(0).getChrom();
gff[1] = exons.get(0).getSource();
gff[2] = "CDS";
gff[3] = Integer.toString(cdsFrom);
gff[4] = Integer.toString(cdsTo);
gff[5] = ".";
gff[6] = String.valueOf(exons.get(0).getStrand());
// It's unclear to me how frames are assigned so leave it N/A.
gff[7] = ".";
gff[8] = attr;
IntervalFeature x = new IntervalFeature(Joiner.on("\t").join(gff), TrackFormat.GTF, null);
cds.add(x);
}
// Trim the 3 bases of the stop codon from the end of the coding region. If the CDS holding the
// stop codon is 3 bases or shorter, drop it and remove the remainder from the neighbouring CDS.
// NOTE(review): if no exon overlapped cdsStart:cdsEnd, cds is empty here and the cds.get(...)
// calls below throw IndexOutOfBoundsException; the same happens if the only CDS is removed.
if (exons.get(0).getStrand() == '+' && cdsEndStat.equals("cmpl")) {
// Plus strand: shorten the last CDS by moving its end 3 bases to the left.
IntervalFeature stopCds = cds.get(cds.size() - 1);
int newStop = stopCds.getTo() - 3;
// remainder = from - newStop = 3 - (to - from). Assuming inclusive coordinates, this is
// 4 minus the CDS length, i.e. positive only when the CDS is 3 bases long or shorter.
int remainder = -(newStop - stopCds.getFrom());
if (remainder > 0) {
// If remainder is > 0, this CDS doesn't exist at all and must be removed. This happens if the
// stop codon is split across two exons (rare but it happens).
// We also need to chip off "remainder" from the previous CDS.
cds.remove(stopCds);
stopCds = cds.get(cds.size() - 1);
// The +1 is needed because remainder is one more than the number of stop codon bases
// left in the previous CDS: e.g. a removed CDS of exactly 3 bases gives remainder = 1
// and the previous CDS must keep its end unchanged.
newStop = stopCds.getTo() - remainder + 1;
}
if (newStop <= 0 || newStop < stopCds.getFrom()) {
// Sanity check
throw new InvalidGenomicCoordsException();
}
// We create an intervalFeature from scratch that will replace the old one.
List<String> raw = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().splitToList(stopCds.getRaw()));
// Replace end coord
raw.set(4, Integer.toString(newStop));
// Replace last element.
cds.set(cds.size() - 1, new IntervalFeature(Joiner.on("\t").join(raw), TrackFormat.GTF, null));
} else if (exons.get(0).getStrand() == '-' && cdsStartStat.equals("cmpl")) {
// same as above. This time apply to first CDS whose start has to be increased by 3
IntervalFeature stopCds = cds.get(0);
int newStop = stopCds.getFrom() + 3;
// remainder = 3 - (to - from): same quantity as in the plus strand branch.
int remainder = newStop - stopCds.getTo();
if (remainder > 0) {
// If remainder is > 0, this CDS doesn't exist at all and must be removed. This happens if the
// stop codon is split across two exons (rare but it happens).
// We also need to chip off "remainder" from the next CDS.
cds.remove(stopCds);
stopCds = cds.get(0);
// The -1 mirrors the +1 of the plus strand branch: only remainder - 1 bases of the
// stop codon belong to the next CDS.
newStop = stopCds.getFrom() + remainder - 1;
}
if (newStop <= 0 || newStop > stopCds.getTo()) {
// Sanity check
throw new InvalidGenomicCoordsException();
}
// We create an intervalFeature from scratch that will replace the old one.
List<String> raw = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().splitToList(stopCds.getRaw()));
// Replace start coord
raw.set(3, Integer.toString(newStop));
// Replace first element.
cds.set(0, new IntervalFeature(Joiner.on("\t").join(raw), TrackFormat.GTF, null));
}
return cds;
}
use of tracks.IntervalFeature in project ASCIIGenome by dariober.
the class Main method main.
/**
* Program entry point. Parses the command line, initialises configuration, console, genomic
* coordinates and tracks, then runs in one of three modes:
* <ul>
* <li>batch: when the "batchFile" option is set, each line of that file is parsed as a BED
* record and "goto chrom:from-to && exec" is processed for it; the method returns at the
* end of the file, or the JVM exits with status 1 at the first command ending in ERROR;</li>
* <li>non-interactive: when the "nonInteractive" option is set, the tracks are processed once,
* the "exec" command is run and the method returns;</li>
* <li>interactive: otherwise commands are read from the console in an endless loop.</li>
* </ul>
*
* @param args Command line arguments, handed to ArgParse.argParse.
*/
public static void main(String[] args) throws IOException, InvalidGenomicCoordsException, InvalidCommandLineException, InvalidRecordException, BamIndexNotFoundException, ClassNotFoundException, SQLException, DocumentException, UnindexableFastaFileException, InvalidColourException, InvalidConfigException {
/* Start parsing arguments *
* *** If you change something here change also in console input ***/
Namespace opts = ArgParse.argParse(args);
List<String> initFileList = opts.getList("input");
String region = opts.getString("region");
final String fasta = opts.getString("fasta");
String exec = opts.getString("exec");
String config = opts.getString("config");
exec = parseExec(exec);
int debug = opts.getInt("debug");
// Get configuration. Note that we don't need to assign this to a variable.
new Config(config);
// Likewise instantiated without keeping the instance (presumably for its initialisation side effects; confirm).
new Xterm256();
ASCIIGenomeHistory asciiGenomeHistory = new ASCIIGenomeHistory();
// Init console right at start so if something goes wrong the user's terminal is reset to
// initial defaults with the shutdown hook. This could be achieved in cleaner way probably.
ConsoleReader console = initConsole();
messageVersion(opts.getBoolean("noFormat"));
/* Set up console */
Utils.checkFasta(fasta, debug);
/* Test input files exist */
List<String> inputFileList = new ArrayList<String>();
Utils.addSourceName(inputFileList, initFileList, debug);
// No region given on the command line: derive one from the input files and/or the fasta.
if (region == null || region.isEmpty()) {
region = initRegion(inputFileList, fasta, null, debug);
}
int terminalWidth = Utils.getTerminalWidth();
GenomicCoords initGc = new GenomicCoords(region, terminalWidth, null, null);
// The genome is set from a copy of the input file list with the fasta appended last.
List<String> initGenomeList = new ArrayList<String>();
for (String x : inputFileList) {
initGenomeList.add(x);
}
initGenomeList.add(fasta);
initGc.setGenome(initGenomeList, false);
// ----------------------------
// Genomic positions start here:
final GenomicCoordsHistory gch = new GenomicCoordsHistory();
// Starting position: rebuilt from initGc's region string, sequence dictionary and fasta file.
GenomicCoords start = new GenomicCoords(initGc.toStringRegion(), terminalWidth, initGc.getSamSeqDict(), initGc.getFastaFile());
gch.readHistory(asciiGenomeHistory.getFileName(), start);
gch.add(start);
final TrackSet trackSet = new TrackSet(inputFileList, gch.current());
trackSet.addHistoryFiles(asciiGenomeHistory.getFiles());
setDefaultTrackHeights(console.getTerminal().getHeight(), trackSet.getTrackList());
final TrackProcessor proc = new TrackProcessor(trackSet, gch);
proc.setShowMem(opts.getBoolean("showMem"));
proc.setShowTime(opts.getBoolean("showTime"));
proc.setNoFormat(opts.getBoolean("noFormat"));
// Put here the previous command so that it is re-issued if no input is given
// You have to initialize this var outside the while loop that processes input files.
String currentCmdConcatInput = "";
if (!proc.isNoFormat()) {
// ANSI escape sequence setting the terminal background to the configured 256-colour code.
String str = String.format("\033[48;5;%sm", Config.get256Color(ConfigKey.background));
System.out.print(str);
}
// Batch processing file of regions
final String batchFile = opts.getString("batchFile");
if (batchFile != null && !batchFile.isEmpty()) {
console.clearScreen();
console.flush();
BufferedReader br = batchFileReader(batchFile);
String line = null;
while ((line = br.readLine()) != null) {
// Start processing intervals one by one
IntervalFeature target = new IntervalFeature(line, TrackFormat.BED, null);
String reg = target.getChrom() + ":" + target.getFrom() + "-" + target.getTo();
// Prepend a goto to the exec command; the trailing "&&" is stripped when exec is empty.
String gotoAndExec = ("goto " + reg + " && " + exec).trim().replaceAll("&&$", "");
InteractiveInput itr = new InteractiveInput(console);
itr.processInput(gotoAndExec, proc, debug);
if (itr.getInteractiveInputExitCode().equals(ExitCode.ERROR)) {
System.err.println("Error processing '" + gotoAndExec + "' at line '" + line + "'");
// Abort the whole batch at the first failing region (br is left unclosed; the JVM exits here).
System.exit(1);
}
}
br.close();
return;
}
// See if we need to process the exec arg before going to interactive mode.
// Also if we are in non-interactive mode, we process the track set now and later exit
console.clearScreen();
console.flush();
proc.iterateTracks();
if (!exec.isEmpty() || opts.getBoolean("nonInteractive")) {
InteractiveInput itr = new InteractiveInput(console);
itr.processInput(exec, proc, debug);
if (opts.getBoolean("nonInteractive")) {
// ANSI escape sequence resetting terminal text attributes before leaving.
System.out.print("\033[0m");
return;
}
}
/* Set up done, start processing */
/* ============================= */
console.setHistory(asciiGenomeHistory.getCommandHistory());
writeYamlHistory(asciiGenomeHistory, console.getHistory(), trackSet, gch);
while (true) {
// keep going until quit or if no interactive input set
// *** START processing interactive input
// String like "zi && -F 16 && mapq 10"
String cmdConcatInput = "";
InteractiveInput interactiveInput = new InteractiveInput(console);
ExitCode currentExitCode = ExitCode.NULL;
interactiveInput.setInteractiveInputExitCode(currentExitCode);
// NOTE(review): the second disjunct is redundant; the loop runs until the exit code is ERROR,
// after which the outer loop starts over with a fresh InteractiveInput.
while (!interactiveInput.getInteractiveInputExitCode().equals(ExitCode.ERROR) || interactiveInput.getInteractiveInputExitCode().equals(ExitCode.NULL)) {
// Blank out the line, return the cursor to column 0, then print the prompt.
console.setPrompt(StringUtils.repeat(' ', proc.getWindowSize()) + '\r' + "[h] for help: ");
// NOTE(review): if readLine() can return null (e.g. at end of input) this throws NullPointerException; confirm.
cmdConcatInput = console.readLine().trim();
if (cmdConcatInput.isEmpty()) {
// Empty input: User only issued <ENTER>
if (interactiveInput.getInteractiveInputExitCode().equals(ExitCode.CLEAN)) {
// User only issued <ENTER>: Repeat previous command if the exit code was not an error.
cmdConcatInput = currentCmdConcatInput;
} else {
// Refresh screen if the exit code was not CLEAN.
cmdConcatInput = "+0";
}
}
interactiveInput.processInput(cmdConcatInput, proc, debug);
// Remember the command so that a bare <ENTER> can repeat it.
currentCmdConcatInput = cmdConcatInput;
}
// *** END processing interactive input
}
}
use of tracks.IntervalFeature in project ASCIIGenome by dariober.
the class Utils method initRegionFromFile.
/**
 * Derives an initial region string from an input file, typically the contig (and position)
 * of its first record. As support for more file types is added this method should be updated.
 * This method is very dirty and shouldn't be trusted 100%.
 *
 * <ul>
 * <li>BAM: "contig:alignmentStart" of the first record, or the name of the first sequence in
 * the header if there are no records.</li>
 * <li>bigWig / bigBed: delegated to initRegionFromBigWig / initRegionFromBigBed for local
 * files; remote files are refused.</li>
 * <li>TDF: the first chromosome name different from "All".</li>
 * <li>Anything else is read as (optionally gzipped) tab-separated text. The first line that is
 * not blank, not starting with "#" and not starting with "track " gives "chrom:position".
 * If there is no such line the result is "Undefined_contig" or, for VCF, the first contig
 * of the header's sequence dictionary when available.</li>
 * </ul>
 *
 * Readers opened here are closed on every exit path, including when an exception is thrown
 * or when the input contains no records.
 *
 * @param x Path or URL of the input file.
 * @return A region string: either "contig" or "contig:position".
 * @throws IOException If the input cannot be opened or read.
 * @throws InvalidGenomicCoordsException If x is a remote bigWig or bigBed file.
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws InvalidCommandLineException
 * @throws ClassNotFoundException
 */
@SuppressWarnings("unused")
public static String initRegionFromFile(String x) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidCommandLineException, InvalidRecordException, SQLException {
    UrlValidator urlValidator = new UrlValidator();
    TrackFormat fmt = Utils.getFileTypeFromName(x);
    final boolean isUrl = urlValidator.isValid(x);

    if (fmt.equals(TrackFormat.BAM)) {
        SamReader samReader;
        if (isUrl) {
            samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(x)));
        } else {
            SamReaderFactory srf = SamReaderFactory.make();
            srf.validationStringency(ValidationStringency.SILENT);
            samReader = srf.open(new File(x));
        }
        try {
            // Default: Start from the first contig in dictionary
            String region = samReader.getFileHeader().getSequence(0).getSequenceName();
            SAMRecordIterator iter = samReader.iterator();
            if (iter.hasNext()) {
                // If there are records in this BAM, init from first record
                SAMRecord rec = iter.next();
                region = rec.getContig() + ":" + rec.getAlignmentStart();
            }
            return region;
        } finally {
            // Close the reader also when the BAM has no records or reading fails.
            samReader.close();
        }
    }

    if (fmt.equals(TrackFormat.BIGWIG) || fmt.equals(TrackFormat.BIGBED)) {
        if (isUrl) {
            // Loading from URL is painfully slow so do not initialize from URL
            System.err.println("Refusing to initialize from URL");
            throw new InvalidGenomicCoordsException();
        }
        return fmt.equals(TrackFormat.BIGWIG) ? initRegionFromBigWig(x) : initRegionFromBigBed(x);
    }

    if (fmt.equals(TrackFormat.TDF)) {
        Iterator<String> iter = TDFReader.getReader(x).getChromosomeNames().iterator();
        while (iter.hasNext()) {
            String chrom = iter.next();
            if (!chrom.equals("All")) {
                return chrom;
            }
        }
        System.err.println("Cannot initialize from " + x);
        throw new RuntimeException();
    }

    // Input file appears to be a generic interval file. We expect chrom to be in column 1
    // VCF files are also included here since they are either gzip or plain ASCII.
    BufferedReader br;
    InputStream in = isUrl ? new URL(x).openStream() : new FileInputStream(x);
    try {
        String lowerName = x.toLowerCase();
        if (lowerName.endsWith(".gz") || lowerName.endsWith(".bgz")) {
            in = new GZIPInputStream(in);
        }
        // Every source is decoded as UTF-8, local plain files included.
        br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
    } catch (IOException e) {
        // E.g. the file is not really gzipped: do not leak the underlying stream.
        in.close();
        throw e;
    }

    try {
        String line;
        while ((line = br.readLine()) != null) {
            line = line.trim();
            if (line.startsWith("#") || line.isEmpty() || line.startsWith("track ")) {
                // Header, blank or track definition line
                continue;
            }
            if (fmt.equals(TrackFormat.VCF)) {
                String[] fields = line.split("\t");
                return fields[0] + ":" + fields[1];
            }
            IntervalFeature feature = new IntervalFeature(line, fmt, null);
            return feature.getChrom() + ":" + feature.getFrom();
        }
    } finally {
        br.close();
    }

    // Getting here means the input has no records
    String region = "Undefined_contig";
    if (fmt.equals(TrackFormat.VCF)) {
        SAMSequenceDictionary seqdict = getVCFHeader(x).getSequenceDictionary();
        if (seqdict != null) {
            Iterator<SAMSequenceRecord> iter = seqdict.getSequences().iterator();
            if (iter.hasNext()) {
                region = iter.next().getSequenceName();
            }
        }
    }
    return region;
}
Aggregations