Search in sources :

Example 1 with IntervalFeature

use of tracks.IntervalFeature in project ASCIIGenome by dariober.

the class UcscGenePred method getTranscript.

/**
 * Turn one genePred record into a GTF line of type "transcript" and parse it into a feature.
 *
 * @param genePredList Fields of a genePred record where the bin column has been already removed.
 * @param source Value written to the GTF source column.
 * @return The transcript feature built from the assembled GTF line.
 * @throws InvalidGenomicCoordsException
 */
private IntervalFeature getTranscript(List<String> genePredList, String source) throws InvalidGenomicCoordsException {
    final String chrom = genePredList.get(1);
    // Start is incremented by one while the end is copied verbatim
    // (presumably 0-based genePred start -> 1-based GTF start; confirm against the genePred spec).
    final int gtfStart = Integer.parseInt(genePredList.get(3)) + 1;
    final String gtfEnd = genePredList.get(4);
    final String strand = genePredList.get(2);

    // Field 11 is used for both gene_id and gene_name; field 0 is the transcript_id.
    final String geneName = genePredList.get(11);
    final String transcriptId = genePredList.get(0);
    final String attributes = "gene_id \"" + geneName + "\"; transcript_id \"" + transcriptId + "\"; gene_name \"" + geneName + "\";";

    // The nine GTF columns, in order. Score and frame are not available.
    final String[] columns = {
        chrom,
        source,
        "transcript",
        Integer.toString(gtfStart),
        gtfEnd,
        ".",
        strand,
        ".",
        attributes
    };
    return new IntervalFeature(Joiner.on("\t").join(columns), TrackFormat.GTF, null);
}
Also used : IntervalFeature(tracks.IntervalFeature)

Example 2 with IntervalFeature

use of tracks.IntervalFeature in project ASCIIGenome by dariober.

the class UcscGenePred method getLeftUTR.

// private List<IntervalFeature> getStartCodonRev(List<IntervalFeature> cds){
// 
// }
/**
 * Collect the untranslated exon portions that lie to the left of cdsStart.
 *
 * Exons are visited in list order and the scan stops at the first exon starting at or after
 * cdsStart. The resulting features are labelled "5UTR" when the exon strand is '+' and "3UTR"
 * otherwise.
 *
 * @param exons Exons of one transcript.
 * @param cdsStart First coding position; UTR features end at most at cdsStart - 1.
 * @param cdsEnd Last coding position; only used to detect transcripts without CDS.
 * @return The UTR features, possibly empty.
 * @throws InvalidGenomicCoordsException
 */
private List<IntervalFeature> getLeftUTR(List<IntervalFeature> exons, final int cdsStart, final int cdsEnd) throws InvalidGenomicCoordsException {
    final List<IntervalFeature> leftUtr = new ArrayList<IntervalFeature>();
    if (cdsStart > cdsEnd) {
        // There is no UTR in this transcript
        return leftUtr;
    }
    for (IntervalFeature exon : exons) {
        final int exonFrom = exon.getFrom();
        if (exonFrom >= cdsStart) {
            // This exon starts inside or after the CDS: no more UTR to collect
            break;
        }
        // The exon starts before cdsStart. If it also reaches cdsStart, keep only the part
        // before it; otherwise the whole exon is UTR.
        final int utrTo = Math.min(exon.getTo(), cdsStart - 1);

        final String featureType;
        if (exon.getStrand() == '+') {
            featureType = "5UTR";
        } else {
            featureType = "3UTR";
        }

        // Attributes are copied from column 9 of the exon's raw line
        final List<String> exonColumns = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().split(exon.getRaw()));
        final String attributes = exonColumns.get(8);

        final String[] columns = {
            exon.getChrom(),
            exon.getSource(),
            featureType,
            Integer.toString(exonFrom),
            Integer.toString(utrTo),
            ".",
            String.valueOf(exons.get(0).getStrand()),
            ".",
            attributes
        };
        leftUtr.add(new IntervalFeature(Joiner.on("\t").join(columns), TrackFormat.GTF, null));
    }
    return leftUtr;
}
Also used : ArrayList(java.util.ArrayList) IntervalFeature(tracks.IntervalFeature)

Example 3 with IntervalFeature

use of tracks.IntervalFeature in project ASCIIGenome by dariober.

the class UcscGenePred method getCDS.

/**
 * Build the CDS features of a transcript by clipping each exon to the interval cdsStart:cdsEnd
 * and then trimming three bases from the stop-codon end of the CDS when that end is flagged
 * "cmpl" (cdsEndStat for '+' strand, cdsStartStat for '-' strand).
 *
 * @param exons Exons of one transcript; chrom, source and strand are taken from the first one.
 * @param cdsStart First coding position (inclusive).
 * @param cdsEnd Last coding position (inclusive).
 * @param cdsStartStat Status of the CDS start; only the value "cmpl" triggers trimming ('-' strand).
 * @param cdsEndStat Status of the CDS end; only the value "cmpl" triggers trimming ('+' strand).
 * @return CDS features in exon order. Empty if cdsStart > cdsEnd, if there are no exons or if
 *         no exon overlaps cdsStart:cdsEnd.
 * @throws InvalidGenomicCoordsException If trimming would leave an impossible interval or no CDS at all.
 */
private List<IntervalFeature> getCDS(List<IntervalFeature> exons, final int cdsStart, final int cdsEnd, String cdsStartStat, String cdsEndStat) throws InvalidGenomicCoordsException {
    List<IntervalFeature> cds = new ArrayList<IntervalFeature>();
    if (cdsStart > cdsEnd || exons.isEmpty()) {
        // There are no CDS in this transcript
        return cds;
    }
    final IntervalFeature firstExon = exons.get(0);

    // Iterate through exons checking whether at least part of it is contained in the interval cdsStart:cdsEnd.
    // If so, take the exon slice inside the interval cdsStart:cdsEnd and add it to the list of CDSs
    for (IntervalFeature exon : exons) {
        if (exon.getTo() < cdsStart || exon.getFrom() > cdsEnd) {
            // Exon is not in interval cdsStart:cdsEnd
            continue;
        }
        // Clip the exon to the coding interval
        int cdsFrom = Math.max(exon.getFrom(), cdsStart);
        int cdsTo = Math.min(exon.getTo(), cdsEnd);
        String attr = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().split(exon.getRaw())).get(8);
        // Build the interval feature object
        String[] gff = new String[9];
        gff[0] = firstExon.getChrom();
        gff[1] = firstExon.getSource();
        gff[2] = "CDS";
        gff[3] = Integer.toString(cdsFrom);
        gff[4] = Integer.toString(cdsTo);
        gff[5] = ".";
        gff[6] = String.valueOf(firstExon.getStrand());
        // It's unclear to me how frames are assigned so leave it N/A.
        gff[7] = ".";
        gff[8] = attr;
        cds.add(new IntervalFeature(Joiner.on("\t").join(gff), TrackFormat.GTF, null));
    }
    if (cds.isEmpty()) {
        // No exon overlaps the coding interval, so there is nothing to trim.
        return cds;
    }

    // Trim the 3 bases of the stop codon from the CDS. If the CDS piece holding the stop codon
    // is 3 bases or shorter, drop that piece and remove the remainder from the adjacent CDS.
    final char strand = firstExon.getStrand();
    if (strand == '+' && "cmpl".equals(cdsEndStat)) {
        // Stop codon is at the end of the last CDS
        int last = cds.size() - 1;
        IntervalFeature stopCds = cds.get(last);
        int newStop = stopCds.getTo() - 3;
        // With len = to - from + 1 this equals 4 - len, i.e. one more than the number of
        // stop-codon bases (3 - len) that spill over into the previous CDS.
        int remainder = -(newStop - stopCds.getFrom());
        if (remainder > 0) {
            // This CDS is entirely stop codon and must be removed. This happens if the
            // stop codon is split across two exons (rare but it happens).
            // Remove by position so the result does not depend on IntervalFeature.equals().
            cds.remove(last);
            if (cds.isEmpty()) {
                // The whole CDS was at most a stop codon: nothing sensible can be returned.
                throw new InvalidGenomicCoordsException();
            }
            last = cds.size() - 1;
            stopCds = cds.get(last);
            // remainder over-counts the spill-over by one (see above), hence the +1.
            newStop = stopCds.getTo() - remainder + 1;
        }
        if (newStop <= 0 || newStop < stopCds.getFrom()) {
            // Sanity check
            throw new InvalidGenomicCoordsException();
        }
        // We create an intervalFeature from scratch that will replace the old one.
        List<String> raw = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().splitToList(stopCds.getRaw()));
        // Replace end coord
        raw.set(4, Integer.toString(newStop));
        // Replace last element.
        cds.set(last, new IntervalFeature(Joiner.on("\t").join(raw), TrackFormat.GTF, null));
    } else if (strand == '-' && "cmpl".equals(cdsStartStat)) {
        // Same as above. This time the stop codon is at the start of the first CDS, whose
        // start has to be increased by 3.
        IntervalFeature stopCds = cds.get(0);
        int newStop = stopCds.getFrom() + 3;
        // Equals 4 - len: one more than the bases spilling over into the next CDS.
        int remainder = newStop - stopCds.getTo();
        if (remainder > 0) {
            // This CDS is entirely stop codon and must be removed; chip the spill-over
            // off the next CDS.
            cds.remove(0);
            if (cds.isEmpty()) {
                throw new InvalidGenomicCoordsException();
            }
            stopCds = cds.get(0);
            // remainder over-counts the spill-over by one, hence the -1.
            newStop = stopCds.getFrom() + remainder - 1;
        }
        if (newStop <= 0 || newStop > stopCds.getTo()) {
            // Sanity check
            throw new InvalidGenomicCoordsException();
        }
        // We create an intervalFeature from scratch that will replace the old one.
        List<String> raw = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().splitToList(stopCds.getRaw()));
        // Replace start coord
        raw.set(3, Integer.toString(newStop));
        // Replace first element.
        cds.set(0, new IntervalFeature(Joiner.on("\t").join(raw), TrackFormat.GTF, null));
    }
    return cds;
}
Also used : ArrayList(java.util.ArrayList) InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException) ArrayList(java.util.ArrayList) List(java.util.List) IntervalFeature(tracks.IntervalFeature)

Example 4 with IntervalFeature

use of tracks.IntervalFeature in project ASCIIGenome by dariober.

the class Main method main.

/**
 * Program entry point. In order, it:
 * parses the command line, initialises configuration and console, resolves the starting
 * region, builds the track set and its processor, and then runs in one of three modes:
 * batch (one "goto ... && exec" per line of the batch file, then return), non-interactive
 * (process exec once, then return) or interactive (read commands from the console forever).
 *
 * @param args Command line arguments, parsed by ArgParse.argParse.
 */
public static void main(String[] args) throws IOException, InvalidGenomicCoordsException, InvalidCommandLineException, InvalidRecordException, BamIndexNotFoundException, ClassNotFoundException, SQLException, DocumentException, UnindexableFastaFileException, InvalidColourException, InvalidConfigException {
    /* Start parsing arguments.
     * *** If you change something here change also in console input *** */
    Namespace opts = ArgParse.argParse(args);
    List<String> initFileList = opts.getList("input");
    String region = opts.getString("region");
    final String fasta = opts.getString("fasta");
    String exec = opts.getString("exec");
    String config = opts.getString("config");
    exec = parseExec(exec);
    int debug = opts.getInt("debug");
    // Get configuration. Note that we don't need to assign this to a variable.
    // NOTE(review): both constructors are called only for their side effects; presumably
    // they populate static state read later (e.g. Config.get256Color below) - confirm.
    new Config(config);
    new Xterm256();
    ASCIIGenomeHistory asciiGenomeHistory = new ASCIIGenomeHistory();
    // Init console right at start so if something goes wrong the user's terminal is reset to
    // initial defaults with the shutdown hook. This could be achieved in cleaner way probably.
    ConsoleReader console = initConsole();
    messageVersion(opts.getBoolean("noFormat"));
    /* Check the fasta argument */
    Utils.checkFasta(fasta, debug);
    /* Test input files exist */
    List<String> inputFileList = new ArrayList<String>();
    Utils.addSourceName(inputFileList, initFileList, debug);
    // No region given on the command line: derive one from the input files / fasta
    if (region == null || region.isEmpty()) {
        region = initRegion(inputFileList, fasta, null, debug);
    }
    int terminalWidth = Utils.getTerminalWidth();
    // Temporary coordinates, used only to work out sequence dictionary and fasta file
    GenomicCoords initGc = new GenomicCoords(region, terminalWidth, null, null);
    // Candidate genome sources: every input file, followed by the fasta argument
    List<String> initGenomeList = new ArrayList<String>();
    for (String x : inputFileList) {
        initGenomeList.add(x);
    }
    initGenomeList.add(fasta);
    initGc.setGenome(initGenomeList, false);
    // ----------------------------
    // Genomic positions start here:
    final GenomicCoordsHistory gch = new GenomicCoordsHistory();
    GenomicCoords start = new GenomicCoords(initGc.toStringRegion(), terminalWidth, initGc.getSamSeqDict(), initGc.getFastaFile());
    // History read from file goes in first, then the start position is appended
    gch.readHistory(asciiGenomeHistory.getFileName(), start);
    gch.add(start);
    final TrackSet trackSet = new TrackSet(inputFileList, gch.current());
    trackSet.addHistoryFiles(asciiGenomeHistory.getFiles());
    setDefaultTrackHeights(console.getTerminal().getHeight(), trackSet.getTrackList());
    final TrackProcessor proc = new TrackProcessor(trackSet, gch);
    proc.setShowMem(opts.getBoolean("showMem"));
    proc.setShowTime(opts.getBoolean("showTime"));
    proc.setNoFormat(opts.getBoolean("noFormat"));
    // Put here the previous command so that it is re-issued if no input is given
    // You have to initialize this var outside the while loop that processes input files.
    String currentCmdConcatInput = "";
    if (!proc.isNoFormat()) {
        // Emit the escape sequence selecting the configured 256-colour background
        String str = String.format("\033[48;5;%sm", Config.get256Color(ConfigKey.background));
        System.out.print(str);
    }
    // Batch processing file of regions
    final String batchFile = opts.getString("batchFile");
    if (batchFile != null && !batchFile.isEmpty()) {
        console.clearScreen();
        console.flush();
        // NOTE(review): br is closed only on the normal path; an exception or the
        // System.exit(1) below leaves it to process teardown.
        BufferedReader br = batchFileReader(batchFile);
        String line = null;
        while ((line = br.readLine()) != null) {
            // Start processing intervals one by one
            // Each line is parsed as a BED record
            IntervalFeature target = new IntervalFeature(line, TrackFormat.BED, null);
            String reg = target.getChrom() + ":" + target.getFrom() + "-" + target.getTo();
            // Go to the region then run exec; a trailing "&&" (empty exec) is stripped
            String gotoAndExec = ("goto " + reg + " && " + exec).trim().replaceAll("&&$", "");
            InteractiveInput itr = new InteractiveInput(console);
            itr.processInput(gotoAndExec, proc, debug);
            if (itr.getInteractiveInputExitCode().equals(ExitCode.ERROR)) {
                System.err.println("Error processing '" + gotoAndExec + "' at line '" + line + "'");
                System.exit(1);
            }
        }
        br.close();
        return;
    }
    // See if we need to process the exec arg before going to interactive mode.
    // Also if we are in non-interactive mode, we process the track set now and later exit
    console.clearScreen();
    console.flush();
    proc.iterateTracks();
    if (!exec.isEmpty() || opts.getBoolean("nonInteractive")) {
        InteractiveInput itr = new InteractiveInput(console);
        itr.processInput(exec, proc, debug);
        if (opts.getBoolean("nonInteractive")) {
            // Reset terminal attributes before leaving
            System.out.print("\033[0m");
            return;
        }
    }
    /* Set up done, start processing */
    /* ============================= */
    console.setHistory(asciiGenomeHistory.getCommandHistory());
    writeYamlHistory(asciiGenomeHistory, console.getHistory(), trackSet, gch);
    // This loop has no exit condition of its own: the method does not return from here.
    // NOTE(review): quitting is presumably handled inside processInput - confirm.
    while (true) {
        // keep going until quit or if no interactive input set
        // *** START processing interactive input
        // String like "zi && -F 16 && mapq 10"
        String cmdConcatInput = "";
        // A fresh InteractiveInput, starting from exit code NULL, is created on every
        // pass of the outer loop, i.e. after the inner loop ended on an ERROR exit code.
        InteractiveInput interactiveInput = new InteractiveInput(console);
        ExitCode currentExitCode = ExitCode.NULL;
        interactiveInput.setInteractiveInputExitCode(currentExitCode);
        // NOTE(review): assuming ExitCode.NULL and ExitCode.ERROR are distinct values, the
        // second operand is redundant: the loop runs as long as the exit code is not ERROR.
        while (!interactiveInput.getInteractiveInputExitCode().equals(ExitCode.ERROR) || interactiveInput.getInteractiveInputExitCode().equals(ExitCode.NULL)) {
            // Blank out the line, return the cursor to column 0 and show the prompt
            console.setPrompt(StringUtils.repeat(' ', proc.getWindowSize()) + '\r' + "[h] for help: ");
            // NOTE(review): if readLine() can return null (e.g. at end of input) the
            // trim() call throws NullPointerException - confirm against the jline API.
            cmdConcatInput = console.readLine().trim();
            if (cmdConcatInput.isEmpty()) {
                // Empty input: User only issued <ENTER>
                if (interactiveInput.getInteractiveInputExitCode().equals(ExitCode.CLEAN)) {
                    // User only issued <ENTER>: Repeat previous command if the exit code was not an error.
                    cmdConcatInput = currentCmdConcatInput;
                } else {
                    // Refresh screen if the exit code was not CLEAN.
                    cmdConcatInput = "+0";
                }
            }
            interactiveInput.processInput(cmdConcatInput, proc, debug);
            // Remember the command so that a bare <ENTER> can repeat it
            currentCmdConcatInput = cmdConcatInput;
        }
    // *** END processing interactive input
    }
}
Also used : Xterm256(coloring.Xterm256) TrackSet(tracks.TrackSet) ConsoleReader(jline.console.ConsoleReader) Config(coloring.Config) ArrayList(java.util.ArrayList) Namespace(net.sourceforge.argparse4j.inf.Namespace) BufferedReader(java.io.BufferedReader) IntervalFeature(tracks.IntervalFeature)

Example 5 with IntervalFeature

use of tracks.IntervalFeature in project ASCIIGenome by dariober.

the class Utils method initRegionFromFile.

/**
 * Get an initial region string ("chrom" or "chrom:pos") from the first usable record of the
 * input file. As you add support for more filetypes you should update this function.
 * This method is very dirty and shouldn't be trusted 100%.
 *
 * <ul>
 * <li>BAM: contig and alignment start of the first record, or the first contig of the header
 * if the file has no records.</li>
 * <li>Local bigWig / bigBed: delegated to initRegionFromBigWig / initRegionFromBigBed.</li>
 * <li>bigWig / bigBed given as URL: refused.</li>
 * <li>TDF: first chromosome name other than "All".</li>
 * <li>Anything else: treated as text (gzipped if the name ends in .gz or .bgz); lines that are
 * empty or start with "#" or "track " are skipped and the first remaining line is used. If
 * there is no such line the result is "Undefined_contig" or, for VCF, the first sequence of
 * the header dictionary when available.</li>
 * </ul>
 *
 * Readers opened here are closed before returning, also when an exception is thrown.
 *
 * @param x Path or URL of the input file.
 * @return Region string suitable to initialise genomic coordinates.
 * @throws IOException If the input cannot be opened or read.
 * @throws InvalidGenomicCoordsException If x is a bigWig/bigBed URL.
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws InvalidCommandLineException
 * @throws ClassNotFoundException
 */
@SuppressWarnings("unused")
public static String initRegionFromFile(String x) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidCommandLineException, InvalidRecordException, SQLException {
    UrlValidator urlValidator = new UrlValidator();
    String region = "";
    TrackFormat fmt = Utils.getFileTypeFromName(x);
    if (fmt.equals(TrackFormat.BAM)) {
        SamReader samReader;
        if (urlValidator.isValid(x)) {
            samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(x)));
        } else {
            SamReaderFactory srf = SamReaderFactory.make();
            srf.validationStringency(ValidationStringency.SILENT);
            samReader = srf.open(new File(x));
        }
        try {
            // Default: Start from the first contig in dictionary
            region = samReader.getFileHeader().getSequence(0).getSequenceName();
            SAMRecordIterator iter = samReader.iterator();
            if (iter.hasNext()) {
                // If there are records in this BAM, init from first record
                SAMRecord rec = iter.next();
                region = rec.getContig() + ":" + rec.getAlignmentStart();
            }
            return region;
        } finally {
            // Close the reader whether or not the BAM had records
            samReader.close();
        }
    } else if (fmt.equals(TrackFormat.BIGWIG) && !urlValidator.isValid(x)) {
        // Loading from URL is painfully slow so do not initialize from URL
        return initRegionFromBigWig(x);
    } else if (fmt.equals(TrackFormat.BIGBED) && !urlValidator.isValid(x)) {
        // Loading from URL is painfully slow so do not initialize from URL
        return initRegionFromBigBed(x);
    } else if (urlValidator.isValid(x) && (fmt.equals(TrackFormat.BIGWIG) || fmt.equals(TrackFormat.BIGBED))) {
        System.err.println("Refusing to initialize from URL");
        throw new InvalidGenomicCoordsException();
    } else if (fmt.equals(TrackFormat.TDF)) {
        Iterator<String> iter = TDFReader.getReader(x).getChromosomeNames().iterator();
        while (iter.hasNext()) {
            region = iter.next();
            if (!region.equals("All")) {
                return region;
            }
        }
        System.err.println("Cannot initialize from " + x);
        throw new RuntimeException();
    } else {
        // Input file appears to be a generic interval file. We expect chrom to be in column 1
        // VCF files are also included here since they are either gzip or plain ASCII.
        BufferedReader br;
        GZIPInputStream gzipStream;
        if (x.toLowerCase().endsWith(".gz") || x.toLowerCase().endsWith(".bgz")) {
            InputStream rawStream;
            if (urlValidator.isValid(x)) {
                rawStream = new URL(x).openStream();
            } else {
                rawStream = new FileInputStream(x);
            }
            try {
                gzipStream = new GZIPInputStream(rawStream);
            } catch (IOException e) {
                // Not a readable gzip stream: do not leave the underlying stream open
                rawStream.close();
                throw e;
            }
            Reader decoder = new InputStreamReader(gzipStream, "UTF-8");
            br = new BufferedReader(decoder);
        } else {
            if (urlValidator.isValid(x)) {
                InputStream instream = new URL(x).openStream();
                Reader decoder = new InputStreamReader(instream, "UTF-8");
                br = new BufferedReader(decoder);
            } else {
                br = new BufferedReader(new FileReader(x));
            }
        }
        try {
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.startsWith("#") || line.isEmpty() || line.startsWith("track ")) {
                    continue;
                }
                // First data line: this is all we need
                if (fmt.equals(TrackFormat.VCF)) {
                    String[] fields = line.split("\t");
                    return fields[0] + ":" + fields[1];
                }
                IntervalFeature feature = new IntervalFeature(line, fmt, null);
                return feature.getChrom() + ":" + feature.getFrom();
            }
            // End of input reached without finding a record
            region = "Undefined_contig";
            if (fmt.equals(TrackFormat.VCF)) {
                // An empty VCF may still name its contigs in the header
                SAMSequenceDictionary seqdict = getVCFHeader(x).getSequenceDictionary();
                if (seqdict != null) {
                    Iterator<SAMSequenceRecord> iter = seqdict.getSequences().iterator();
                    if (iter.hasNext()) {
                        region = iter.next().getSequenceName();
                    }
                }
            }
            return region;
        } finally {
            br.close();
        }
    }
}
Also used : TrackFormat(tracks.TrackFormat) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) InputStreamReader(java.io.InputStreamReader) GZIPInputStream(java.util.zip.GZIPInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) Reader(java.io.Reader) AbstractFeatureReader(htsjdk.tribble.AbstractFeatureReader) TabixReader(htsjdk.tribble.readers.TabixReader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) TDFReader(org.broad.igv.tdf.TDFReader) BBFileReader(org.broad.igv.bbfile.BBFileReader) SamReader(htsjdk.samtools.SamReader) FileReader(java.io.FileReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) URL(java.net.URL) FileInputStream(java.io.FileInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) UrlValidator(org.apache.commons.validator.routines.UrlValidator) InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException) BufferedReader(java.io.BufferedReader) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) BBFileReader(org.broad.igv.bbfile.BBFileReader) FileReader(java.io.FileReader) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IntervalFeature(tracks.IntervalFeature)

Aggregations

IntervalFeature (tracks.IntervalFeature)11 ArrayList (java.util.ArrayList)9 InvalidGenomicCoordsException (exceptions.InvalidGenomicCoordsException)2 BufferedReader (java.io.BufferedReader)2 Config (coloring.Config)1 Xterm256 (coloring.Xterm256)1 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)1 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 SamReader (htsjdk.samtools.SamReader)1 SamReaderFactory (htsjdk.samtools.SamReaderFactory)1 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)1 AbstractFeatureReader (htsjdk.tribble.AbstractFeatureReader)1 TabixReader (htsjdk.tribble.readers.TabixReader)1 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 FileReader (java.io.FileReader)1