Search in sources :

Example 1 with JexlPredicate

use of com.github.lindenb.jvarkit.jexl.JexlPredicate in project jvarkit by lindenb.

the class KnownGenesToBed method scan.

// 
/**
 * Reads gene records line by line from {@code r} and prints the requested
 * features of each record through {@code print(...)}.
 *
 * <p>Each input line is split on tabs and turned into a {@link KnownGene}.
 * The comma-separated {@code hideStr} option selects which feature types are
 * suppressed (case-insensitive): {@code INTRON(S)}, {@code UTR(S)}, {@code CDS},
 * {@code EXON(S)}, {@code TRANSCRIPT(S)}, {@code NON_CODING}, {@code CODING}.
 * Unknown tokens are ignored. When {@code selectExpr} is not blank, it is used
 * as a JEXL predicate and records that do not satisfy it are skipped.
 *
 * @param r the reader providing one tab-delimited record per line
 * @throws IOException if reading from {@code r} fails
 */
private void scan(final BufferedReader r) throws IOException {
    boolean hide_introns = false;
    boolean hide_utrs = false;
    boolean hide_cds = false;
    boolean hide_exons = false;
    boolean hide_transcripts = false;
    boolean hide_non_coding = false;
    boolean hide_coding = false;
    final Predicate<JexlContext> predicate = StringUtils.isBlank(selectExpr) ? KG -> true : new JexlPredicate(this.selectExpr);
    for (final String str : CharSplitter.COMMA.split(this.hideStr)) {
        if (StringUtils.isBlank(str)) {
            continue;
        }
        // Tokens are upper-cased first, so every label below must be upper-case too.
        switch (str.trim().toUpperCase()) {
            case "INTRON":
            case "INTRONS":
            // Historical misspellings, still accepted so existing command lines keep working.
            case "INSTRON":
            case "INSTRONS":
                hide_introns = true;
                break;
            case "UTR":
            case "UTRS":
                hide_utrs = true;
                break;
            case "CDS":
                hide_cds = true;
                break;
            case "EXON":
            case "EXONS":
                hide_exons = true;
                break;
            case "TRANSCRIPT":
            case "TRANSCRIPTS":
                hide_transcripts = true;
                break;
            case "NON_CODING":
                hide_non_coding = true;
                break;
            case "CODING":
                hide_coding = true;
                break;
            default:
                // unknown keyword: ignored, as before
                break;
        }
    }
    String line;
    final CharSplitter tab = CharSplitter.TAB;
    while ((line = r.readLine()) != null) {
        // Stop early if the output stream has reported an error.
        if (out.checkError()) {
            break;
        }
        final String[] tokens = tab.split(line);
        final KnownGene kg = new KnownGene(tokens);
        if (hide_coding && !kg.isNonCoding()) {
            continue;
        }
        if (hide_non_coding && kg.isNonCoding()) {
            continue;
        }
        if (!predicate.test(new KgContext(kg))) {
            continue;
        }
        if (!hide_transcripts) {
            print(kg, kg.getTxStart(), kg.getTxEnd(), "TRANSCRIPT", kg.getName());
        }
        for (int i = 0; i < kg.getExonCount(); ++i) {
            final KnownGene.Exon exon = kg.getExon(i);
            if (!hide_exons) {
                print(kg, exon.getStart(), exon.getEnd(), "EXON", exon.getName());
            }
            // Part of the exon located before the CDS start: 5' UTR on the plus strand, 3' otherwise.
            if (!hide_utrs && kg.getCdsStart() > exon.getStart()) {
                print(kg, exon.getStart(), Math.min(kg.getCdsStart(), exon.getEnd()), "UTR", "UTR" + (kg.isPositiveStrand() ? "5" : "3"));
            }
            // Exon overlaps the CDS interval: print the overlapping part.
            if (!hide_cds && !(kg.getCdsStart() >= exon.getEnd() || kg.getCdsEnd() < exon.getStart())) {
                print(kg, Math.max(kg.getCdsStart(), exon.getStart()), Math.min(kg.getCdsEnd(), exon.getEnd()), "CDS", exon.getName());
            }
            final KnownGene.Intron intron = exon.getNextIntron();
            if (!hide_introns && intron != null) {
                print(kg, intron.getStart(), intron.getEnd(), "INTRON", intron.getName());
            }
            // Part of the exon located after the CDS end: 3' UTR on the plus strand, 5' otherwise.
            if (!hide_utrs && kg.getCdsEnd() < exon.getEnd()) {
                print(kg, Math.max(kg.getCdsEnd(), exon.getStart()), exon.getEnd(), "UTR", "UTR" + (kg.isPositiveStrand() ? "3" : "5"));
            }
        }
    }
}
Also used : CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) JexlPredicate(com.github.lindenb.jvarkit.jexl.JexlPredicate) JexlContext(org.apache.commons.jexl2.JexlContext) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene)

Example 2 with JexlPredicate

use of com.github.lindenb.jvarkit.jexl.JexlPredicate in project jvarkit by lindenb.

the class VcfUcscGdb method readRemoteResources.

/**
 * Parses a resource description file into a list of {@code RemoteBigFile}.
 *
 * <p>The file is a sequence of records separated by blank lines. Each record
 * is made of {@code key:value} or {@code key=value} lines; lines starting with
 * {@code #} are ignored. Keys are trimmed and lower-cased before being stored.
 * A record containing {@code enabled=false} is skipped. Recognised keys:
 * {@code url} (required), {@code name}, {@code accept}, {@code tostring},
 * {@code desc}/{@code description}, {@code limit}, {@code fractv},
 * {@code aggregate} ({@code min} or {@code max}).
 *
 * @param path the file to read; must be readable
 * @return the resources declared in the file, in file order
 * @throws IOException if the file cannot be read
 * @throws RuntimeIOException on a malformed line, a duplicate key, a missing or
 *         empty {@code url}, a duplicate resource name or a bad {@code aggregate} value
 */
private List<RemoteBigFile> readRemoteResources(final Path path) throws IOException {
    final List<RemoteBigFile> remoteBigFiles = new ArrayList<>();
    IOUtil.assertFileIsReadable(path);
    try (BufferedReader br = IOUtil.openFileForBufferedReading(path)) {
        final HashMap<String, String> hash = new HashMap<>();
        // Fetches a mandatory, non-blank value of the current record.
        final Function<String, String> required = (K) -> {
            if (!hash.containsKey(K)) {
                throw new RuntimeIOException("Key \"" + K + "\" missing. Found: " + hash.keySet());
            }
            final String v = hash.get(K).trim();
            if (StringUtils.isBlank(v)) {
                throw new RuntimeIOException("Key \"" + K + "\" is empty");
            }
            return v;
        };
        try (LineIterator iter = new LineIterator(br)) {
            for (; ; ) {
                // A null line marks end of input; it also flushes the last record.
                final String line = (iter.hasNext() ? iter.next() : null);
                if (StringUtils.isBlank(line)) {
                    if (hash.getOrDefault("enabled", "true").equals("false")) {
                        hash.clear();
                    }
                    if (!hash.isEmpty()) {
                        final RemoteBigFile bf = new RemoteBigFile();
                        bf.url = required.apply("url");
                        if (hash.containsKey("name")) {
                            bf.name = hash.get("name");
                        } else {
                            // Default name: last path segment of the URL, without its
                            // extension, with '.', '-' and ',' replaced by '_'.
                            bf.name = bf.url;
                            final int slash = bf.name.lastIndexOf('/');
                            bf.name = bf.name.substring(slash + 1);
                            final int dot = bf.name.lastIndexOf('.');
                            // No extension: keep the whole segment rather than failing on substring(0, -1).
                            if (dot != -1) {
                                bf.name = bf.name.substring(0, dot);
                            }
                            bf.name = bf.name.replace('.', '_').replace('-', '_').replace(',', '_');
                        }
                        if (remoteBigFiles.stream().anyMatch(R -> R.name.equals(bf.name))) {
                            bf.close();
                            throw new RuntimeIOException("Duplicate remote resource: " + hash);
                        }
                        if (hash.containsKey("accept")) {
                            bf.accept = new JexlPredicate(hash.get("accept"));
                        }
                        if (hash.containsKey("tostring")) {
                            bf.converter = new JexlToString(hash.get("tostring"));
                        }
                        if (hash.containsKey("desc")) {
                            bf.description = hash.get("desc");
                        } else if (hash.containsKey("description")) {
                            bf.description = hash.get("description");
                        } else {
                            bf.description = "Data from " + bf.url;
                        }
                        if (hash.containsKey("limit")) {
                            bf.limit = Integer.parseInt(hash.get("limit"));
                        }
                        // Keys are stored lower-cased, so the lookup must be lower-case as well
                        // (the former "fractV" lookup could never match).
                        if (hash.containsKey("fractv")) {
                            bf.fractionOfVariant = Double.parseDouble(hash.get("fractv"));
                        }
                        // NOTE(review): this lookup can never match either (mixed-case key), and it
                        // assigns fractionOfVariant, which looks like a copy/paste of the branch above.
                        // Left untouched because the intended target field is not visible here;
                        // confirm the field, then lower-case the key.
                        if (hash.containsKey("fractF")) {
                            bf.fractionOfVariant = Double.parseDouble(hash.get("fractF"));
                        }
                        if (hash.containsKey("aggregate")) {
                            bf.wigAggregate = hash.get("aggregate");
                            if (!(bf.wigAggregate.equals("min") || bf.wigAggregate.equals("max"))) {
                                bf.close();
                                throw new RuntimeIOException("Bad value for aggregate accepted:(min/max))");
                            }
                        }
                        remoteBigFiles.add(bf);
                    }
                    if (line == null) {
                        break;
                    }
                    hash.clear();
                    continue;
                }
                if (line.startsWith("#")) {
                    continue;
                }
                // ':' is preferred as separator; '=' is the fallback.
                int sep = line.indexOf(':');
                if (sep == -1) {
                    sep = line.indexOf('=');
                }
                if (sep == -1) {
                    throw new RuntimeIOException("Cannot find ':' or '=' in  " + line);
                }
                final String key = line.substring(0, sep).toLowerCase().trim();
                if (hash.containsKey(key)) {
                    throw new RuntimeIOException("Duplicate key " + key + " in resource: " + hash);
                }
                final String value = line.substring(sep + 1).trim();
                hash.put(key, value);
            }
        }
    }
    return remoteBigFiles;
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) Arrays(java.util.Arrays) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) IOUtil(htsjdk.samtools.util.IOUtil) VCFHeader(htsjdk.variant.vcf.VCFHeader) AbstractList(java.util.AbstractList) HashMap(java.util.HashMap) Function(java.util.function.Function) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) LinkedHashSet(java.util.LinkedHashSet) CloserUtil(htsjdk.samtools.util.CloserUtil) Locatable(htsjdk.samtools.util.Locatable) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) LineIterator(com.github.lindenb.jvarkit.util.iterator.LineIterator) Set(java.util.Set) IOException(java.io.IOException) BedFeature(org.broad.igv.bbfile.BedFeature) JexlToString(com.github.lindenb.jvarkit.jexl.JexlToString) List(java.util.List) BigBedIterator(org.broad.igv.bbfile.BigBedIterator) WigItem(org.broad.igv.bbfile.WigItem) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) JexlContext(org.apache.commons.jexl2.JexlContext) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) Closeable(java.io.Closeable) CoordMath(htsjdk.samtools.util.CoordMath) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) BBFileReader(org.broad.igv.bbfile.BBFileReader) JexlPredicate(com.github.lindenb.jvarkit.jexl.JexlPredicate) 
VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) BigWigIterator(org.broad.igv.bbfile.BigWigIterator) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) JexlToString(com.github.lindenb.jvarkit.jexl.JexlToString) JexlToString(com.github.lindenb.jvarkit.jexl.JexlToString) LineIterator(com.github.lindenb.jvarkit.util.iterator.LineIterator) JexlPredicate(com.github.lindenb.jvarkit.jexl.JexlPredicate) BufferedReader(java.io.BufferedReader)

Example 3 with JexlPredicate

use of com.github.lindenb.jvarkit.jexl.JexlPredicate in project jvarkit by lindenb.

the class VcfUcsc method beforeVcf.

/**
 * Validates the options, compiles the optional JEXL expressions, opens the
 * JDBC connection and discovers the columns of {@code database.table} used
 * as chromosome, start and end.
 *
 * <p>On success the fields {@code connection}, {@code has_bin_column},
 * {@code chromColumn}, {@code startColumn} and {@code endColumn} are set.
 * The connection is left open; it is not closed by this method.
 *
 * @return 0 on success, -1 on any error (the error is logged)
 */
@Override
protected int beforeVcf() {
    try {
        if (StringUtil.isBlank(this.table)) {
            LOG.error("Table undefined.");
            return -1;
        }
        if (!StringUtil.isBlank(this.filterIn) && !StringUtil.isBlank(this.filterOut)) {
            LOG.error("both filters in/out defined.");
            return -1;
        }
        if (!StringUtil.isBlank(this.acceptExpr)) {
            this.acceptRowFunc = new JexlPredicate(this.acceptExpr);
        }
        if (!StringUtil.isBlank(this.convertToStrExpr)) {
            this.toStringFunc = new JexlToString(this.convertToStrExpr);
        }
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
    try {
        LOG.info("Getting jdbc-driver");
        Class.forName("com.mysql.jdbc.Driver");
        this.connection = DriverManager.getConnection(jdbcuri + "/" + database + "?user=genome&password=");
        LOG.info("Getting jdbc-driver: Done.");
        final Set<String> cols = new HashSet<String>();
        // try-with-resources: the statement and result set are released even on the
        // early return below or when the query throws.
        // NOTE(review): database/table are concatenated into the statement; identifiers
        // cannot be bound as parameters, so they must come from a trusted source.
        try (Statement stmt = this.connection.createStatement();
                ResultSet row = stmt.executeQuery("describe " + this.database + "." + this.table)) {
            while (row.next()) {
                final String colName = row.getString("Field");
                if (StringUtil.isBlank(colName)) {
                    LOG.error("empty field in " + this.database + "." + this.table);
                    return -1;
                }
                cols.add(colName);
            }
        }
        this.has_bin_column = cols.contains("bin");
        if (this.chromColumn == null && cols.contains("chrom")) {
            this.chromColumn = "chrom";
        }
        if (this.chromColumn == null) {
            LOG.error("cannot find 'chrom' in the columns of '" + this.database + "." + this.table + "' : " + cols);
            return -1;
        }
        // The first candidate present in the table wins, unless a column was already set.
        for (final String col : new String[] { "txStart", "cdsStart", "chromStart" }) {
            if (this.startColumn == null && cols.contains(col)) {
                this.startColumn = col;
            }
        }
        if (this.startColumn == null) {
            LOG.error("cannot find startColumn in " + cols);
            return -1;
        }
        for (final String col : new String[] { "txEnd", "cdsEnd", "chromEnd" }) {
            if (this.endColumn == null && cols.contains(col)) {
                this.endColumn = col;
            }
        }
        if (this.endColumn == null) {
            LOG.error("cannot find endColumn in " + cols);
            return -1;
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : JexlPredicate(com.github.lindenb.jvarkit.jexl.JexlPredicate) PreparedStatement(java.sql.PreparedStatement) Statement(java.sql.Statement) ResultSet(java.sql.ResultSet) JexlToString(com.github.lindenb.jvarkit.jexl.JexlToString) JexlToString(com.github.lindenb.jvarkit.jexl.JexlToString) HashSet(java.util.HashSet)

Aggregations

JexlPredicate (com.github.lindenb.jvarkit.jexl.JexlPredicate)3 JexlToString (com.github.lindenb.jvarkit.jexl.JexlToString)2 JexlContext (org.apache.commons.jexl2.JexlContext)2 Parameter (com.beust.jcommander.Parameter)1 ParametersDelegate (com.beust.jcommander.ParametersDelegate)1 CharSplitter (com.github.lindenb.jvarkit.lang.CharSplitter)1 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)1 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)1 LineIterator (com.github.lindenb.jvarkit.util.iterator.LineIterator)1 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)1 Program (com.github.lindenb.jvarkit.util.jcommander.Program)1 Logger (com.github.lindenb.jvarkit.util.log.Logger)1 KnownGene (com.github.lindenb.jvarkit.util.ucsc.KnownGene)1 WritingVariantsDelegate (com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate)1 CloserUtil (htsjdk.samtools.util.CloserUtil)1 CoordMath (htsjdk.samtools.util.CoordMath)1 IOUtil (htsjdk.samtools.util.IOUtil)1 Interval (htsjdk.samtools.util.Interval)1 Locatable (htsjdk.samtools.util.Locatable)1 RuntimeIOException (htsjdk.samtools.util.RuntimeIOException)1