use of com.github.lindenb.jvarkit.jexl.JexlPredicate in project jvarkit by lindenb.
the class KnownGenesToBed method scan.
//
/**
 * Reads tab-delimited known-gene rows from {@code r} and prints, for every
 * retained transcript, its features through {@code print(...)}.
 *
 * <p>For each transcript the emission order is: the TRANSCRIPT itself, then for
 * every exon (in index order) the EXON, the UTR lying before the CDS start, the
 * CDS part of the exon, the INTRON following the exon and finally the UTR lying
 * after the CDS end.
 *
 * <p>{@code this.hideStr} is a comma-separated, case-insensitive list of feature
 * kinds to suppress: INTRON(S), UTR(S), CDS, EXON(S), TRANSCRIPT(S), NON_CODING
 * and CODING. Blank items are skipped and unknown items are ignored.
 * {@code this.selectExpr}, when not blank, is compiled into a JEXL predicate
 * that every transcript must satisfy.
 *
 * @param r the reader supplying one known-gene record per line
 * @throws IOException if reading from {@code r} fails
 */
private void scan(final BufferedReader r) throws IOException {
    boolean hide_introns = false;
    boolean hide_utrs = false;
    boolean hide_cds = false;
    boolean hide_exons = false;
    boolean hide_transcripts = false;
    boolean hide_non_coding = false;
    boolean hide_coding = false;
    final Predicate<JexlContext> predicate = StringUtils.isBlank(selectExpr) ? KG -> true : new JexlPredicate(this.selectExpr);
    for (final String item : CharSplitter.COMMA.split(this.hideStr)) {
        if (StringUtils.isBlank(item)) {
            continue;
        }
        // equalsIgnoreCase is locale-independent, unlike toUpperCase() without a Locale.
        final String str = item.trim();
        // "INSTRON"/"INSTRONS" are the misspelled tokens that used to be the only
        // ones recognized; they are kept as aliases so existing command lines still work.
        if (str.equalsIgnoreCase("INTRON") || str.equalsIgnoreCase("INTRONS")
                || str.equalsIgnoreCase("INSTRON") || str.equalsIgnoreCase("INSTRONS")) {
            hide_introns = true;
        }
        // The former comparison against "UTRs" could never match an upper-cased token.
        if (str.equalsIgnoreCase("UTR") || str.equalsIgnoreCase("UTRS")) {
            hide_utrs = true;
        }
        if (str.equalsIgnoreCase("CDS")) {
            hide_cds = true;
        }
        if (str.equalsIgnoreCase("EXON") || str.equalsIgnoreCase("EXONS")) {
            hide_exons = true;
        }
        if (str.equalsIgnoreCase("TRANSCRIPT") || str.equalsIgnoreCase("TRANSCRIPTS")) {
            hide_transcripts = true;
        }
        if (str.equalsIgnoreCase("NON_CODING")) {
            hide_non_coding = true;
        }
        if (str.equalsIgnoreCase("CODING")) {
            hide_coding = true;
        }
    }
    String line;
    final CharSplitter tab = CharSplitter.TAB;
    while ((line = r.readLine()) != null) {
        // Stop reading as soon as the output reports an error.
        if (out.checkError()) {
            break;
        }
        final String[] tokens = tab.split(line);
        final KnownGene kg = new KnownGene(tokens);
        if (hide_coding && !kg.isNonCoding()) {
            continue;
        }
        if (hide_non_coding && kg.isNonCoding()) {
            continue;
        }
        if (!predicate.test(new KgContext(kg))) {
            continue;
        }
        if (!hide_transcripts) {
            print(kg, kg.getTxStart(), kg.getTxEnd(), "TRANSCRIPT", kg.getName());
        }
        for (int i = 0; i < kg.getExonCount(); ++i) {
            final KnownGene.Exon exon = kg.getExon(i);
            if (!hide_exons) {
                print(kg, exon.getStart(), exon.getEnd(), "EXON", exon.getName());
            }
            // Part of the exon located before the CDS start: 5' UTR on the positive strand, 3' otherwise.
            if (!hide_utrs && kg.getCdsStart() > exon.getStart()) {
                print(kg, exon.getStart(), Math.min(kg.getCdsStart(), exon.getEnd()), "UTR", "UTR" + (kg.isPositiveStrand() ? "5" : "3"));
            }
            // Part of the exon overlapping the CDS interval, clipped to the exon bounds.
            if (!hide_cds && !(kg.getCdsStart() >= exon.getEnd() || kg.getCdsEnd() < exon.getStart())) {
                print(kg, Math.max(kg.getCdsStart(), exon.getStart()), Math.min(kg.getCdsEnd(), exon.getEnd()), "CDS", exon.getName());
            }
            final KnownGene.Intron intron = exon.getNextIntron();
            if (!hide_introns && intron != null) {
                print(kg, intron.getStart(), intron.getEnd(), "INTRON", intron.getName());
            }
            // Part of the exon located after the CDS end: 3' UTR on the positive strand, 5' otherwise.
            if (!hide_utrs && kg.getCdsEnd() < exon.getEnd()) {
                print(kg, Math.max(kg.getCdsEnd(), exon.getStart()), exon.getEnd(), "UTR", "UTR" + (kg.isPositiveStrand() ? "3" : "5"));
            }
        }
    }
}
use of com.github.lindenb.jvarkit.jexl.JexlPredicate in project jvarkit by lindenb.
the class VcfUcscGdb method readRemoteResources.
/**
 * Parses a resource description file into a list of {@code RemoteBigFile}.
 *
 * <p>The file is a sequence of stanzas separated by blank lines. Each non-blank
 * line that does not start with {@code #} is a {@code key:value} (or
 * {@code key=value}) pair; keys are trimmed and lower-cased before being stored,
 * so every lookup below must use a lower-case key. A stanza whose
 * {@code enabled} key equals {@code false} is skipped.
 *
 * <p>Keys read from a stanza: {@code url} (required), {@code name},
 * {@code accept}, {@code tostring}, {@code desc}/{@code description},
 * {@code limit}, {@code fractv} and {@code aggregate} ({@code min} or
 * {@code max}). When {@code name} is absent it is derived from the last path
 * segment of the url, without its last extension and with {@code .},
 * {@code -} and {@code ,} replaced by {@code _}.
 *
 * @param path the description file; must be readable
 * @return the resources in file order
 * @throws IOException if the file cannot be read or a resource fails to close
 * @throws RuntimeIOException on a malformed line, a duplicate key, a missing or
 *         empty required key, a duplicate resource name or a bad aggregate value
 */
private List<RemoteBigFile> readRemoteResources(final Path path) throws IOException {
    final List<RemoteBigFile> remoteBigFiles = new ArrayList<>();
    IOUtil.assertFileIsReadable(path);
    try (BufferedReader br = IOUtil.openFileForBufferedReading(path)) {
        final HashMap<String, String> hash = new HashMap<>();
        // Returns the trimmed, non-blank value of a mandatory key of the current stanza.
        final Function<String, String> required = (K) -> {
            if (!hash.containsKey(K))
                throw new RuntimeIOException("Key \"" + K + "\" missing. Found: " + hash.keySet());
            final String v = hash.get(K).trim();
            if (StringUtils.isBlank(v))
                throw new RuntimeIOException("Key \"" + K + "\" is empty");
            return v;
        };
        try (LineIterator iter = new LineIterator(br)) {
            for (; ; ) {
                // A null line marks the end of input and also flushes the last stanza.
                final String line = (iter.hasNext() ? iter.next() : null);
                if (StringUtils.isBlank(line)) {
                    if (hash.getOrDefault("enabled", "true").equals("false")) {
                        hash.clear();
                    }
                    if (!hash.isEmpty()) {
                        final RemoteBigFile bf = new RemoteBigFile();
                        try {
                            bf.url = required.apply("url");
                            if (hash.containsKey("name")) {
                                bf.name = hash.get("name");
                            } else {
                                bf.name = bf.url;
                                final int slash = bf.name.lastIndexOf('/');
                                bf.name = bf.name.substring(slash + 1);
                                final int dot = bf.name.lastIndexOf('.');
                                // No extension: keep the whole segment instead of failing on substring(0, -1).
                                if (dot != -1) {
                                    bf.name = bf.name.substring(0, dot);
                                }
                                bf.name = bf.name.replace('.', '_').replace('-', '_').replace(',', '_');
                            }
                            if (remoteBigFiles.stream().anyMatch(R -> R.name.equals(bf.name))) {
                                throw new RuntimeIOException("Duplicate remote resource: " + hash);
                            }
                            if (hash.containsKey("accept")) {
                                bf.accept = new JexlPredicate(hash.get("accept"));
                            }
                            if (hash.containsKey("tostring")) {
                                bf.converter = new JexlToString(hash.get("tostring"));
                            }
                            if (hash.containsKey("desc")) {
                                bf.description = hash.get("desc");
                            } else if (hash.containsKey("description")) {
                                bf.description = hash.get("description");
                            } else {
                                bf.description = "Data from " + bf.url;
                            }
                            if (hash.containsKey("limit")) {
                                bf.limit = Integer.parseInt(hash.get("limit"));
                            }
                            // Keys are stored lower-cased, so the option written "fractV" is found as "fractv".
                            if (hash.containsKey("fractv")) {
                                bf.fractionOfVariant = Double.parseDouble(hash.get("fractv"));
                            }
                            // NOTE(review): this branch can never run because keys are lower-cased
                            // when stored, and it assigns fractionOfVariant, which looks like a
                            // copy/paste of the branch above. Left untouched until the intended
                            // target field is confirmed.
                            if (hash.containsKey("fractF")) {
                                bf.fractionOfVariant = Double.parseDouble(hash.get("fractF"));
                            }
                            if (hash.containsKey("aggregate")) {
                                bf.wigAggregate = hash.get("aggregate");
                                if (!(bf.wigAggregate.equals("min") || bf.wigAggregate.equals("max"))) {
                                    throw new RuntimeIOException("Bad value for aggregate accepted:(min/max))");
                                }
                            }
                        } catch (final RuntimeException err) {
                            // Release the half-configured resource on every failure path
                            // (bad number, missing url, duplicate name, bad aggregate...).
                            bf.close();
                            throw err;
                        }
                        remoteBigFiles.add(bf);
                    }
                    if (line == null)
                        break;
                    hash.clear();
                    continue;
                }
                if (line.startsWith("#"))
                    continue;
                // ':' takes precedence over '=' as the key/value separator.
                int sep = line.indexOf(':');
                if (sep == -1)
                    sep = line.indexOf('=');
                if (sep == -1)
                    throw new RuntimeIOException("Cannot find ':' or '=' in " + line);
                final String key = line.substring(0, sep).toLowerCase().trim();
                if (hash.containsKey(key))
                    throw new RuntimeIOException("Duplicate key " + key + " in resource: " + hash);
                final String value = line.substring(sep + 1).trim();
                hash.put(key, value);
            }
        }
    }
    return remoteBigFiles;
}
use of com.github.lindenb.jvarkit.jexl.JexlPredicate in project jvarkit by lindenb.
the class VcfUcsc method beforeVcf.
/**
 * Validates the options, compiles the optional JEXL expressions, opens the
 * JDBC connection and discovers which columns of {@code database.table} hold
 * the chromosome, start and end of a row.
 *
 * <p>The column set is obtained with a {@code describe} query. The chromosome
 * column must be {@code chrom}; the start column is the first available among
 * {@code txStart}, {@code cdsStart}, {@code chromStart}; the end column is the
 * first available among {@code txEnd}, {@code cdsEnd}, {@code chromEnd}.
 * Columns that are already set are left unchanged.
 *
 * @return 0 on success, -1 on any error (the error is logged)
 */
@Override
protected int beforeVcf() {
    try {
        if (StringUtil.isBlank(this.table)) {
            LOG.error("Table undefined.");
            return -1;
        }
        if (!StringUtil.isBlank(this.filterIn) && !StringUtil.isBlank(this.filterOut)) {
            LOG.error("both filters in/out defined.");
            return -1;
        }
        if (!StringUtil.isBlank(this.acceptExpr)) {
            this.acceptRowFunc = new JexlPredicate(this.acceptExpr);
        }
        if (!StringUtil.isBlank(this.convertToStrExpr)) {
            this.toStringFunc = new JexlToString(this.convertToStrExpr);
        }
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
    try {
        LOG.info("Getting jdbc-driver");
        Class.forName("com.mysql.jdbc.Driver");
        this.connection = DriverManager.getConnection(jdbcuri + "/" + database + "?user=genome&password=");
        LOG.info("Getting jdbc-driver: Done.");
        final Set<String> cols = new HashSet<String>();
        // NOTE(review): database and table names are concatenated into the statement;
        // identifiers cannot be bound as JDBC parameters, so they must come from a trusted source.
        // try-with-resources closes the result set and the statement on every exit path,
        // including the early return below and any SQLException.
        try (Statement stmt = this.connection.createStatement();
                ResultSet row = stmt.executeQuery("describe " + this.database + "." + this.table)) {
            while (row.next()) {
                final String colName = row.getString("Field");
                if (StringUtil.isBlank(colName)) {
                    LOG.error("empty field in " + this.database + "." + this.table);
                    return -1;
                }
                cols.add(colName);
            }
        }
        this.has_bin_column = cols.contains("bin");
        for (final String col : new String[] { "chrom" }) {
            if (this.chromColumn == null && cols.contains(col)) {
                this.chromColumn = col;
            }
        }
        if (this.chromColumn == null) {
            LOG.error("cannot find 'chrom' in the columns of '" + this.database + "." + this.table + "' : " + cols);
            return -1;
        }
        // Candidates are tried in order; the first one present in the table wins.
        for (final String col : new String[] { "txStart", "cdsStart", "chromStart" }) {
            if (this.startColumn == null && cols.contains(col)) {
                this.startColumn = col;
            }
        }
        if (this.startColumn == null) {
            LOG.error("cannot find startColumn in " + cols);
            return -1;
        }
        for (final String col : new String[] { "txEnd", "cdsEnd", "chromEnd" }) {
            if (this.endColumn == null && cols.contains(col)) {
                this.endColumn = col;
            }
        }
        if (this.endColumn == null) {
            LOG.error("cannot find endColumn in " + cols);
            return -1;
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Aggregations