Use of exceptions.InvalidCommandLineException in project ASCIIGenome by dariober.
Class TrackSet, method replaceAwkFuncs.
/**
 * Replace in the awk script every call to the overloaded function {@code get(...)} with the
 * concrete function for the given track, quoting the first argument (the tag name).
 *
 * <p>The replacement name depends on the track format: {@code getSamTag} for BAM,
 * {@code getGtfTag} for GTF, {@code getGffTag} for GFF. For VCF it is {@code getFmtTag} when the
 * tag starts with {@code FMT/} or has a FORMAT line in the VCF header, otherwise
 * {@code getInfoTag} when the tag starts with {@code INFO/} or has an INFO line in the header.
 *
 * <p>Only {@code get(} preceded by a word boundary is rewritten, so identifiers that merely end
 * in "get" (e.g. {@code target(x)}) are left untouched. An argument list runs up to the first
 * closing parenthesis on the same line.
 *
 * @param awkScript awk script possibly containing calls to {@code get(...)}
 * @param track track the script is applied to; its format selects the replacement function
 * @return the script with all {@code get(...)} calls replaced
 * @throws InvalidCommandLineException if the track format does not support {@code get}, if a
 *     VCF tag cannot be resolved to INFO or FORMAT, or if a {@code get(} call is malformed
 *     (e.g. missing arguments or closing parenthesis)
 */
private String replaceAwkFuncs(String awkScript, Track track) throws InvalidCommandLineException {
    // Function name to be replaced
    final String FUNC = "get";
    // Start of a call. The word boundary stops us from matching inside e.g. "target(".
    final Pattern callStart = Pattern.compile("\\b" + FUNC + "\\(");
    // A complete call such as "get(DP, 1, 2)": FUNC( ... up to the first ')'.
    final Pattern call = Pattern.compile("\\b" + FUNC + "\\(.+?\\)");

    StringBuilder rewritten = new StringBuilder();
    int copiedUpTo = 0; // Index in awkScript up to which text has already been emitted
    Matcher m = call.matcher(awkScript);
    while (m.find()) {
        // this looks like "get(DP, 1, 2)"
        String func = m.group();
        // Remove func name and brackets, leave only param list. E.g. ["DP", "1", "2"]
        String xargs = func.replaceAll(".*\\(", "").replaceAll("\\).*", "");
        List<String> args = new ArrayList<String>(Splitter.on(",").splitToList(xargs));
        // Get the first arg i.e., the tag name, possibly in double quotes.
        String tag = args.get(0).trim().replaceAll("^\"", "").replaceAll("\"$", "");

        // Depending on track type and tag we decide what the replacement function is
        String fname;
        if (track.getTrackFormat().equals(TrackFormat.BAM)) {
            fname = "getSamTag";
        } else if (track.getTrackFormat().equals(TrackFormat.VCF)) {
            if (tag.trim().startsWith("FMT/") || track.getVcfHeader().getFormatHeaderLine(tag) != null) {
                fname = "getFmtTag";
            } else if (tag.trim().startsWith("INFO/") || track.getVcfHeader().getInfoHeaderLine(tag) != null) {
                fname = "getInfoTag";
            } else {
                System.err.println("Tag " + tag + " not found in VCF header of track " + track.getTrackTag() + ".\n" + "Please prepend 'INFO/' or 'FMT/' to the tag to search the INFO or FORMAT fields, respectively.");
                throw new InvalidCommandLineException();
            }
        } else if (track.getTrackFormat().equals(TrackFormat.GTF)) {
            fname = "getGtfTag";
        } else if (track.getTrackFormat().equals(TrackFormat.GFF)) {
            fname = "getGffTag";
        } else {
            System.err.println("Function " + FUNC + " is not available for track type " + track.getTrackFormat() + ".");
            throw new InvalidCommandLineException();
        }

        // The tag is always passed on double-quoted, whether or not the user quoted it.
        args.set(0, '"' + tag + '"');
        rewritten.append(awkScript, copiedUpTo, m.start());
        rewritten.append(fname).append('(').append(Joiner.on(",").join(args)).append(')');
        copiedUpTo = m.end();
    }
    rewritten.append(awkScript, copiedUpTo, awkScript.length());
    String result = rewritten.toString();

    // Any "get(" still present could not be parsed as a complete call. Report it as a
    // command-line error rather than passing a broken script on to awk.
    if (callStart.matcher(result).find()) {
        System.err.println("Malformed call to function " + FUNC + "(...) in awk script: " + awkScript);
        throw new InvalidCommandLineException();
    }
    return result;
}
Use of exceptions.InvalidCommandLineException in project ASCIIGenome by dariober.
Class UcscFetch, method executeGenePredToGtf.
/**
 * Run the given genePredToGtf executable for a database/table pair and return the GTF file it
 * was asked to write.
 *
 * <p>The executable is invoked as {@code <exec> -utr <db> <table> <outfile>}. The output file is
 * placed in {@code tmpdir}, named {@code <db>:<table>.gtf}, and marked for deletion on JVM exit.
 * As a side effect, {@code this.db} and {@code this.table} are set from the parsed argument.
 *
 * @param genePredToGtfExec the executable to run
 * @param dbNameTableName string identifying database and table, as understood by
 *     {@code parseCmdArgs}
 * @return the file the executable was told to write the GTF to
 * @throws InvalidCommandLineException if the process exits with a non-zero status
 * @throws IOException if the process cannot be started or its output cannot be read
 * @throws InterruptedException if interrupted while waiting for the process to finish
 */
private File executeGenePredToGtf(File genePredToGtfExec, String dbNameTableName) throws InvalidCommandLineException, IOException, InterruptedException {
    List<String> args = parseCmdArgs(dbNameTableName);
    this.db = args.get(0);
    this.table = args.get(1);

    File outgtf = new File(tmpdir.getAbsolutePath(), this.db + ":" + this.table + ".gtf");
    outgtf.deleteOnExit();

    ProcessBuilder probuilder = new ProcessBuilder(genePredToGtfExec.getAbsolutePath(), "-utr", db, table, outgtf.getAbsolutePath());
    // Merge stderr into stdout so a single reader can drain everything the child writes.
    probuilder.redirectErrorStream(true);

    System.err.println("Querying UCSC...");
    Process process = probuilder.start();

    // Drain the child's output BEFORE waiting: if nobody reads the pipe and the child fills
    // the OS pipe buffer, the child blocks on write and waitFor() never returns.
    java.io.ByteArrayOutputStream childOutput = new java.io.ByteArrayOutputStream();
    java.io.InputStream childStream = process.getInputStream();
    try {
        byte[] buffer = new byte[8192];
        int nread;
        while ((nread = childStream.read(buffer)) != -1) {
            childOutput.write(buffer, 0, nread);
        }
    } finally {
        childStream.close();
    }

    int exitCode = process.waitFor();
    if (exitCode != 0) {
        // Show whatever the tool reported so the user can see why it failed.
        System.err.println(genePredToGtfExec.getName() + " failed with exit code " + exitCode + ".");
        System.err.write(childOutput.toByteArray(), 0, childOutput.size());
        System.err.flush();
        throw new InvalidCommandLineException();
    }
    return outgtf;
}
Use of exceptions.InvalidCommandLineException in project ASCIIGenome by dariober.
Class TrackSetTest, method canSetAwkForTrackIntervalFeature.
/**
 * Exercises {@code TrackSet.setAwkForTrack} on interval-feature tracks: setting a script for
 * selected tracks or for all of them, switching it off, rejecting scripts that call functions
 * with no arguments, and finally issuing an awk command when a wiggle track is also loaded.
 */
@Test
public void canSetAwkForTrackIntervalFeature() throws InvalidCommandLineException, IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidRecordException, SQLException {
    TrackSet trackSet = new TrackSet();
    GenomicCoords coords = new GenomicCoords("chr1:1-100", 80, null, null);

    Track gtfTrack = new TrackIntervalFeature("test_data/hg19_genes_head.gtf", coords);
    trackSet.addTrack(gtfTrack, "x");
    Track gzGtfTrack = new TrackIntervalFeature("test_data/hg19_genes_head.gtf.gz", coords);
    trackSet.addTrack(gzGtfTrack, "x");
    Track bedTrack = new TrackIntervalFeature("test_data/refSeq.bed", coords);
    trackSet.addTrack(bedTrack, "x");

    // Script applied to the first track only; the third keeps the empty default.
    trackSet.setAwkForTrack(Utils.tokenize("awk '$3 == \"exon\"' #1", " "));
    assertEquals("-F '\\t' '$3 == \"exon\"'", trackSet.getTrack(gtfTrack).getAwk());
    assertEquals("", trackSet.getTrack(bedTrack).getAwk());

    // User-supplied delimiter, applied to a subset of tracks.
    trackSet.setAwkForTrack(Utils.tokenize("awk -F _ '$3 == 10' #1 #3", " "));
    assertEquals("-F _ '$3 == 10'", trackSet.getTrack(gtfTrack).getAwk());
    assertEquals("-F _ '$3 == 10'", trackSet.getTrack(bedTrack).getAwk());

    // User-supplied delimiter and variable, no track selector: applies to every track.
    trackSet.setAwkForTrack(Utils.tokenize("awk -v FOO=foo -F _ '$3 == 20'", " "));
    assertEquals("-v FOO=foo -F _ '$3 == 20'", trackSet.getTrack(gtfTrack).getAwk());
    assertEquals("-v FOO=foo -F _ '$3 == 20'", trackSet.getTrack(bedTrack).getAwk());

    // Switch the script off for a single track.
    trackSet.setAwkForTrack(Utils.tokenize("awk -off #2", " "));
    assertEquals("", trackSet.getTrack(gzGtfTrack).getAwk());

    // A bare "awk" clears the script on all tracks.
    trackSet.setAwkForTrack(Utils.tokenize("awk", " "));
    assertEquals("", trackSet.getTrack(gtfTrack).getAwk());
    assertEquals("", trackSet.getTrack(gzGtfTrack).getAwk());
    assertEquals("", trackSet.getTrack(bedTrack).getAwk());

    // Invalid function calls must be refused with InvalidCommandLineException.
    assertTrue(awkCommandIsRejected(trackSet, "awk getSamTag()"));
    assertTrue(awkCommandIsRejected(trackSet, "awk getInfoTag()"));

    // With a wiggle track in the set, a plain awk command must complete without throwing.
    Track wiggleTrack = new TrackWiggles("test_data/ear045.oxBS.actb.tdf", coords, 4);
    trackSet.addTrack(wiggleTrack, "x");
    trackSet.setAwkForTrack(Utils.tokenize("awk '1<2'", " "));
}

/** Returns true if applying {@code cmdInput} to {@code trackSet} throws InvalidCommandLineException. */
private boolean awkCommandIsRejected(TrackSet trackSet, String cmdInput) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidRecordException, SQLException {
    try {
        trackSet.setAwkForTrack(Utils.tokenize(cmdInput, " "));
    } catch (InvalidCommandLineException e) {
        return true;
    }
    return false;
}
Aggregations