use of ca.corefacility.bioinformatics.irida.model.workflow.analysis.ToolExecution in project irida by phac-nml.
the class AnalysisController method getOutputFile.
/**
* Read some lines or text from an {@link AnalysisOutputFile}.
*
* @param id {@link AnalysisSubmission} id
* @param fileId {@link AnalysisOutputFile} id
* @param limit Optional limit to number of lines to read from file
* @param start Optional line to start reading from
* @param end Optional line to stop reading at
* @param seek Optional file byte position to seek to and begin reading
* @param chunk Optional number of bytes to read from file
* @param response HTTP response object
* @return JSON with file text or lines as well as information about the file.
*/
@RequestMapping(value = "/ajax/{id}/outputs/{fileId}", method = RequestMethod.GET)
@ResponseBody
public AnalysisOutputFileInfo getOutputFile(@PathVariable Long id, @PathVariable Long fileId,
        @RequestParam(defaultValue = "100", required = false) Long limit,
        @RequestParam(required = false) Long start,
        @RequestParam(required = false) Long end,
        @RequestParam(defaultValue = "0", required = false) Long seek,
        @RequestParam(required = false) Long chunk,
        HttpServletResponse response) {
AnalysisSubmission submission = analysisSubmissionService.read(id);
Analysis analysis = submission.getAnalysis();
final Optional<AnalysisOutputFile> analysisOutputFile = analysis.getAnalysisOutputFiles().stream()
        .filter(x -> Objects.equals(x.getId(), fileId))
        .findFirst();
if (analysisOutputFile.isPresent()) {
final AnalysisOutputFile aof = analysisOutputFile.get();
final Path aofFile = aof.getFile();
final ToolExecution tool = aof.getCreatedByTool();
final AnalysisOutputFileInfo contents = new AnalysisOutputFileInfo();
contents.setId(aof.getId());
contents.setAnalysisSubmissionId(submission.getId());
contents.setAnalysisId(analysis.getId());
contents.setFilename(aofFile.getFileName().toString());
contents.setFileExt(FileUtilities.getFileExt(aofFile.getFileName().toString()));
contents.setFileSizeBytes(aof.getFile().toFile().length());
contents.setToolName(tool.getToolName());
contents.setToolVersion(tool.getToolVersion());
final File file = aofFile.toFile();
// try-with-resources so the file handle is released even if reading fails
try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) {
randomAccessFile.seek(seek);
if (seek == 0) {
if (chunk != null && chunk > 0) {
contents.setText(FileUtilities.readChunk(randomAccessFile, seek, chunk));
contents.setChunk(chunk);
contents.setStartSeek(seek);
} else {
final BufferedReader reader = new BufferedReader(new FileReader(randomAccessFile.getFD()));
final List<String> lines = FileUtilities.readLinesLimit(reader, limit, start, end);
// 'start' is an optional request param with no default; guard against an NPE below
final long startLine = (start == null) ? 0L : start;
contents.setLines(lines);
contents.setLimit((long) lines.size());
contents.setStart(startLine);
contents.setEnd(startLine + lines.size());
}
} else {
if (chunk != null && chunk > 0) {
contents.setText(FileUtilities.readChunk(randomAccessFile, seek, chunk));
contents.setChunk(chunk);
contents.setStartSeek(seek);
} else {
final List<String> lines = FileUtilities.readLinesFromFilePointer(randomAccessFile, limit);
contents.setLines(lines);
contents.setStartSeek(seek);
contents.setStart(start);
contents.setLimit((long) lines.size());
}
}
contents.setFilePointer(randomAccessFile.getFilePointer());
} catch (IOException e) {
logger.error("Could not read output file '" + aof.getId() + "' " + e);
response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
contents.setError("Could not read output file");
}
return contents;
} else {
response.setStatus(HttpServletResponse.SC_NOT_FOUND);
return null;
}
}
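FileUtilities.readChunk is called above but not shown. Below is a minimal sketch of a byte-range read consistent with that call site, using only the RandomAccessFile API; the class name FileChunkReader is made up for illustration and is not the actual IRIDA implementation.

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;

public class FileChunkReader {
    /**
     * Read up to {@code chunk} bytes from {@code raf} starting at byte
     * position {@code seek}, returning them as UTF-8 text.
     */
    public static String readChunk(RandomAccessFile raf, Long seek, Long chunk) throws IOException {
        raf.seek(seek);
        // clamp so we never read past the end of the file
        final int bytesToRead = (int) Math.max(Math.min(chunk, raf.length() - seek), 0L);
        final byte[] buffer = new byte[bytesToRead];
        raf.readFully(buffer);
        return new String(buffer, StandardCharsets.UTF_8);
    }
}

Given the controller above, a client can page through a large output file by re-requesting the endpoint with ?seek=<filePointer>&chunk=<n>, carrying forward the filePointer value returned in each response.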
use of ca.corefacility.bioinformatics.irida.model.workflow.analysis.ToolExecution in project irida by phac-nml.
the class AnalysisController method getAnalysisOutputFileInfo.
/**
* Get {@link AnalysisOutputFileInfo}.
*
* @param submission {@link AnalysisSubmission} of {@code analysis}
* @param analysis {@link Analysis} to get {@link AnalysisOutputFile}s from
* @param outputName Workflow output name
* @return {@link AnalysisOutputFile} info
*/
private AnalysisOutputFileInfo getAnalysisOutputFileInfo(AnalysisSubmission submission, Analysis analysis, String outputName) {
final ImmutableSet<String> BLACKLIST_FILE_EXT = ImmutableSet.of("zip");
// set of file extensions for indicating whether the first line of the file should be read
final ImmutableSet<String> FILE_EXT_READ_FIRST_LINE = ImmutableSet.of("tsv", "txt", "tabular", "csv", "tab");
final AnalysisOutputFile aof = analysis.getAnalysisOutputFile(outputName);
final Long aofId = aof.getId();
final String aofFilename = aof.getFile().getFileName().toString();
final String fileExt = FileUtilities.getFileExt(aofFilename);
if (BLACKLIST_FILE_EXT.contains(fileExt)) {
return null;
}
final ToolExecution tool = aof.getCreatedByTool();
final String toolName = tool.getToolName();
final String toolVersion = tool.getToolVersion();
final AnalysisOutputFileInfo info = new AnalysisOutputFileInfo();
info.setId(aofId);
info.setAnalysisSubmissionId(submission.getId());
info.setAnalysisId(analysis.getId());
info.setOutputName(outputName);
info.setFilename(aofFilename);
info.setFileSizeBytes(aof.getFile().toFile().length());
info.setToolName(toolName);
info.setToolVersion(toolVersion);
info.setFileExt(fileExt);
if (FILE_EXT_READ_FIRST_LINE.contains(fileExt)) {
addFirstLine(info, aof);
}
return info;
}
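The addFirstLine helper is referenced but not shown. A plausible sketch follows, assuming AnalysisOutputFileInfo exposes a setFirstLine setter (an assumption; only the setters used above are confirmed by this snippet).

// Hypothetical sketch: read only the first line of a tabular output so the UI
// can render column headers without transferring the whole file.
private void addFirstLine(AnalysisOutputFileInfo info, AnalysisOutputFile aof) {
    try (RandomAccessFile raf = new RandomAccessFile(aof.getFile().toFile(), "r")) {
        info.setFirstLine(raf.readLine()); // setFirstLine is assumed, not confirmed
        // record where the first line ended so a follow-up request can resume
        // reading with ?seek=<filePointer>
        info.setFilePointer(raf.getFilePointer());
    } catch (IOException e) {
        logger.error("Could not read first line of file '" + aof.getId() + "'", e);
    }
}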
use of ca.corefacility.bioinformatics.irida.model.workflow.analysis.ToolExecution in project irida by phac-nml.
the class SNVPhylAnalysisIT method testSNVPhylSuccessHigherSNVReadProportion.
/**
* Tests successfully executing the SNVPhyl pipeline while passing a higher value for the fraction of reads required to call a SNP.
*
* @throws Exception
*/
@Test
@WithMockUser(username = "aaron", roles = "ADMIN")
public void testSNVPhylSuccessHigherSNVReadProportion() throws Exception {
SequenceFilePair sequenceFilePairA = databaseSetupGalaxyITService.setupSampleSequenceFileInDatabase(1L, sequenceFilePathsA1List, sequenceFilePathsA2List).get(0);
SequenceFilePair sequenceFilePairB = databaseSetupGalaxyITService.setupSampleSequenceFileInDatabase(2L, sequenceFilePathsB1List, sequenceFilePathsB2List).get(0);
SequenceFilePair sequenceFilePairC = databaseSetupGalaxyITService.setupSampleSequenceFileInDatabase(3L, sequenceFilePathsC1List, sequenceFilePathsC2List).get(0);
waitForFilesToSettle(sequenceFilePairA, sequenceFilePairB, sequenceFilePairC);
Map<String, String> parameters = ImmutableMap.<String, String>builder()
        .put("snv-abundance-ratio", "0.90")
        .put("minimum-read-coverage", "2")
        .put("minimum-percent-coverage", "75")
        .put("minimum-mean-mapping-quality", "20")
        .put("filter-density-threshold", "3")
        .put("filter-density-window-size", "30")
        .build();
AnalysisSubmission submission = databaseSetupGalaxyITService.setupPairSubmissionInDatabase(Sets.newHashSet(sequenceFilePairA, sequenceFilePairB, sequenceFilePairC), referenceFilePath, parameters, snvPhylWorkflow.getWorkflowIdentifier());
completeSubmittedAnalyses(submission.getId());
submission = analysisSubmissionRepository.findOne(submission.getId());
assertEquals("analysis state should be completed.", AnalysisState.COMPLETED, submission.getAnalysisState());
Analysis analysisPhylogenomics = submission.getAnalysis();
assertEquals("Should have generated a phylogenomics pipeline analysis type.", AnalysisType.PHYLOGENOMICS, analysisPhylogenomics.getAnalysisType());
assertEquals("the phylogenomics pipeline should have 8 output files.", 8, analysisPhylogenomics.getAnalysisOutputFiles().size());
@SuppressWarnings("resource") String matrixContent = new Scanner(analysisPhylogenomics.getAnalysisOutputFile(MATRIX_KEY).getFile().toFile()).useDelimiter("\\Z").next();
assertTrue("snpMatrix should be the same but is \"" + matrixContent + "\"", com.google.common.io.Files.equal(outputSnvMatrix2.toFile(), analysisPhylogenomics.getAnalysisOutputFile(MATRIX_KEY).getFile().toFile()));
assertNotNull("file should have tool provenance attached.", analysisPhylogenomics.getAnalysisOutputFile(MATRIX_KEY).getCreatedByTool());
@SuppressWarnings("resource") String snpTableContent = new Scanner(analysisPhylogenomics.getAnalysisOutputFile(TABLE_KEY).getFile().toFile()).useDelimiter("\\Z").next();
assertTrue("snpTable should be the same but is \"" + snpTableContent + "\"", com.google.common.io.Files.equal(outputSnvTable2.toFile(), analysisPhylogenomics.getAnalysisOutputFile(TABLE_KEY).getFile().toFile()));
assertNotNull("file should have tool provenance attached.", analysisPhylogenomics.getAnalysisOutputFile(TABLE_KEY).getCreatedByTool());
@SuppressWarnings("resource") String vcf2coreContent = new Scanner(analysisPhylogenomics.getAnalysisOutputFile(CORE_KEY).getFile().toFile()).useDelimiter("\\Z").next();
assertTrue("vcf2core should be the same but is \"" + vcf2coreContent + "\"", com.google.common.io.Files.equal(vcf2core2.toFile(), analysisPhylogenomics.getAnalysisOutputFile(CORE_KEY).getFile().toFile()));
assertNotNull("file should have tool provenance attached.", analysisPhylogenomics.getAnalysisOutputFile(CORE_KEY).getCreatedByTool());
// only check size of mapping quality file due to samples output in random order
assertTrue("the mapping quality file should not be empty.", Files.size(analysisPhylogenomics.getAnalysisOutputFile(QUALITY_KEY).getFile()) > 0);
@SuppressWarnings("resource") String filterStatsContent = new Scanner(analysisPhylogenomics.getAnalysisOutputFile(STATS_KEY).getFile().toFile()).useDelimiter("\\Z").next();
assertTrue("filterStats should be the same but is \"" + filterStatsContent + "\"", com.google.common.io.Files.equal(filterStats2.toFile(), analysisPhylogenomics.getAnalysisOutputFile(STATS_KEY).getFile().toFile()));
assertNotNull("file should have tool provenance attached.", analysisPhylogenomics.getAnalysisOutputFile(STATS_KEY).getCreatedByTool());
@SuppressWarnings("resource") String snvAlignContent = new Scanner(analysisPhylogenomics.getAnalysisOutputFile(ALIGN_KEY).getFile().toFile()).useDelimiter("\\Z").next();
assertTrue("snvAlign should be the same but is \"" + snvAlignContent + "\"", com.google.common.io.Files.equal(snvAlign2.toFile(), analysisPhylogenomics.getAnalysisOutputFile(ALIGN_KEY).getFile().toFile()));
assertNotNull("file should have tool provenance attached.", analysisPhylogenomics.getAnalysisOutputFile(ALIGN_KEY).getCreatedByTool());
// only test to make sure the files have a valid size since PhyML uses a
// random seed to generate the tree (and so changes results)
assertTrue("the phylogenetic tree file should not be empty.", Files.size(analysisPhylogenomics.getAnalysisOutputFile(TREE_KEY).getFile()) > 0);
assertTrue("the phylogenetic tree stats file should not be empty.", Files.size(analysisPhylogenomics.getAnalysisOutputFile(TREE_KEY).getFile()) > 0);
// try to follow the phylogenomics provenance all the way back to the
// upload tools
List<ToolExecution> toolsToVisit = Lists.newArrayList(analysisPhylogenomics.getAnalysisOutputFile(TREE_KEY).getCreatedByTool());
assertFalse("file should have tool provenance attached.", toolsToVisit.isEmpty());
String minVcf2AlignCov = null;
String altAlleleFraction = null;
String minimumPercentCoverage = null;
String minimumDepthVerify = null;
String filterDensityThreshold = null;
String filterDensityWindowSize = null;
// walk the provenance graph back toward the tools where the reads were uploaded
while (!toolsToVisit.isEmpty()) {
final ToolExecution ex = toolsToVisit.remove(0);
toolsToVisit.addAll(ex.getPreviousSteps());
if (ex.getToolName().contains("Consolidate VCFs")) {
final Map<String, String> params = ex.getExecutionTimeParameters();
minVcf2AlignCov = params.get("coverage");
altAlleleFraction = params.get("snv_abundance_ratio");
filterDensityThreshold = params.get("use_density_filter.threshold");
filterDensityWindowSize = params.get("use_density_filter.window_size");
break;
}
}
// try to follow the mapping quality provenance all the way back to the
// upload tools
toolsToVisit = Lists.newArrayList(analysisPhylogenomics.getAnalysisOutputFile(QUALITY_KEY).getCreatedByTool());
assertFalse("file should have tool provenance attached.", toolsToVisit.isEmpty());
while (!toolsToVisit.isEmpty()) {
final ToolExecution ex = toolsToVisit.remove(0);
toolsToVisit.addAll(ex.getPreviousSteps());
if (ex.getToolName().contains("Verify Mapping Quality")) {
final Map<String, String> params = ex.getExecutionTimeParameters();
minimumPercentCoverage = params.get("minmap");
minimumDepthVerify = params.get("mindepth");
}
}
assertEquals("incorrect minimum vcf 2 align coverage", "\"2\"", minVcf2AlignCov);
assertEquals("incorrect alternative allele fraction", "\"0.90\"", altAlleleFraction);
assertEquals("incorrect minimum depth for verify map", "\"2\"", minimumDepthVerify);
assertEquals("incorrect min percent coverage for verify map", "\"75\"", minimumPercentCoverage);
assertEquals("incorrect filter density threshold", "3", filterDensityThreshold);
assertEquals("incorrect filter density window size", "30", filterDensityWindowSize);
}
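The breadth-first walk over ToolExecution.getPreviousSteps() appears twice in this test. A small helper capturing the pattern might look like the sketch below; findToolByName is hypothetical and not part of the IRIDA test suite.

// Sketch: walk the provenance graph from an output file's creating tool back
// through all previous steps, returning the first execution whose tool name
// contains the given fragment, or null if none matches.
private static ToolExecution findToolByName(ToolExecution root, String nameFragment) {
    final List<ToolExecution> toVisit = Lists.newArrayList(root);
    while (!toVisit.isEmpty()) {
        final ToolExecution ex = toVisit.remove(0);
        if (ex.getToolName().contains(nameFragment)) {
            return ex;
        }
        toVisit.addAll(ex.getPreviousSteps());
    }
    return null;
}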
use of ca.corefacility.bioinformatics.irida.model.workflow.analysis.ToolExecution in project irida by phac-nml.
the class AnalysisExecutionServiceGalaxyIT method testTransferAnalysisResultsSuccessPhylogenomicsPairedNoParameters.
/**
* Tests successfully getting analysis results for the phylogenomics
* pipeline (paired version with no parameters, using defaults).
*
* @throws Exception
*/
@Test
@WithMockUser(username = "aaron", roles = "ADMIN")
public void testTransferAnalysisResultsSuccessPhylogenomicsPairedNoParameters() throws Exception {
String validCoverageFromProvenance = "\"10\"";
String validMidCoverageFromProvenance = "10";
// I verify parameters were set correctly by checking the output file (where parameters were printed).
String validTreeFile = "10 10 10";
AnalysisSubmission analysisSubmission = analysisExecutionGalaxyITService.setupPairSubmissionInDatabase(1L, pairedPaths1, pairedPaths2, referenceFilePath, iridaPhylogenomicsPairedParametersWorkflowId, false);
Future<AnalysisSubmission> analysisSubmittedFuture = analysisExecutionService.prepareSubmission(analysisSubmission);
AnalysisSubmission analysisSubmitted = analysisSubmittedFuture.get();
Future<AnalysisSubmission> analysisExecutionFuture = analysisExecutionService.executeAnalysis(analysisSubmitted);
AnalysisSubmission analysisExecuted = analysisExecutionFuture.get();
analysisExecutionGalaxyITService.waitUntilSubmissionComplete(analysisExecuted);
analysisExecuted.setAnalysisState(AnalysisState.FINISHED_RUNNING);
Future<AnalysisSubmission> analysisSubmissionCompletedFuture = analysisExecutionService.transferAnalysisResults(analysisExecuted);
analysisSubmissionCompletedFuture.get();
AnalysisSubmission analysisSubmissionCompletedDatabase = analysisSubmissionService.read(analysisSubmission.getId());
assertEquals("analysis state is not completed", AnalysisState.COMPLETED, analysisSubmissionCompletedDatabase.getAnalysisState());
Analysis analysisResults = analysisSubmissionCompletedDatabase.getAnalysis();
assertEquals("analysis results is an invalid class", AnalysisType.PHYLOGENOMICS, analysisResults.getAnalysisType());
assertEquals("invalid number of output files", 3, analysisResults.getAnalysisOutputFiles().size());
AnalysisOutputFile phylogeneticTree = analysisResults.getAnalysisOutputFile(TREE_KEY);
AnalysisOutputFile snpMatrix = analysisResults.getAnalysisOutputFile(MATRIX_KEY);
AnalysisOutputFile snpTable = analysisResults.getAnalysisOutputFile(TABLE_KEY);
// verify parameters were set properly by checking contents of file
@SuppressWarnings("resource") String treeContent = new Scanner(phylogeneticTree.getFile().toFile()).useDelimiter("\\Z").next();
assertEquals("phylogenetic trees containing the parameters should be equal", validTreeFile, treeContent);
// phy tree
final ToolExecution phyTreeCoreInputs = phylogeneticTree.getCreatedByTool();
assertEquals("The first tool execution should be by core_pipeline_outputs_paired_with_parameters v0.1.0", "core_pipeline_outputs_paired_with_parameters", phyTreeCoreInputs.getToolName());
assertEquals("The first tool execution should be by core_pipeline_outputs_paired_with_parameters v0.1.0", "0.1.0", phyTreeCoreInputs.getToolVersion());
Map<String, String> phyTreeCoreParameters = phyTreeCoreInputs.getExecutionTimeParameters();
assertEquals("incorrect number of non-file parameters", 4, phyTreeCoreParameters.size());
assertEquals("parameter coverageMin set incorrectly", validCoverageFromProvenance, phyTreeCoreParameters.get("coverageMin"));
assertEquals("parameter coverageMid set incorrectly", validMidCoverageFromProvenance, phyTreeCoreParameters.get("conditional.coverageMid"));
assertEquals("parameter coverageMax set incorrectly", validCoverageFromProvenance, phyTreeCoreParameters.get("coverageMax"));
assertEquals("parameter conditional_select set incorrectly", "all", phyTreeCoreParameters.get("conditional.conditional_select"));
Set<ToolExecution> phyTreeCorePreviousSteps = phyTreeCoreInputs.getPreviousSteps();
assertEquals("there should exist 2 previous steps", 2, phyTreeCorePreviousSteps.size());
Set<String> uploadedFileTypesPhy = Sets.newHashSet();
for (ToolExecution previousStep : phyTreeCorePreviousSteps) {
assertTrue("previous steps should be input tools.", previousStep.isInputTool());
uploadedFileTypesPhy.add(previousStep.getExecutionTimeParameters().get("file_type"));
}
assertEquals("uploaded files should have correct types", Sets.newHashSet("\"fastqsanger\"", "\"fasta\""), uploadedFileTypesPhy);
// snp matrix
final ToolExecution matrixCoreInputs = snpMatrix.getCreatedByTool();
assertEquals("The first tool execution should be by core_pipeline_outputs_paired_with_parameters v0.1.0", "core_pipeline_outputs_paired_with_parameters", matrixCoreInputs.getToolName());
assertEquals("The first tool execution should be by core_pipeline_outputs_paired_with_parameters v0.1.0", "0.1.0", matrixCoreInputs.getToolVersion());
Map<String, String> matrixCoreParameters = matrixCoreInputs.getExecutionTimeParameters();
assertEquals("incorrect number of non-file parameters", 4, matrixCoreParameters.size());
assertEquals("parameter coverageMin set incorrectly", validCoverageFromProvenance, matrixCoreParameters.get("coverageMin"));
assertEquals("parameter coverageMid set incorrectly", validMidCoverageFromProvenance, phyTreeCoreParameters.get("conditional.coverageMid"));
assertEquals("parameter coverageMax set incorrectly", validCoverageFromProvenance, matrixCoreParameters.get("coverageMax"));
assertEquals("parameter conditional_select set incorrectly", "all", phyTreeCoreParameters.get("conditional.conditional_select"));
Set<ToolExecution> matrixCorePreviousSteps = matrixCoreInputs.getPreviousSteps();
assertEquals("there should exist 2 previous steps", 2, matrixCorePreviousSteps.size());
Set<String> uploadedFileTypesMatrix = Sets.newHashSet();
for (ToolExecution previousStep : matrixCorePreviousSteps) {
assertTrue("previous steps should be input tools.", previousStep.isInputTool());
uploadedFileTypesMatrix.add(previousStep.getExecutionTimeParameters().get("file_type"));
}
assertEquals("uploaded files should have correct types", Sets.newHashSet("\"fastqsanger\"", "\"fasta\""), uploadedFileTypesMatrix);
// snp table
final ToolExecution tableCoreInputs = snpTable.getCreatedByTool();
assertEquals("The first tool execution should be by core_pipeline_outputs_paired_with_parameters v0.1.0", "core_pipeline_outputs_paired_with_parameters", tableCoreInputs.getToolName());
assertEquals("The first tool execution should be by core_pipeline_outputs_paired_with_parameters v0.1.0", "0.1.0", tableCoreInputs.getToolVersion());
Map<String, String> tableCoreParameters = tableCoreInputs.getExecutionTimeParameters();
assertEquals("incorrect number of non-file parameters", 4, tableCoreParameters.size());
assertEquals("parameter coverageMin set incorrectly", validCoverageFromProvenance, tableCoreParameters.get("coverageMin"));
assertEquals("parameter coverageMid set incorrectly", validMidCoverageFromProvenance, phyTreeCoreParameters.get("conditional.coverageMid"));
assertEquals("parameter coverageMax set incorrectly", validCoverageFromProvenance, tableCoreParameters.get("coverageMax"));
assertEquals("parameter conditional_select set incorrectly", "all", phyTreeCoreParameters.get("conditional.conditional_select"));
Set<ToolExecution> tablePreviousSteps = tableCoreInputs.getPreviousSteps();
assertEquals("there should exist 2 previous steps", 2, tablePreviousSteps.size());
Set<String> uploadedFileTypesTable = Sets.newHashSet();
for (ToolExecution previousStep : tablePreviousSteps) {
assertTrue("previous steps should be input tools.", previousStep.isInputTool());
uploadedFileTypesTable.add(previousStep.getExecutionTimeParameters().get("file_type"));
}
assertEquals("uploaded files should have correct types", Sets.newHashSet("\"fastqsanger\"", "\"fasta\""), uploadedFileTypesTable);
}
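The three per-output blocks above repeat the same provenance assertions. A refactoring sketch that factors them into one helper is shown below; assertCoreOutputProvenance is hypothetical, and the expected values mirror those asserted in the test.

// Sketch: shared assertions for each core pipeline output file.
private static void assertCoreOutputProvenance(AnalysisOutputFile output, String validCoverage, String validMidCoverage) {
    final ToolExecution core = output.getCreatedByTool();
    assertEquals("wrong tool name", "core_pipeline_outputs_paired_with_parameters", core.getToolName());
    assertEquals("wrong tool version", "0.1.0", core.getToolVersion());
    final Map<String, String> params = core.getExecutionTimeParameters();
    assertEquals("incorrect number of non-file parameters", 4, params.size());
    assertEquals("parameter coverageMin set incorrectly", validCoverage, params.get("coverageMin"));
    assertEquals("parameter coverageMid set incorrectly", validMidCoverage, params.get("conditional.coverageMid"));
    assertEquals("parameter coverageMax set incorrectly", validCoverage, params.get("coverageMax"));
    assertEquals("parameter conditional_select set incorrectly", "all", params.get("conditional.conditional_select"));
    final Set<ToolExecution> previousSteps = core.getPreviousSteps();
    assertEquals("there should exist 2 previous steps", 2, previousSteps.size());
    final Set<String> uploadedFileTypes = Sets.newHashSet();
    for (ToolExecution previousStep : previousSteps) {
        assertTrue("previous steps should be input tools.", previousStep.isInputTool());
        uploadedFileTypes.add(previousStep.getExecutionTimeParameters().get("file_type"));
    }
    assertEquals("uploaded files should have correct types", Sets.newHashSet("\"fastqsanger\"", "\"fasta\""), uploadedFileTypes);
}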
use of ca.corefacility.bioinformatics.irida.model.workflow.analysis.ToolExecution in project irida by phac-nml.
the class AnalysisProvenanceServiceGalaxyTest method testBuildSingleStepToolExecutionStrangeDataStructureDoToString.
@Test
public void testBuildSingleStepToolExecutionStrangeDataStructureDoToString() throws ExecutionManagerException {
final HistoryContents hc = new HistoryContents();
hc.setName(FILENAME);
final HistoryContentsProvenance hcp = new HistoryContentsProvenance();
hcp.setParameters(ImmutableMap.of("akey", "[[\"avalue\"]]"));
final JobDetails jd = new JobDetails();
jd.setCommandLine("");
when(galaxyHistoriesService.showHistoryContents(any(String.class))).thenReturn(Lists.newArrayList(hc));
when(galaxyHistoriesService.showProvenance(any(String.class), any(String.class))).thenReturn(hcp);
when(toolsClient.showTool(any(String.class))).thenReturn(new Tool());
when(jobsClient.showJob(any(String.class))).thenReturn(jd);
final ToolExecution toolExecution = provenanceService.buildToolExecutionForOutputFile(analysisSubmission(), analysisOutputFile());
assertTrue("tool execution should have the specified parameter.", toolExecution.getExecutionTimeParameters().containsKey("akey"));
assertEquals("tool execution parameter should be specified value.", "[[\"avalue\"]]", toolExecution.getExecutionTimeParameters().get("akey"));
}
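For context, the behavior this test pins down is that strange nested provenance values returned by Galaxy are stored verbatim in string form rather than parsed. A minimal sketch of that kind of flattening follows; ParameterFlattener is illustrative only and not the actual AnalysisProvenanceServiceGalaxy code.

import java.util.HashMap;
import java.util.Map;

public class ParameterFlattener {
    // Reduce raw provenance parameters to strings: values that already arrive
    // as strings (like the [["avalue"]] literal in the test) pass through
    // unchanged; other types fall back to their toString() form.
    public static Map<String, String> flatten(Map<String, Object> rawParameters) {
        final Map<String, String> flattened = new HashMap<>();
        for (Map.Entry<String, Object> entry : rawParameters.entrySet()) {
            flattened.put(entry.getKey(), String.valueOf(entry.getValue()));
        }
        return flattened;
    }
}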