Use of ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFilePair in project irida by phac-nml.
The class SNVPhylAnalysisIT, method testSNVPhylSuccessRemoveSNVDensity.
/**
* Tests successfully executing the SNVPhyl pipeline while passing a lower SNV density threshold so that SNVs in high-density regions are filtered out.
*
* @throws Exception
*/
@Test
@WithMockUser(username = "aaron", roles = "ADMIN")
public void testSNVPhylSuccessRemoveSNVDensity() throws Exception {
SequenceFilePair sequenceFilePairA = databaseSetupGalaxyITService.setupSampleSequenceFileInDatabase(1L, sequenceFilePathsA1List, sequenceFilePathsA2List).get(0);
SequenceFilePair sequenceFilePairB = databaseSetupGalaxyITService.setupSampleSequenceFileInDatabase(2L, sequenceFilePathsB1List, sequenceFilePathsB2List).get(0);
SequenceFilePair sequenceFilePairC = databaseSetupGalaxyITService.setupSampleSequenceFileInDatabase(3L, sequenceFilePathsC1List, sequenceFilePathsC2List).get(0);
Map<String, String> parameters = ImmutableMap.of("snv-abundance-ratio", "0.75", "minimum-read-coverage", "2", "filter-density-threshold", "2", "filter-density-window-size", "4");
AnalysisSubmission submission = databaseSetupGalaxyITService.setupPairSubmissionInDatabase(Sets.newHashSet(sequenceFilePairA, sequenceFilePairB, sequenceFilePairC), referenceFilePath, parameters, snvPhylWorkflow.getWorkflowIdentifier());
completeSubmittedAnalyses(submission.getId());
submission = analysisSubmissionRepository.findOne(submission.getId());
assertEquals("analysis state should be completed.", AnalysisState.COMPLETED, submission.getAnalysisState());
Analysis analysisPhylogenomics = submission.getAnalysis();
assertEquals("Should have generated a phylogenomics pipeline analysis type.", AnalysisType.PHYLOGENOMICS, analysisPhylogenomics.getAnalysisType());
assertEquals("the phylogenomics pipeline should have 8 output files.", 8, analysisPhylogenomics.getAnalysisOutputFiles().size());
@SuppressWarnings("resource") String matrixContent = new Scanner(analysisPhylogenomics.getAnalysisOutputFile(MATRIX_KEY).getFile().toFile()).useDelimiter("\\Z").next();
assertTrue("snpMatrix should be the same but is \"" + matrixContent + "\"", com.google.common.io.Files.equal(outputSnvMatrix3.toFile(), analysisPhylogenomics.getAnalysisOutputFile(MATRIX_KEY).getFile().toFile()));
assertNotNull("file should have tool provenance attached.", analysisPhylogenomics.getAnalysisOutputFile(MATRIX_KEY).getCreatedByTool());
@SuppressWarnings("resource") String snpTableContent = new Scanner(analysisPhylogenomics.getAnalysisOutputFile(TABLE_KEY).getFile().toFile()).useDelimiter("\\Z").next();
assertTrue("snpTable should be the same but is \"" + snpTableContent + "\"", com.google.common.io.Files.equal(outputSnvTable3.toFile(), analysisPhylogenomics.getAnalysisOutputFile(TABLE_KEY).getFile().toFile()));
assertNotNull("file should have tool provenance attached.", analysisPhylogenomics.getAnalysisOutputFile(TABLE_KEY).getCreatedByTool());
@SuppressWarnings("resource") String vcf2coreContent = new Scanner(analysisPhylogenomics.getAnalysisOutputFile(CORE_KEY).getFile().toFile()).useDelimiter("\\Z").next();
assertTrue("vcf2core should be the same but is \"" + vcf2coreContent + "\"", com.google.common.io.Files.equal(vcf2core3.toFile(), analysisPhylogenomics.getAnalysisOutputFile(CORE_KEY).getFile().toFile()));
assertNotNull("file should have tool provenance attached.", analysisPhylogenomics.getAnalysisOutputFile(CORE_KEY).getCreatedByTool());
// only check size of mapping quality file due to samples output in random order
assertTrue("the mapping quality file should not be empty.", Files.size(analysisPhylogenomics.getAnalysisOutputFile(QUALITY_KEY).getFile()) > 0);
@SuppressWarnings("resource") String filterStatsContent = new Scanner(analysisPhylogenomics.getAnalysisOutputFile(STATS_KEY).getFile().toFile()).useDelimiter("\\Z").next();
assertTrue("filterStats should be the same but is \"" + filterStatsContent + "\"", com.google.common.io.Files.equal(filterStats3.toFile(), analysisPhylogenomics.getAnalysisOutputFile(STATS_KEY).getFile().toFile()));
assertNotNull("file should have tool provenance attached.", analysisPhylogenomics.getAnalysisOutputFile(STATS_KEY).getCreatedByTool());
@SuppressWarnings("resource") String snvAlignContent = new Scanner(analysisPhylogenomics.getAnalysisOutputFile(ALIGN_KEY).getFile().toFile()).useDelimiter("\\Z").next();
assertTrue("snvAlign should be the same but is \"" + snvAlignContent + "\"", com.google.common.io.Files.equal(snvAlign3.toFile(), analysisPhylogenomics.getAnalysisOutputFile(ALIGN_KEY).getFile().toFile()));
assertNotNull("file should have tool provenance attached.", analysisPhylogenomics.getAnalysisOutputFile(ALIGN_KEY).getCreatedByTool());
// only test to make sure the files have a valid size since PhyML uses a
// random seed to generate the tree (and so changes results)
assertTrue("the phylogenetic tree file should not be empty.", Files.size(analysisPhylogenomics.getAnalysisOutputFile(TREE_KEY).getFile()) > 0);
assertTrue("the phylogenetic tree stats file should not be empty.", Files.size(analysisPhylogenomics.getAnalysisOutputFile(TREE_KEY).getFile()) > 0);
// try to follow the phylogenomics provenance all the way back to the
// upload tools
List<ToolExecution> toolsToVisit = Lists.newArrayList(analysisPhylogenomics.getAnalysisOutputFile(TREE_KEY).getCreatedByTool());
assertFalse("file should have tool provenance attached.", toolsToVisit.isEmpty());
String minVcf2AlignCov = null;
String altAlleleFraction = null;
String minimumPercentCoverage = null;
String minimumDepthVerify = null;
String filterDensityThreshold = null;
String filterDensityWindowSize = null;
// follow the chain of previous tool executions; it should eventually reach the one where you upload the reads.
while (!toolsToVisit.isEmpty()) {
    final ToolExecution ex = toolsToVisit.remove(0);
    toolsToVisit.addAll(ex.getPreviousSteps());
    if (ex.getToolName().contains("Consolidate VCFs")) {
        final Map<String, String> params = ex.getExecutionTimeParameters();
        minVcf2AlignCov = params.get("coverage");
        altAlleleFraction = params.get("snv_abundance_ratio");
        filterDensityThreshold = params.get("use_density_filter.threshold");
        filterDensityWindowSize = params.get("use_density_filter.window_size");
        break;
    }
}
// try to follow the mapping quality provenance all the way back to the
// upload tools
toolsToVisit = Lists.newArrayList(analysisPhylogenomics.getAnalysisOutputFile(QUALITY_KEY).getCreatedByTool());
assertFalse("file should have tool provenance attached.", toolsToVisit.isEmpty());
while (!toolsToVisit.isEmpty()) {
    final ToolExecution ex = toolsToVisit.remove(0);
    toolsToVisit.addAll(ex.getPreviousSteps());
    if (ex.getToolName().contains("Verify Mapping Quality")) {
        final Map<String, String> params = ex.getExecutionTimeParameters();
        minimumPercentCoverage = params.get("minmap");
        minimumDepthVerify = params.get("mindepth");
    }
}
assertEquals("incorrect minimum vcf 2 align coverage", "\"2\"", minVcf2AlignCov);
assertEquals("incorrect alternative allele fraction", "\"0.75\"", altAlleleFraction);
assertEquals("incorrect minimum depth for verify map", "\"2\"", minimumDepthVerify);
assertEquals("incorrect min percent coverage for verify map", "\"80\"", minimumPercentCoverage);
assertEquals("incorrect filter density threshold", "2", filterDensityThreshold);
assertEquals("incorrect filter density window size", "4", filterDensityWindowSize);
}
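The two provenance checks above use the same breadth-first walk over ToolExecution.getPreviousSteps(). As a sketch only, that traversal could be factored into a helper like the one below, reusing the types already referenced by the test; the helper name and its standalone form are illustrative and not part of the IRIDA test class.

// Hypothetical helper (not in SNVPhylAnalysisIT): breadth-first walk of the provenance
// graph starting from the ToolExecution attached to an output file, returning the
// execution-time parameters of the first tool whose name contains toolNameFragment,
// or null if no such tool is reachable.
private Map<String, String> findToolParameters(ToolExecution start, String toolNameFragment) {
    List<ToolExecution> toolsToVisit = Lists.newArrayList(start);
    while (!toolsToVisit.isEmpty()) {
        ToolExecution ex = toolsToVisit.remove(0);
        // queue the tools that produced this tool's inputs
        toolsToVisit.addAll(ex.getPreviousSteps());
        if (ex.getToolName().contains(toolNameFragment)) {
            return ex.getExecutionTimeParameters();
        }
    }
    return null;
}

With such a helper, the "Consolidate VCFs" and "Verify Mapping Quality" lookups in the test would each reduce to a single call.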
Use of ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFilePair in project irida by phac-nml.
The class RESTAnalysisSubmissionController, method getAnalysisInputFilePairs.
/**
* Get the {@link SequenceFilePair}s used for the {@link AnalysisSubmission}
*
* @param identifier
* {@link AnalysisSubmission} id
* @return list of {@link SequenceFilePair}s
*/
@RequestMapping("/{identifier}/sequenceFiles/pairs")
public ModelMap getAnalysisInputFilePairs(@PathVariable Long identifier) {
ModelMap map = new ModelMap();
AnalysisSubmission analysisSubmission = analysisSubmissionService.read(identifier);
Set<SequenceFilePair> pairs = sequencingObjectService.getSequencingObjectsOfTypeForAnalysisSubmission(analysisSubmission, SequenceFilePair.class);
ResourceCollection<SequenceFilePair> resources = new ResourceCollection<>(pairs.size());
for (SequenceFilePair pair : pairs) {
    SampleSequencingObjectJoin join = sampleService.getSampleForSequencingObject(pair);
    if (join != null) {
        Long sampleId = join.getSubject().getId();
        pair = RESTSampleSequenceFilesController.addSequencingObjectLinks(pair, sampleId);
        resources.add(pair);
    }
}
resources.add(linkTo(methodOn(RESTAnalysisSubmissionController.class).getAnalysisInputFilePairs(identifier)).withSelfRel());
map.addAttribute(RESTGenericController.RESOURCE_NAME, resources);
return map;
}
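To illustrate how this endpoint might be consumed, the sketch below issues a GET request with the JDK HTTP client and prints the returned JSON. The host, the /api/analysisSubmissions prefix, the submission id, and the credentials are assumptions for the example and are not taken from the controller.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.Base64;

public class FetchAnalysisInputPairs {
    public static void main(String[] args) throws Exception {
        // Assumed base URL and credentials for a local IRIDA instance; adjust as needed.
        String auth = Base64.getEncoder().encodeToString("admin:password1".getBytes());
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8080/api/analysisSubmissions/5/sequenceFiles/pairs"))
                .header("Authorization", "Basic " + auth)
                .header("Accept", "application/json")
                .build();
        // Send the GET request and print the ResourceCollection of SequenceFilePair links.
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.body());
    }
}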
Use of ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFilePair in project irida by phac-nml.
The class AnalysisCollectionServiceGalaxyIT, method testUploadSequenceFilesPairedSuccess.
/**
* Tests successfully uploading a set of paired-end sequence files to Galaxy and
* constructing a dataset collection.
*
* @throws ExecutionManagerException
*/
@Test
@WithMockUser(username = "aaron", roles = "ADMIN")
public void testUploadSequenceFilesPairedSuccess() throws ExecutionManagerException {
History history = new History();
history.setName("testUploadSequenceFilesPaired");
HistoriesClient historiesClient = localGalaxy.getGalaxyInstanceAdmin().getHistoriesClient();
LibrariesClient librariesClient = localGalaxy.getGalaxyInstanceAdmin().getLibrariesClient();
History createdHistory = historiesClient.create(history);
Library library = new Library();
library.setName("testUploadSequenceFilesPaired");
Library createdLibrary = librariesClient.createLibrary(library);
Set<SequenceFilePair> sequenceFiles = Sets.newHashSet(databaseSetupGalaxyITService.setupSampleSequenceFileInDatabase(1L, pairSequenceFiles1A, pairSequenceFiles2A));
Map<Sample, IridaSequenceFilePair> sampleSequenceFilePairs = new HashMap<>(sequencingObjectService.getUniqueSamplesForSequencingObjects(sequenceFiles));
Sample sample1 = sampleRepository.findOne(1L);
CollectionResponse collectionResponse = analysisCollectionServiceGalaxy.uploadSequenceFilesPaired(sampleSequenceFilePairs, createdHistory, createdLibrary);
// verify correct files have been uploaded
List<HistoryContents> historyContents = historiesClient.showHistoryContents(createdHistory.getId());
assertEquals("history does not have correct number of files", 3, historyContents.size());
Map<String, HistoryContents> contentsMap = historyContentsAsMap(historyContents);
assertTrue("the history should have a sequence file with name " + sequenceFilePathA.toFile().getName(), contentsMap.containsKey(sequenceFilePathA.toFile().getName()));
assertTrue("the history should have a file with name " + sequenceFilePath2A.toFile().getName(), contentsMap.containsKey(sequenceFilePath2A.toFile().getName()));
assertTrue("the history should have a dataset collection with name " + INPUTS_PAIRED_NAME, contentsMap.containsKey(INPUTS_PAIRED_NAME));
// verify correct collection has been created
assertEquals("invalid type of dataset collection created", DatasetCollectionType.LIST_PAIRED.toString(), collectionResponse.getCollectionType());
List<CollectionElementResponse> collectionElements = collectionResponse.getElements();
assertEquals("invalid number of elements in the dataset collection", 1, collectionElements.size());
Map<String, CollectionElementResponse> collectionElementsMap = collectionElementsAsMap(collectionElements);
assertTrue("the dataset collection element should have name " + sample1.getSampleName(), collectionElementsMap.containsKey(sample1.getSampleName()));
CollectionElementResponse sample1Response = collectionElementsMap.get(sample1.getSampleName());
// verify collection has 2 files (paired end data)
ElementResponse subElements = sample1Response.getResponseElement();
assertEquals("invalid class for sub-element in dataset collection", CollectionResponse.class, subElements.getClass());
CollectionResponse subElementsCollection = (CollectionResponse) subElements;
assertEquals("invalid type for sub-element in dataset collection", DatasetCollectionType.PAIRED.toString(), subElementsCollection.getCollectionType());
List<CollectionElementResponse> subCollectionElements = subElementsCollection.getElements();
assertEquals("invalid number of files for paired dataset collection element", 2, subCollectionElements.size());
Map<String, CollectionElementResponse> subCollectionElementsMap = collectionElementsAsMap(subCollectionElements);
assertTrue("dataset collection should have a sub-element with name " + FORWARD_NAME, subCollectionElementsMap.containsKey(FORWARD_NAME));
assertTrue("dataset collection should have a sub-element with name " + REVERSE_NAME, subCollectionElementsMap.containsKey(REVERSE_NAME));
// verify paired-end files are correct type in collection
CollectionElementResponse sequenceFile1 = subCollectionElementsMap.get(FORWARD_NAME);
CollectionElementResponse sequenceFile2 = subCollectionElementsMap.get(REVERSE_NAME);
assertEquals("the " + FORWARD_NAME + " sub-element should be a history dataset", HISTORY_DATASET_NAME, sequenceFile1.getElementType());
assertEquals("the " + REVERSE_NAME + " sub-element should be a history dataset", HISTORY_DATASET_NAME, sequenceFile2.getElementType());
// verify paired-end files are in correct order in collection
ElementResponse sequenceFile1Response = sequenceFile1.getResponseElement();
assertEquals("the " + FORWARD_NAME + " element is not of the correct type", Dataset.class, sequenceFile1Response.getClass());
ElementResponse sequenceFile2Response = sequenceFile2.getResponseElement();
assertEquals("the " + REVERSE_NAME + " element is not of the correct type", Dataset.class, sequenceFile2Response.getClass());
Dataset sequenceFile1Dataset = (Dataset) sequenceFile1Response;
assertEquals("forward file in Galaxy is named incorrectly", sequenceFilePathA.getFileName().toString(), sequenceFile1Dataset.getName());
Dataset sequenceFile2Dataset = (Dataset) sequenceFile2Response;
assertEquals("reverse file in Galaxy is named incorrectly", sequenceFilePath2A.getFileName().toString(), sequenceFile2Dataset.getName());
}
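The test relies on two helpers, historyContentsAsMap and collectionElementsAsMap, that are not shown in this excerpt. A plausible sketch of both is given below, indexing each entry by its name and reusing the java.util and blend4j types already used by the test class; the exact bodies in AnalysisCollectionServiceGalaxyIT may differ, and treating getElementIdentifier() as the element's name is an assumption about the blend4j response type.

// Sketch: index history datasets by name so the assertions above can look them up directly.
private Map<String, HistoryContents> historyContentsAsMap(List<HistoryContents> historyContents) {
    Map<String, HistoryContents> map = new HashMap<>();
    for (HistoryContents content : historyContents) {
        map.put(content.getName(), content);
    }
    return map;
}

// Sketch: index dataset collection elements by element identifier, assumed to be the
// sample name (outer collection) or FORWARD_NAME/REVERSE_NAME (inner paired collection).
private Map<String, CollectionElementResponse> collectionElementsAsMap(List<CollectionElementResponse> elements) {
    Map<String, CollectionElementResponse> map = new HashMap<>();
    for (CollectionElementResponse element : elements) {
        map.put(element.getElementIdentifier(), element);
    }
    return map;
}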
Use of ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFilePair in project irida by phac-nml.
The class AnalysisWorkspaceServiceGalaxyIT, method testGetAnalysisResultsTestAnalysisSinglePairedSuccess.
/**
* Tests successfully getting results for an analysis (TestAnalysis)
* consisting of both single-end and paired-end sequence reads.
*
* @throws InterruptedException
* @throws ExecutionManagerException
* @throws IridaWorkflowNotFoundException
* @throws IOException
* @throws IridaWorkflowAnalysisTypeException
* @throws TimeoutException
*/
@Test
@WithMockUser(username = "aaron", roles = "ADMIN")
public void testGetAnalysisResultsTestAnalysisSinglePairedSuccess() throws InterruptedException, ExecutionManagerException, IridaWorkflowNotFoundException, IOException, IridaWorkflowAnalysisTypeException, TimeoutException {
History history = new History();
history.setName("testGetAnalysisResultsTestAnalysisSinglePairedSuccess");
HistoriesClient historiesClient = localGalaxy.getGalaxyInstanceAdmin().getHistoriesClient();
WorkflowsClient workflowsClient = localGalaxy.getGalaxyInstanceAdmin().getWorkflowsClient();
ToolsClient toolsClient = localGalaxy.getGalaxyInstanceAdmin().getToolsClient();
History createdHistory = historiesClient.create(history);
// upload test outputs
uploadFileToHistory(sequenceFilePathA, OUTPUT1_NAME, createdHistory.getId(), toolsClient);
uploadFileToHistory(sequenceFilePathA, OUTPUT2_NAME, createdHistory.getId(), toolsClient);
// wait for history
Util.waitUntilHistoryComplete(createdHistory.getId(), galaxyHistoriesService, 60);
IridaWorkflow iridaWorkflow = iridaWorkflowsService.getIridaWorkflow(validWorkflowIdSinglePaired);
Path workflowPath = iridaWorkflow.getWorkflowStructure().getWorkflowFile();
String workflowString = new String(Files.readAllBytes(workflowPath), StandardCharsets.UTF_8);
Workflow galaxyWorkflow = workflowsClient.importWorkflow(workflowString);
List<Path> paths1 = new ArrayList<>();
paths1.add(sequenceFilePathA);
List<Path> paths2 = new ArrayList<>();
paths2.add(sequenceFilePath2A);
AnalysisSubmission analysisSubmission = analysisExecutionGalaxyITService.setupSinglePairSubmissionInDatabaseSameSample(1L, paths1, paths2, sequenceFilePath3, referenceFilePath, validWorkflowIdSinglePaired);
Set<SingleEndSequenceFile> singleFiles = sequencingObjectService.getSequencingObjectsOfTypeForAnalysisSubmission(analysisSubmission, SingleEndSequenceFile.class);
Set<SequenceFilePair> pairedFiles = sequencingObjectService.getSequencingObjectsOfTypeForAnalysisSubmission(analysisSubmission, SequenceFilePair.class);
assertEquals("invalid number of single end input files", 1, singleFiles.size());
assertEquals("invalid number of paired end inputs", 1, pairedFiles.size());
SequenceFilePair submittedSp = pairedFiles.iterator().next();
Set<SequenceFile> submittedSf = submittedSp.getFiles();
assertEquals("invalid number of files for paired input", 2, submittedSf.size());
analysisSubmission.setRemoteAnalysisId(createdHistory.getId());
analysisSubmission.setRemoteWorkflowId(galaxyWorkflow.getId());
analysisSubmission.setAnalysisState(AnalysisState.COMPLETING);
analysisSubmissionRepository.save(analysisSubmission);
Analysis analysis = analysisWorkspaceService.getAnalysisResults(analysisSubmission);
assertNotNull("the analysis results were not properly created", analysis);
assertEquals("the Analysis results class is invalid", Analysis.class, analysis.getClass());
assertEquals("the analysis results has an invalid number of output files", 2, analysis.getAnalysisOutputFiles().size());
assertEquals("the analysis results output file has an invalid name", Paths.get(OUTPUT1_NAME), analysis.getAnalysisOutputFile(OUTPUT1_KEY).getFile().getFileName());
assertEquals("the analysis results output file has an invalid label", OUTPUT1_NAME, analysis.getAnalysisOutputFile(OUTPUT1_KEY).getLabel());
assertEquals("the analysis results output file has an invalid name", Paths.get(OUTPUT2_NAME), analysis.getAnalysisOutputFile(OUTPUT2_KEY).getFile().getFileName());
assertEquals("the analysis results output file has an invalid label", OUTPUT2_NAME, analysis.getAnalysisOutputFile(OUTPUT2_KEY).getLabel());
}
Use of ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFilePair in project irida by phac-nml.
The class AnalysisWorkspaceServiceGalaxyTest, method testGetAnalysisResultsSuccessPairedEnd.
/**
* Tests successfully getting analysis results from Galaxy with paired end
* input files.
*
* @throws IridaWorkflowNotFoundException
* @throws IOException
* @throws ExecutionManagerException
* @throws IridaWorkflowAnalysisTypeException
*/
@Test
public void testGetAnalysisResultsSuccessPairedEnd() throws IridaWorkflowNotFoundException, IridaWorkflowAnalysisTypeException, ExecutionManagerException, IOException {
Set<SequenceFilePair> pairedFiles = Sets.newHashSet(sampleSequenceFilePairMap.values());
submission = AnalysisSubmission.builder(workflowId).name("my analysis").inputFiles(pairedInputFiles).referenceFile(referenceFile).build();
submission.setRemoteWorkflowId(WORKFLOW_ID);
submission.setRemoteAnalysisId(HISTORY_ID);
when(iridaWorkflowsService.getIridaWorkflow(workflowId)).thenReturn(iridaWorkflowSingle);
when(galaxyHistoriesService.getDatasetForFileInHistory(output1Filename, HISTORY_ID)).thenReturn(output1Dataset);
when(galaxyHistoriesService.getDatasetForFileInHistory(output2Filename, HISTORY_ID)).thenReturn(output2Dataset);
when(sequencingObjectService.getSequencingObjectsForAnalysisSubmission(submission)).thenReturn(Sets.newHashSet(pairedFiles));
when(sequencingObjectService.getUniqueSamplesForSequencingObjects(pairedFiles)).thenReturn(sampleSequenceFilePairMap);
Analysis analysis = workflowPreparation.getAnalysisResults(submission);
assertNotNull("analysis is not valid", analysis);
assertEquals("invalid number of output files", 2, analysis.getAnalysisOutputFiles().size());
assertEquals("missing output file for analysis", Paths.get("output1.txt"), analysis.getAnalysisOutputFile("output1").getFile().getFileName());
assertEquals("missing label for analysis output file", "SampleB-output1.txt", analysis.getAnalysisOutputFile("output1").getLabel());
assertEquals("missing output file for analysis", "SampleB-output2.txt", analysis.getAnalysisOutputFile("output2").getLabel());
verify(galaxyHistoriesService).getDatasetForFileInHistory("output1.txt", HISTORY_ID);
verify(galaxyHistoriesService).getDatasetForFileInHistory("output2.txt", HISTORY_ID);
}
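The expected labels in this test suggest a sample-prefixed naming convention for the output files of a paired-end submission. A minimal sketch of that assumed convention, inferred only from the expected values and not from IRIDA source, is:

// Assumed convention behind the label assertions above: "<sampleName>-<outputFileName>".
String sampleName = "SampleB";
String expectedOutput1Label = sampleName + "-" + "output1.txt"; // "SampleB-output1.txt"
String expectedOutput2Label = sampleName + "-" + "output2.txt"; // "SampleB-output2.txt"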