use of ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFilePair in project irida by phac-nml.
the class SNVPhylAnalysisIT method testSNVPhylSuccessHigherSNVReadProportion.
/**
 * Tests out successfully executing the SNVPhyl pipeline and passing a higher
 * value for fraction of reads to call a SNP.
 *
 * Three sequence file pairs are submitted with an explicit parameter map
 * ("snv-abundance-ratio" = "0.90" etc.). After the submission completes, the
 * test compares the deterministic output files against the expected fixtures
 * and then walks the tool provenance to confirm the parameters were passed
 * through to the executed tools.
 *
 * @throws Exception
 *             if setting up, running, or inspecting the analysis fails.
 */
@Test
@WithMockUser(username = "aaron", roles = "ADMIN")
public void testSNVPhylSuccessHigherSNVReadProportion() throws Exception {
    SequenceFilePair sequenceFilePairA = databaseSetupGalaxyITService
            .setupSampleSequenceFileInDatabase(1L, sequenceFilePathsA1List, sequenceFilePathsA2List).get(0);
    SequenceFilePair sequenceFilePairB = databaseSetupGalaxyITService
            .setupSampleSequenceFileInDatabase(2L, sequenceFilePathsB1List, sequenceFilePathsB2List).get(0);
    SequenceFilePair sequenceFilePairC = databaseSetupGalaxyITService
            .setupSampleSequenceFileInDatabase(3L, sequenceFilePathsC1List, sequenceFilePathsC2List).get(0);
    waitForFilesToSettle(sequenceFilePairA, sequenceFilePairB, sequenceFilePairC);

    Map<String, String> parameters = ImmutableMap.<String, String>builder()
            .put("snv-abundance-ratio", "0.90")
            .put("minimum-read-coverage", "2")
            .put("minimum-percent-coverage", "75")
            .put("minimum-mean-mapping-quality", "20")
            .put("filter-density-threshold", "3")
            .put("filter-density-window-size", "30")
            .build();

    AnalysisSubmission submission = databaseSetupGalaxyITService.setupPairSubmissionInDatabase(
            Sets.newHashSet(sequenceFilePairA, sequenceFilePairB, sequenceFilePairC), referenceFilePath,
            parameters, snvPhylWorkflow.getWorkflowIdentifier());
    completeSubmittedAnalyses(submission.getId());

    // re-read the submission so the assertions see the persisted state
    submission = analysisSubmissionRepository.findOne(submission.getId());
    assertEquals("analysis state should be completed.", AnalysisState.COMPLETED, submission.getAnalysisState());

    Analysis analysisPhylogenomics = submission.getAnalysis();
    assertEquals("Should have generated a phylogenomics pipeline analysis type.", AnalysisType.PHYLOGENOMICS,
            analysisPhylogenomics.getAnalysisType());
    assertEquals("the phylogenomics pipeline should have 8 output files.", 8,
            analysisPhylogenomics.getAnalysisOutputFiles().size());

    assertSnvPhylOutputMatches(analysisPhylogenomics, MATRIX_KEY, "snpMatrix", outputSnvMatrix2);
    assertSnvPhylOutputMatches(analysisPhylogenomics, TABLE_KEY, "snpTable", outputSnvTable2);
    assertSnvPhylOutputMatches(analysisPhylogenomics, CORE_KEY, "vcf2core", vcf2core2);

    // only check size of mapping quality file due to samples output in random order
    assertTrue("the mapping quality file should not be empty.",
            Files.size(analysisPhylogenomics.getAnalysisOutputFile(QUALITY_KEY).getFile()) > 0);

    assertSnvPhylOutputMatches(analysisPhylogenomics, STATS_KEY, "filterStats", filterStats2);
    assertSnvPhylOutputMatches(analysisPhylogenomics, ALIGN_KEY, "snvAlign", snvAlign2);

    // only test to make sure the files have a valid size since PhyML uses a
    // random seed to generate the tree (and so changes results)
    assertTrue("the phylogenetic tree file should not be empty.",
            Files.size(analysisPhylogenomics.getAnalysisOutputFile(TREE_KEY).getFile()) > 0);
    // NOTE(review): this re-checks TREE_KEY although the message refers to the
    // tree stats file -- confirm whether a separate output key should be used.
    assertTrue("the phylogenetic tree stats file should not be empty.",
            Files.size(analysisPhylogenomics.getAnalysisOutputFile(TREE_KEY).getFile()) > 0);

    // try to follow the phylogenomics provenance all the way back to the
    // upload tools (the ones where you upload the reads).
    List<ToolExecution> toolsToVisit = Lists
            .newArrayList(analysisPhylogenomics.getAnalysisOutputFile(TREE_KEY).getCreatedByTool());
    // NOTE(review): the list is built from a single element, so this check
    // looks like it can never fail -- confirm the intended assertion.
    assertFalse("file should have tool provenance attached.", toolsToVisit.isEmpty());

    String minVcf2AlignCov = null;
    String altAlleleFraction = null;
    String minimumPercentCoverage = null;
    String minimumDepthVerify = null;
    String filterDensityThreshold = null;
    String filterDensityWindowSize = null;

    // visit each tool execution, queueing its previous steps, until the
    // "Consolidate VCFs" step is found
    while (!toolsToVisit.isEmpty()) {
        final ToolExecution ex = toolsToVisit.remove(0);
        toolsToVisit.addAll(ex.getPreviousSteps());
        if (ex.getToolName().contains("Consolidate VCFs")) {
            final Map<String, String> params = ex.getExecutionTimeParameters();
            minVcf2AlignCov = params.get("coverage");
            altAlleleFraction = params.get("snv_abundance_ratio");
            filterDensityThreshold = params.get("use_density_filter.threshold");
            filterDensityWindowSize = params.get("use_density_filter.window_size");
            break;
        }
    }

    // try to follow the mapping quality provenance all the way back to the
    // upload tools
    toolsToVisit = Lists
            .newArrayList(analysisPhylogenomics.getAnalysisOutputFile(QUALITY_KEY).getCreatedByTool());
    assertFalse("file should have tool provenance attached.", toolsToVisit.isEmpty());

    // unlike the loop above this one does not stop at the first match; the
    // last visited "Verify Mapping Quality" step wins
    while (!toolsToVisit.isEmpty()) {
        final ToolExecution ex = toolsToVisit.remove(0);
        toolsToVisit.addAll(ex.getPreviousSteps());
        if (ex.getToolName().contains("Verify Mapping Quality")) {
            final Map<String, String> params = ex.getExecutionTimeParameters();
            minimumPercentCoverage = params.get("minmap");
            minimumDepthVerify = params.get("mindepth");
        }
    }

    // the first four values are expected wrapped in double quotes, the
    // density filter values are expected bare
    assertEquals("incorrect minimum vcf 2 align coverage", "\"2\"", minVcf2AlignCov);
    assertEquals("incorrect alternative allele fraction", "\"0.90\"", altAlleleFraction);
    assertEquals("incorrect minimum depth for verify map", "\"2\"", minimumDepthVerify);
    assertEquals("incorrect min percent coverage for verify map", "\"75\"", minimumPercentCoverage);
    assertEquals("incorrect filter density threshold", "3", filterDensityThreshold);
    assertEquals("incorrect filter density window size", "30", filterDensityWindowSize);
}

/**
 * Asserts that the analysis output file stored under {@code outputKey} has the
 * same bytes as {@code expectedFile} and that it has tool provenance attached.
 * The actual file content is read up front so it can be shown in the failure
 * message; the scanner used for that is closed before the assertions run.
 *
 * @param analysis
 *            the analysis whose output file is inspected.
 * @param outputKey
 *            the key of the output file within the analysis.
 * @param label
 *            the name used for this output in the failure message.
 * @param expectedFile
 *            the file holding the expected content.
 * @throws java.io.IOException
 *             if either file cannot be read.
 */
private static void assertSnvPhylOutputMatches(Analysis analysis, String outputKey, String label,
        java.nio.file.Path expectedFile) throws java.io.IOException {
    final java.io.File actualFile = analysis.getAnalysisOutputFile(outputKey).getFile().toFile();

    final String actualContent;
    // "\\Z" makes the scanner return the whole file as a single token
    try (Scanner scanner = new Scanner(actualFile)) {
        actualContent = scanner.useDelimiter("\\Z").next();
    }

    assertTrue(label + " should be the same but is \"" + actualContent + "\"",
            com.google.common.io.Files.equal(expectedFile.toFile(), actualFile));
    assertNotNull("file should have tool provenance attached.",
            analysis.getAnalysisOutputFile(outputKey).getCreatedByTool());
}
use of ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFilePair in project irida by phac-nml.
the class SequenceFilePairTest method setup.
/**
 * Builds the path, sequence file, and sequence file pair fixtures shared by
 * the tests. One pair uses the file names Test_R1_001.fastq /
 * Test_R2_001.fastq, the other uses Test_A.fastq / Test_B.fastq; all paths
 * are resolved under "/tmp".
 *
 * @throws IOException
 *             declared by the signature.
 */
@Before
public void setup() throws IOException {
    final Path baseDirectory = Paths.get("/tmp");

    // the "good" forward/reverse fixtures
    forwardPathGood = baseDirectory.resolve("Test_R1_001.fastq");
    reversePathGood = baseDirectory.resolve("Test_R2_001.fastq");
    sequenceFileForwardGood = new SequenceFile(forwardPathGood);
    sequenceFileReverseGood = new SequenceFile(reversePathGood);
    sequenceFilePairGood = new SequenceFilePair(sequenceFileForwardGood, sequenceFileReverseGood);

    // the "bad" forward/reverse fixtures
    forwardPathBad = baseDirectory.resolve("Test_A.fastq");
    reversePathBad = baseDirectory.resolve("Test_B.fastq");
    sequenceFileForwardBad = new SequenceFile(forwardPathBad);
    sequenceFileReverseBad = new SequenceFile(reversePathBad);
    sequenceFilePairBad = new SequenceFilePair(sequenceFileForwardBad, sequenceFileReverseBad);
}
use of ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFilePair in project irida by phac-nml.
the class SequenceFilePairConcatenatorTest method testConcatenateFiles.
/**
 * Concatenates two sequence file pairs and checks that both resulting files
 * exist and that the forward file is twice the length of the first original
 * forward file.
 *
 * @throws IOException
 *             declared by the signature.
 * @throws ConcatenateException
 *             if the concatenation fails.
 */
@Test
public void testConcatenateFiles() throws IOException, ConcatenateException {
    final SequenceFile firstForward = createSequenceFile("testFile_F");
    final SequenceFile firstReverse = createSequenceFile("testFile_R");
    final SequenceFile secondForward = createSequenceFile("testFile2_F");
    final SequenceFile secondReverse = createSequenceFile("testFile2_R");

    // size of a single input file, measured before concatenating
    final long singleFileLength = firstForward.getFile().toFile().length();

    final SequenceFilePair firstPair = new SequenceFilePair(firstForward, firstReverse);
    final SequenceFilePair secondPair = new SequenceFilePair(secondForward, secondReverse);

    final SequenceFilePair merged = concat.concatenateFiles(Lists.newArrayList(firstPair, secondPair),
            "newFile");
    final SequenceFile mergedForward = merged.getForwardSequenceFile();
    final SequenceFile mergedReverse = merged.getReverseSequenceFile();

    assertTrue("file exists", Files.exists(mergedForward.getFile()));
    assertTrue("file exists", Files.exists(mergedReverse.getFile()));

    // only the forward file's size is compared
    assertEquals("new file should be 2x size of originals", singleFileLength * 2,
            mergedForward.getFile().toFile().length());
}
use of ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFilePair in project irida by phac-nml.
the class AssemblyFileProcessorTest method testAssembleFile.
/**
 * With a single project that has assemble-uploads switched on, the processor
 * should report that the file is to be processed and processing the pair
 * should save an analysis submission.
 */
@Test
public void testAssembleFile() {
    final Long objectId = 1L;

    final SequenceFile forwardFile = new SequenceFile(Paths.get("file_R1_1.fastq.gz"));
    final SequenceFile reverseFile = new SequenceFile(Paths.get("file_R2_1.fastq.gz"));
    final SequenceFilePair filePair = new SequenceFilePair(forwardFile, reverseFile);

    final Sample owningSample = new Sample();
    final Project assemblingProject = new Project();
    assemblingProject.setAssembleUploads(true);

    // wire the mocked repositories: id -> pair -> sample -> project
    when(objectRepository.findOne(objectId)).thenReturn(filePair);
    when(ssoRepository.getSampleForSequencingObject(filePair))
            .thenReturn(new SampleSequencingObjectJoin(owningSample, filePair));
    when(psjRepository.getProjectForSample(owningSample))
            .thenReturn(ImmutableList.of(new ProjectSampleJoin(assemblingProject, owningSample, true)));

    assertTrue("should want to assemble file", processor.shouldProcessFile(objectId));

    processor.process(filePair);

    verify(submissionRepository).save(any(AnalysisSubmission.class));
}
use of ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFilePair in project irida by phac-nml.
the class AssemblyFileProcessorTest method testOneProjectEnabled.
/**
 * When the sample belongs to two projects and only one of them has
 * assemble-uploads switched on, processing the pair should still save an
 * analysis submission.
 */
@Test
public void testOneProjectEnabled() {
    final SequenceFile forwardFile = new SequenceFile(Paths.get("file_R1_1.fastq.gz"));
    final SequenceFile reverseFile = new SequenceFile(Paths.get("file_R2_1.fastq.gz"));
    final SequenceFilePair filePair = new SequenceFilePair(forwardFile, reverseFile);

    final Sample owningSample = new Sample();

    final Project enabledProject = new Project();
    enabledProject.setAssembleUploads(true);

    final Project disabledProject = new Project();
    disabledProject.setAssembleUploads(false);

    // the disabled project is listed first, the enabled one second
    final ProjectSampleJoin disabledJoin = new ProjectSampleJoin(disabledProject, owningSample, true);
    final ProjectSampleJoin enabledJoin = new ProjectSampleJoin(enabledProject, owningSample, true);

    when(ssoRepository.getSampleForSequencingObject(filePair))
            .thenReturn(new SampleSequencingObjectJoin(owningSample, filePair));
    when(psjRepository.getProjectForSample(owningSample))
            .thenReturn(ImmutableList.of(disabledJoin, enabledJoin));

    processor.process(filePair);

    verify(submissionRepository).save(any(AnalysisSubmission.class));
}
Aggregations