use of ca.corefacility.bioinformatics.irida.model.sequenceFile.OverrepresentedSequence in project irida by phac-nml.
the class SequencingObjectServiceImplIT method testCreateNotCompressedSequenceFile.
/**
 * Creates a single-end sequencing object from an uncompressed sequence file
 * as a sequencer, waits for file processing to bring the file to revision 1,
 * and then verifies the upload checksum, the FastQC overrepresented
 * sequences, and the directory layout beneath the file's ID directory.
 *
 * @throws IOException
 *             if the ID directory cannot be listed
 * @throws InterruptedException
 *             if the thread is interrupted while waiting for processing
 */
@Test
@WithMockUser(username = "fbristow", roles = "SEQUENCER")
public void testCreateNotCompressedSequenceFile() throws IOException, InterruptedException {
    final Long expectedRevisionNumber = 1L;
    SequenceFile sf = createSequenceFile("file1");
    Path sequenceFile = sf.getFile();
    SingleEndSequenceFile singleEndSequenceFile = new SingleEndSequenceFile(sf);
    logger.trace("About to save the file.");
    SequencingObject sequencingObject = asRole(Role.ROLE_SEQUENCER, "fbristow").objectService.create(singleEndSequenceFile);
    logger.trace("Finished saving the file.");
    assertNotNull("ID wasn't assigned.", sequencingObject.getId());
    // Sleeping for a bit to let file processing run
    Thread.sleep(10000);
    // figure out what the version number of the sequence file is (should be
    // 1; the file wasn't gzipped, but fastqc will have modified it.)
    SequencingObject readObject = null;
    do {
        readObject = asRole(Role.ROLE_ADMIN, "admin").objectService.read(sequencingObject.getId());
        sf = readObject.getFiles().iterator().next();
        if (sf.getFileRevisionNumber() < expectedRevisionNumber) {
            logger.info("Still waiting on thread to finish, having a bit of a sleep.");
            Thread.sleep(1000);
        }
    } while (sf.getFileRevisionNumber() < expectedRevisionNumber);
    assertEquals("Wrong version number after processing.", expectedRevisionNumber, sf.getFileRevisionNumber());
    // verify the file checksum was taken properly
    assertEquals("checksum should be equal", CHECKSUM, sf.getUploadSha256());
    AnalysisFastQC analysis = asRole(Role.ROLE_ADMIN, "admin").analysisService.getFastQCAnalysisForSequenceFile(readObject, sf.getId());
    assertNotNull("FastQCAnalysis should have been created for the file.", analysis);
    Set<OverrepresentedSequence> overrepresentedSequences = analysis.getOverrepresentedSequences();
    assertNotNull("No overrepresented sequences were found.", overrepresentedSequences);
    assertEquals("Wrong number of overrepresented sequences were found.", 1, overrepresentedSequences.size());
    OverrepresentedSequence overrepresentedSequence = overrepresentedSequences.iterator().next();
    assertEquals("Sequence was not the correct sequence.", SEQUENCE, overrepresentedSequence.getSequence());
    assertEquals("The count was not correct.", 2, overrepresentedSequence.getOverrepresentedSequenceCount());
    assertEquals("The percent was not correct.", new BigDecimal("100.00"), overrepresentedSequence.getPercentage());
    // confirm that the file structure is correct
    Path idDirectory = baseDirectory.resolve(Paths.get(sf.getId().toString()));
    assertTrue("Revision directory doesn't exist.", Files.exists(idDirectory.resolve(Paths.get(sf.getFileRevisionNumber().toString(), sequenceFile.getFileName().toString()))));
    // no other files or directories should be beneath the ID directory
    int fileCount = 0;
    // A DirectoryStream holds an open directory handle, so it is closed
    // explicitly via try-with-resources once the entries are counted.
    try (java.nio.file.DirectoryStream<Path> children = Files.newDirectoryStream(idDirectory)) {
        for (Path ignored : children) {
            fileCount++;
        }
    }
    assertEquals("Wrong number of directories beneath the id directory", 1, fileCount);
}
use of ca.corefacility.bioinformatics.irida.model.sequenceFile.OverrepresentedSequence in project irida by phac-nml.
the class FastqcFileProcessor method handleOverRepresentedSequences.
/**
 * Convert the overrepresented sequences reported by FastQC into
 * {@link OverrepresentedSequence} instances.
 *
 * @param seqs
 *            the FastQC {@link OverRepresentedSeqs} module to read from.
 * @return a set of {@link OverrepresentedSequence} built from the entries
 *         FastQC reported, or an empty set when FastQC returned
 *         {@code null}.
 */
private Set<OverrepresentedSequence> handleOverRepresentedSequences(OverRepresentedSeqs seqs) {
    final OverrepresentedSeq[] fastqcEntries = seqs.getOverrepresentedSequences();
    if (fastqcEntries == null) {
        return Collections.emptySet();
    }

    final Set<OverrepresentedSequence> converted = new HashSet<>(fastqcEntries.length);
    for (int i = 0; i < fastqcEntries.length; i++) {
        final OverrepresentedSeq entry = fastqcEntries[i];
        // FastQC reports the percentage as a floating point value; it is
        // stored as a BigDecimal on the converted sequence.
        converted.add(new OverrepresentedSequence(entry.seq(), entry.count(),
                BigDecimal.valueOf(entry.percentage()), entry.contaminantHit()));
    }
    return converted;
}
use of ca.corefacility.bioinformatics.irida.model.sequenceFile.OverrepresentedSequence in project irida by phac-nml.
the class FastqcFileProcessor method processSingleFile.
/**
 * Run the FastQC modules over a single {@link SequenceFile} and save the
 * resulting analysis on that file.
 *
 * @param sequenceFile
 *            the file to run FastQC on
 * @throws FileProcessorException
 *             if anything goes wrong while reading the file, running the
 *             modules, or saving the result
 */
private void processSingleFile(SequenceFile sequenceFile) throws FileProcessorException {
    final Path inputPath = sequenceFile.getFile();
    final AnalysisFastQC.AnalysisFastQCBuilder analysisBuilder = AnalysisFastQC.builder()
            .executionManagerAnalysisId(EXECUTION_MANAGER_ANALYSIS_ID)
            .description(messageSource.getMessage("fastqc.file.processor.analysis.description", null,
                    LocaleContextHolder.getLocale()));
    try {
        final uk.ac.babraham.FastQC.Sequence.SequenceFile reads = SequenceFactory
                .getSequenceFile(inputPath.toFile());

        final BasicStats statsModule = new BasicStats();
        final PerBaseQualityScores perBaseModule = new PerBaseQualityScores();
        final PerSequenceQualityScores perSequenceModule = new PerSequenceQualityScores();
        final OverRepresentedSeqs overrepresentedModule = new OverRepresentedSeqs();
        final QCModule[] modules = { statsModule, perBaseModule, perSequenceModule, overrepresentedModule };

        // every module is shown every sequence in the file, in file order
        logger.debug("Launching FastQC analysis modules on all sequences.");
        while (reads.hasNext()) {
            final Sequence current = reads.next();
            for (int i = 0; i < modules.length; i++) {
                modules[i].processSequence(current);
            }
        }
        logger.debug("Finished FastQC analysis modules.");

        // copy each module's results onto the analysis builder
        handleBasicStats(statsModule, analysisBuilder);
        handlePerBaseQualityScores(perBaseModule, analysisBuilder);
        handlePerSequenceQualityScores(perSequenceModule, analysisBuilder);
        handleDuplicationLevel(overrepresentedModule.duplicationLevelModule(), analysisBuilder);
        final Set<OverrepresentedSequence> converted = handleOverRepresentedSequences(overrepresentedModule);

        logger.trace("Saving FastQC analysis.");
        analysisBuilder.overrepresentedSequences(converted);
        sequenceFile.setFastQCAnalysis(analysisBuilder.build());
        sequenceFileRepository.saveMetadata(sequenceFile);
    } catch (Exception e) {
        // any failure is logged here and rethrown wrapped, keeping the cause
        logger.error("FastQC failed to process the sequence file. Stack trace follows.", e);
        throw new FileProcessorException("FastQC failed to parse the sequence file.", e);
    }
}
use of ca.corefacility.bioinformatics.irida.model.sequenceFile.OverrepresentedSequence in project irida by phac-nml.
the class SequencingObjectServiceImplIT method testCreateCompressedSequenceFile.
/**
 * Creates a single-end sequencing object from a gzipped temporary file,
 * waits for file processing to bring the file to revision 2, and then
 * verifies that the stored file name no longer ends in {@code .gz}, the
 * upload checksum, the FastQC overrepresented sequences, and the directory
 * layout beneath the file's ID directory.
 *
 * @throws IOException
 *             if the temporary file cannot be written or the ID directory
 *             cannot be listed
 * @throws InterruptedException
 *             if the thread is interrupted while waiting for processing
 */
@Test
@WithMockUser(username = "fbristow", roles = "SEQUENCER")
public void testCreateCompressedSequenceFile() throws IOException, InterruptedException {
    final Long expectedRevisionNumber = 2L;
    SequenceFile sf = new SequenceFile();
    Path sequenceFile = Files.createTempFile("TEMPORARY-SEQUENCE-FILE", ".gz");
    // try-with-resources guarantees the gzip trailer is flushed and the
    // stream is closed even if the write fails.
    try (OutputStream gzOut = new GZIPOutputStream(Files.newOutputStream(sequenceFile))) {
        gzOut.write(FASTQ_FILE_CONTENTS);
    }
    sf.setFile(sequenceFile);
    SingleEndSequenceFile singleEndSequenceFile = new SingleEndSequenceFile(sf);
    logger.trace("About to save the file.");
    SequencingObject sequencingObject = objectService.create(singleEndSequenceFile);
    logger.trace("Finished saving the file.");
    assertNotNull("ID wasn't assigned.", sequencingObject.getId());
    // Sleeping for a bit to let file processing run
    Thread.sleep(10000);
    // figure out what the version number of the sequence file is (should be
    // 2; the file was gzipped)
    // get the MOST RECENT version of the sequence file from the database
    // (it will have been modified outside of the create method.)
    SequencingObject readObject = null;
    do {
        readObject = asRole(Role.ROLE_ADMIN, "admin").objectService.read(sequencingObject.getId());
        sf = readObject.getFiles().iterator().next();
        if (sf.getFileRevisionNumber() < expectedRevisionNumber) {
            logger.info("Still waiting on thread to finish, having a bit of a sleep.");
            Thread.sleep(1000);
        }
    } while (sf.getFileRevisionNumber() < expectedRevisionNumber);
    assertEquals("Wrong version number after processing.", expectedRevisionNumber, sf.getFileRevisionNumber());
    assertFalse("File name is still gzipped.", sf.getFile().getFileName().toString().endsWith(".gz"));
    AnalysisFastQC analysis = asRole(Role.ROLE_ADMIN, "admin").analysisService.getFastQCAnalysisForSequenceFile(readObject, sf.getId());
    // fail with a clear message rather than a NullPointerException below
    assertNotNull("FastQCAnalysis should have been created for the file.", analysis);
    // verify the file checksum was taken properly
    assertEquals("checksum should be equal", ZIPPED_CHECKSUM, sf.getUploadSha256());
    Set<OverrepresentedSequence> overrepresentedSequences = analysis.getOverrepresentedSequences();
    assertNotNull("No overrepresented sequences were found.", overrepresentedSequences);
    assertEquals("Wrong number of overrepresented sequences were found.", 1, overrepresentedSequences.size());
    OverrepresentedSequence overrepresentedSequence = overrepresentedSequences.iterator().next();
    assertEquals("Sequence was not the correct sequence.", SEQUENCE, overrepresentedSequence.getSequence());
    assertEquals("The count was not correct.", 2, overrepresentedSequence.getOverrepresentedSequenceCount());
    assertEquals("The percent was not correct.", new BigDecimal("100.00"), overrepresentedSequence.getPercentage());
    // confirm that the file structure is correct
    String filename = sequenceFile.getFileName().toString();
    // strip the trailing ".gz" to get the expected uncompressed file name
    filename = filename.substring(0, filename.lastIndexOf('.'));
    Path idDirectory = baseDirectory.resolve(Paths.get(sf.getId().toString()));
    assertTrue("Revision directory doesn't exist.", Files.exists(idDirectory.resolve(Paths.get(sf.getFileRevisionNumber().toString(), filename))));
    // no other files or directories should be beneath the ID directory
    int fileCount = 0;
    // A DirectoryStream holds an open directory handle, so it is closed
    // explicitly via try-with-resources once the entries are counted.
    try (java.nio.file.DirectoryStream<Path> children = Files.newDirectoryStream(idDirectory)) {
        for (Path ignored : children) {
            fileCount++;
        }
    }
    assertEquals("Wrong number of directories beneath the id directory", 2, fileCount);
}
use of ca.corefacility.bioinformatics.irida.model.sequenceFile.OverrepresentedSequence in project irida by phac-nml.
the class FastqcFileProcessorTest method testHandleFastqFile.
/**
 * Runs the file processor on a temporary FASTQ file and verifies the FastQC
 * analysis attached to the {@link SequenceFile} passed to
 * {@code saveMetadata}: basic statistics, the three charts, and the
 * overrepresented sequence.
 *
 * @throws IOException
 *             if the temporary FASTQ file cannot be created or written
 * @throws IllegalArgumentException
 *             if the reflective field read is given an incompatible object
 * @throws IllegalAccessException
 *             if the reflective field read is not permitted
 */
@Test
public void testHandleFastqFile() throws IOException, IllegalArgumentException, IllegalAccessException {
    // fastqc shouldn't barf on a fastq file.
    Path fastq = Files.createTempFile(null, null);
    // explicit charset so the bytes written do not depend on the platform default
    Files.write(fastq, FASTQ_FILE_CONTENTS.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    Runtime.getRuntime().addShutdownHook(new DeleteFileOnExit(fastq));
    ArgumentCaptor<SequenceFile> argument = ArgumentCaptor.forClass(SequenceFile.class);
    SequenceFile sf = new SequenceFile(fastq);
    sf.setId(1L);
    SingleEndSequenceFile so = new SingleEndSequenceFile(sf);
    try {
        fileProcessor.process(so);
    } catch (Exception e) {
        // keep the cause attached to the failure so it appears in the test
        // report instead of only on stderr
        throw new AssertionError("File processor threw an exception while processing a FASTQ file.", e);
    }
    verify(sequenceFileRepository).saveMetadata(argument.capture());
    SequenceFile updatedFile = argument.getValue();
    // the analysis is read straight from the field via reflection
    final Field fastqcAnalysis = ReflectionUtils.findField(SequenceFile.class, "fastqcAnalysis");
    ReflectionUtils.makeAccessible(fastqcAnalysis);
    AnalysisFastQC updated = (AnalysisFastQC) fastqcAnalysis.get(updatedFile);
    assertEquals("GC Content was not set correctly.", Short.valueOf((short) 50), updated.getGcContent());
    assertEquals("Filtered sequences was not 0.", Integer.valueOf(0), updated.getFilteredSequences());
    assertEquals("File type was not correct.", "Conventional base calls", updated.getFileType());
    assertEquals("Max length was not correct.", Integer.valueOf(SEQUENCE.length()), updated.getMaxLength());
    assertEquals("Min length was not correct.", Integer.valueOf(SEQUENCE.length()), updated.getMinLength());
    assertEquals("Total sequences was not correct.", Integer.valueOf(2), updated.getTotalSequences());
    assertEquals("Encoding was not correct.", "Illumina <1.3", updated.getEncoding());
    assertEquals("Total number of bases was not correct.", Long.valueOf(SEQUENCE.length() * 2), updated.getTotalBases());
    assertNotNull("Per-base quality score chart was not created.", updated.getPerBaseQualityScoreChart());
    assertTrue("Per-base quality score chart was created, but was empty.", ((byte[]) updated.getPerBaseQualityScoreChart()).length > 0);
    assertNotNull("Per-sequence quality score chart was not created.", updated.getPerSequenceQualityScoreChart());
    assertTrue("Per-sequence quality score chart was created, but was empty.", ((byte[]) updated.getPerSequenceQualityScoreChart()).length > 0);
    assertNotNull("Duplication level chart was not created.", updated.getDuplicationLevelChart());
    assertTrue("Duplication level chart was not created.", ((byte[]) updated.getDuplicationLevelChart()).length > 0);
    Iterator<OverrepresentedSequence> ovrs = updated.getOverrepresentedSequences().iterator();
    assertTrue("No overrepresented sequences added to analysis.", ovrs.hasNext());
    // reuse the iterator that was just checked rather than creating a second one
    OverrepresentedSequence overrepresentedSequence = ovrs.next();
    assertEquals("Sequence was not the correct sequence.", SEQUENCE, overrepresentedSequence.getSequence());
    assertEquals("The count was not correct.", 2, overrepresentedSequence.getOverrepresentedSequenceCount());
    assertEquals("The percent was not correct.", BigDecimal.valueOf(100.), overrepresentedSequence.getPercentage());
}
Aggregations