Use of ca.corefacility.bioinformatics.irida.ria.web.analysis.dto.AnalysisOutputFileInfo in project irida by phac-nml.
Class AnalysisController, method getOutputFile.
/**
 * Read some lines or text from an {@link AnalysisOutputFile}.
 *
 * @param id       {@link AnalysisSubmission} id
 * @param fileId   {@link AnalysisOutputFile} id
 * @param limit    Optional limit to number of lines to read from file
 * @param start    Optional line to start reading from
 * @param end      Optional line to stop reading at
 * @param seek     Optional file byte position to seek to and begin reading
 * @param chunk    Optional number of bytes to read from file
 * @param response HTTP response object
 * @return JSON with file text or lines as well as information about the file;
 *         {@code null} with a 404 status if no output file matches {@code fileId}.
 */
@RequestMapping(value = "/ajax/{id}/outputs/{fileId}", method = RequestMethod.GET)
@ResponseBody
public AnalysisOutputFileInfo getOutputFile(@PathVariable Long id, @PathVariable Long fileId,
		@RequestParam(defaultValue = "100", required = false) Long limit,
		@RequestParam(required = false) Long start, @RequestParam(required = false) Long end,
		@RequestParam(defaultValue = "0", required = false) Long seek,
		@RequestParam(required = false) Long chunk, HttpServletResponse response) {
	AnalysisSubmission submission = analysisSubmissionService.read(id);
	Analysis analysis = submission.getAnalysis();
	final Optional<AnalysisOutputFile> analysisOutputFile = analysis.getAnalysisOutputFiles()
			.stream()
			.filter(x -> Objects.equals(x.getId(), fileId))
			.findFirst();
	// Guard clause: no matching output file for this analysis submission.
	if (!analysisOutputFile.isPresent()) {
		response.setStatus(HttpServletResponse.SC_NOT_FOUND);
		return null;
	}
	final AnalysisOutputFile aof = analysisOutputFile.get();
	final Path aofFile = aof.getFile();
	final ToolExecution tool = aof.getCreatedByTool();
	final AnalysisOutputFileInfo contents = new AnalysisOutputFileInfo();
	contents.setId(aof.getId());
	contents.setAnalysisSubmissionId(submission.getId());
	contents.setAnalysisId(analysis.getId());
	contents.setFilename(aofFile.getFileName().toString());
	contents.setFileExt(FileUtilities.getFileExt(aofFile.getFileName().toString()));
	contents.setFileSizeBytes(aofFile.toFile().length());
	contents.setToolName(tool.getToolName());
	contents.setToolVersion(tool.getToolVersion());
	// try-with-resources: the original leaked the RandomAccessFile handle on
	// every request (it was never closed, on success or on error).
	try (RandomAccessFile randomAccessFile = new RandomAccessFile(aofFile.toFile(), "r")) {
		randomAccessFile.seek(seek);
		if (chunk != null && chunk > 0) {
			// Byte-chunk reading is identical regardless of seek position, so the
			// previously duplicated branch is collapsed into a single one.
			contents.setText(FileUtilities.readChunk(randomAccessFile, seek, chunk));
			contents.setChunk(chunk);
			contents.setStartSeek(seek);
		} else if (seek == 0) {
			// Reading lines from the start of the file: buffered line-limited read.
			final BufferedReader reader = new BufferedReader(new FileReader(randomAccessFile.getFD()));
			final List<String> lines = FileUtilities.readLinesLimit(reader, limit, start, end);
			contents.setLines(lines);
			contents.setLimit((long) lines.size());
			contents.setStart(start);
			contents.setEnd(start + lines.size());
		} else {
			// Reading lines starting from an arbitrary byte position in the file.
			final List<String> lines = FileUtilities.readLinesFromFilePointer(randomAccessFile, limit);
			contents.setLines(lines);
			contents.setStartSeek(seek);
			contents.setStart(start);
			contents.setLimit((long) lines.size());
		}
		// Report where reading stopped so clients can continue from this position.
		contents.setFilePointer(randomAccessFile.getFilePointer());
	} catch (IOException e) {
		// Pass the exception as the throwable argument so the stack trace is logged
		// (the original concatenated `e` into the message, losing the trace).
		logger.error("Could not read output file '" + aof.getId() + "'", e);
		response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
		contents.setError("Could not read output file");
	}
	return contents;
}
Use of ca.corefacility.bioinformatics.irida.ria.web.analysis.dto.AnalysisOutputFileInfo in project irida by phac-nml.
Class AnalysisController, method getAnalysisOutputFileInfo.
/**
 * Get {@link AnalysisOutputFileInfo}.
 *
 * @param submission {@link AnalysisSubmission} of {@code analysis}
 * @param analysis   {@link Analysis} to get {@link AnalysisOutputFile}s from
 * @param outputName Workflow output name
 * @return {@link AnalysisOutputFile} info, or {@code null} if the file extension is blacklisted
 */
private AnalysisOutputFileInfo getAnalysisOutputFileInfo(AnalysisSubmission submission, Analysis analysis, String outputName) {
	// file extensions whose info should not be exposed at all
	final ImmutableSet<String> blacklistedExtensions = ImmutableSet.of("zip");
	// tabular/plain-text extensions for which the first line of the file is read as a preview
	final ImmutableSet<String> previewableExtensions = ImmutableSet.of("tsv", "txt", "tabular", "csv", "tab");
	final AnalysisOutputFile outputFile = analysis.getAnalysisOutputFile(outputName);
	final String filename = outputFile.getFile().getFileName().toString();
	final String extension = FileUtilities.getFileExt(filename);
	if (blacklistedExtensions.contains(extension)) {
		return null;
	}
	final ToolExecution createdByTool = outputFile.getCreatedByTool();
	final AnalysisOutputFileInfo info = new AnalysisOutputFileInfo();
	info.setId(outputFile.getId());
	info.setAnalysisSubmissionId(submission.getId());
	info.setAnalysisId(analysis.getId());
	info.setOutputName(outputName);
	info.setFilename(filename);
	info.setFileSizeBytes(outputFile.getFile().toFile().length());
	info.setToolName(createdByTool.getToolName());
	info.setToolVersion(createdByTool.getToolVersion());
	info.setFileExt(extension);
	if (previewableExtensions.contains(extension)) {
		addFirstLine(info, outputFile);
	}
	return info;
}
Use of ca.corefacility.bioinformatics.irida.ria.web.analysis.dto.AnalysisOutputFileInfo in project irida by phac-nml.
Class AnalysisControllerTest, method testGetOutputFileByteSizedChunks.
/**
 * Verify that an {@link AnalysisOutputFile} can be read in arbitrary byte-sized
 * chunks via {@code getOutputFile}, that the returned file pointer advances by
 * the chunk size, and that seeking past the end of the file returns empty text.
 */
@Test
public void testGetOutputFileByteSizedChunks() {
	final Long submissionId = 1L;
	final MockHttpServletResponse response = new MockHttpServletResponse();
	when(analysisSubmissionServiceMock.read(submissionId)).thenReturn(TestDataFactory.constructAnalysisSubmission());
	// get analysis output file summary info
	final List<AnalysisOutputFileInfo> infos = analysisController.getOutputFilesInfo(submissionId);
	assertEquals("Expecting 5 analysis output file info items", 5, infos.size());
	final Optional<AnalysisOutputFileInfo> optInfo = infos.stream().filter(x -> Objects.equals(x.getOutputName(), "refseq-masher-matches")).findFirst();
	assertTrue("Should be a refseq-masher-matches.tsv output file", optInfo.isPresent());
	final AnalysisOutputFileInfo info = optInfo.get();
	final String firstLine = "sample\ttop_taxonomy_name\tdistance\tpvalue\tmatching\tfull_taxonomy\ttaxonomic_subspecies\ttaxonomic_species\ttaxonomic_genus\ttaxonomic_family\ttaxonomic_order\ttaxonomic_class\ttaxonomic_phylum\ttaxonomic_superkingdom\tsubspecies\tserovar\tplasmid\tbioproject\tbiosample\ttaxid\tassembly_accession\tmatch_id";
	assertEquals("First line of file should be read since it has a tabular file extension", firstLine, info.getFirstLine());
	final Long seekTo = 290L;
	final Long expFileSize = 61875L;
	assertEquals("FilePointer should be first character of second line of file", seekTo, info.getFilePointer());
	assertEquals("File size in bytes should be returned", expFileSize, info.getFileSizeBytes());
	// read a 10-byte chunk starting at the first character of the second line
	final Long chunkSize = 10L;
	final AnalysisOutputFileInfo chunkInfo = analysisController.getOutputFile(submissionId, info.getId(), null, null, null, seekTo, chunkSize, response);
	assertEquals("Should get the first 10 characters of the 2nd line starting at file pointer position 290", "SRR1203042", chunkInfo.getText());
	final long expFilePointer = seekTo + chunkSize;
	assertEquals("After reading byte chunk of size x starting at position y, filePointer should be x+y", expFilePointer, chunkInfo.getFilePointer().longValue());
	// continue reading from the file pointer returned by the previous request
	String nextTextChunk = "\tSalmonella enterica subsp. enterica serovar Abony str. 0014";
	final AnalysisOutputFileInfo nextChunkInfo = analysisController.getOutputFile(submissionId, info.getId(), null, null, null, chunkInfo.getFilePointer(), (long) nextTextChunk.length(), response);
	assertEquals("Should be able to continue reading from last file pointer position", nextTextChunk, nextChunkInfo.getText());
	// read the final chunkSize bytes of the file
	final AnalysisOutputFileInfo lastChunkOfFile = analysisController.getOutputFile(submissionId, info.getId(), null, null, null, expFileSize - chunkSize, chunkSize, response);
	final String lastChunkText = "_str..fna\n";
	assertEquals("Should have successfully read the last chunk of the file", lastChunkText, lastChunkOfFile.getText());
	// seeking beyond the end of the file must yield empty text, not an error
	final AnalysisOutputFileInfo chunkOutsideRangeOfFile = analysisController.getOutputFile(submissionId, info.getId(), null, null, null, expFileSize + chunkSize, chunkSize, response);
	assertEquals("Should return empty string since nothing can be read outside of file range", "", chunkOutsideRangeOfFile.getText());
	// fixed message grammar: "an position" -> "a position"
	assertEquals("Should have seeked to a position of file size + chunkSize", expFileSize + chunkSize, (long) chunkOutsideRangeOfFile.getStartSeek());
	assertEquals("FilePointer shouldn't have changed from startSeek", expFileSize + chunkSize, (long) chunkOutsideRangeOfFile.getFilePointer());
}
Use of ca.corefacility.bioinformatics.irida.ria.web.analysis.dto.AnalysisOutputFileInfo in project irida by phac-nml.
Class AnalysisControllerTest, method testGetOutputFileLines.
/**
 * Verify that lines can be read from an {@link AnalysisOutputFile} both from the
 * start of the file (BufferedReader path) and from an arbitrary file pointer
 * position (RandomAccessFile path), and that the two paths agree.
 */
@Test
public void testGetOutputFileLines() {
	final Long submissionId = 1L;
	final MockHttpServletResponse httpResponse = new MockHttpServletResponse();
	when(analysisSubmissionServiceMock.read(submissionId)).thenReturn(TestDataFactory.constructAnalysisSubmission());
	// get analysis output file summary info
	final List<AnalysisOutputFileInfo> outputInfos = analysisController.getOutputFilesInfo(submissionId);
	assertEquals("Expecting 5 analysis output file info items", 5, outputInfos.size());
	final Optional<AnalysisOutputFileInfo> masherMatchesInfo = outputInfos.stream()
			.filter(fileInfo -> Objects.equals(fileInfo.getOutputName(), "refseq-masher-matches"))
			.findFirst();
	assertTrue("Should be a refseq-masher-matches.tsv output file", masherMatchesInfo.isPresent());
	final AnalysisOutputFileInfo tsvInfo = masherMatchesInfo.get();
	final String firstLine = "sample\ttop_taxonomy_name\tdistance\tpvalue\tmatching\tfull_taxonomy\ttaxonomic_subspecies\ttaxonomic_species\ttaxonomic_genus\ttaxonomic_family\ttaxonomic_order\ttaxonomic_class\ttaxonomic_phylum\ttaxonomic_superkingdom\tsubspecies\tserovar\tplasmid\tbioproject\tbiosample\ttaxid\tassembly_accession\tmatch_id";
	assertEquals("First line of file should be read since it has a tabular file extension", firstLine, tsvInfo.getFirstLine());
	final Long expectedFilePointer = 290L;
	assertEquals("FilePointer should be first character of second line of file", expectedFilePointer, tsvInfo.getFilePointer());
	assertEquals("File size in bytes should be returned", Long.valueOf(61875), tsvInfo.getFileSizeBytes());
	// read the first `lineLimit` lines from the start of the file
	final Long lineLimit = 3L;
	final AnalysisOutputFileInfo linesFromStart = analysisController.getOutputFile(submissionId, tsvInfo.getId(), lineLimit, 0L, null, 0L, null, httpResponse);
	assertEquals(lineLimit.intValue(), linesFromStart.getLines().size());
	String expLine = "SRR1203042\tSalmonella enterica subsp. enterica serovar Abony str. 0014\t0.00650877\t0.0\t328/400\tBacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella; enterica; subsp. enterica; serovar Abony; str. 0014\tSalmonella enterica subsp. enterica\tSalmonella enterica\tSalmonella\tEnterobacteriaceae\tEnterobacterales\tGammaproteobacteria\tProteobacteria\tBacteria\tenterica\tAbony\t\tPRJNA224116\tSAMN01823751\t1029983\tGCF_000487615.2\t./rcn/refseq-NZ-1029983-PRJNA224116-SAMN01823751-GCF_000487615.2-.-Salmonella_enterica_subsp._enterica_serovar_Abony_str._0014.fna";
	assertEquals(expLine, linesFromStart.getLines().get(0));
	// begin reading lines after first line file pointer position
	final AnalysisOutputFileInfo linesFromPointer = analysisController.getOutputFile(submissionId, tsvInfo.getId(), lineLimit, 0L, null, tsvInfo.getFilePointer(), null, httpResponse);
	assertEquals("Using the RandomAccessFile reading method with seek>0, should give the same results as using a BufferedReader if both start reading at the same position", lineLimit.intValue(), linesFromPointer.getLines().size());
	assertEquals("Using the RandomAccessFile reading method with seek>0, should give the same results as using a BufferedReader if both start reading at the same position", expLine, linesFromPointer.getLines().get(0));
}
Aggregations