Use of datawave.webservice.results.mr.MapReduceInfoResponseList in project datawave by NationalSecurityAgency.
Example from the class MapReduceStatePersisterTest, method testFind().
// Verifies that find() returns one entry per MapReduce job persisted for the
// calling principal. NOTE(review): relies on testPersistentCreate() persisting
// a single entry per call and on the field `id` seeding each entry's key —
// confirm against MapReduceStatePersisterTest setup, which is outside this view.
@Test
public void testFind() throws Exception {
// create some entries
// Persist three distinct entries, calling PowerMock.resetAll() between each so
// the expectations recorded inside testPersistentCreate() do not accumulate
// into the expectations recorded below for the find() call.
testPersistentCreate();
PowerMock.resetAll();
id = UUID.randomUUID().toString();
testPersistentCreate();
PowerMock.resetAll();
id = UUID.randomUUID().toString();
testPersistentCreate();
PowerMock.resetAll();
// Record the collaborator calls find() is expected to make: resolve the
// caller principal, then check out (and later return) an ADMIN-priority
// Accumulo connection via the connection factory.
EasyMock.expect(ctx.getCallerPrincipal()).andReturn(principal);
HashMap<String, String> trackingMap = new HashMap<>();
expect(connectionFactory.getTrackingMap(EasyMock.anyObject())).andReturn(trackingMap);
expect(connectionFactory.getConnection(EasyMock.eq(AccumuloConnectionFactory.Priority.ADMIN), EasyMock.eq(trackingMap))).andReturn(connection);
connectionFactory.returnConnection(connection);
replayAll();
MapReduceInfoResponseList result = bean.find();
verifyAll();
// All three persisted entries must come back.
assertEquals(3, result.getResults().size());
}
Use of datawave.webservice.results.mr.MapReduceInfoResponseList in project datawave by NationalSecurityAgency.
Example from the class MapReduceBean, method list().
/**
 * Retrieves the status of the MapReduce job with the given jobId.
 *
 * @param jobId
 *            id of the MapReduce job to look up
 * @return datawave.webservice.results.mr.MapReduceInfoResponseList
 * @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
 * @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
 * @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
 * @HTTP 200 success
 * @HTTP 404 if jobId is invalid or cannot be found
 * @HTTP 500 unexpected server-side error
 */
@GET
@Produces({ "application/xml", "text/xml", "application/json", "text/yaml", "text/x-yaml", "application/x-yaml", "application/x-protobuf", "application/x-protostuff" })
@javax.ws.rs.Path("/{jobId}/list")
@GZIP
public MapReduceInfoResponseList list(@PathParam("jobId") String jobId) {
    // Look up persisted job state; normalize a null lookup to an empty list so
    // the error-reporting below always has a response object to attach to.
    MapReduceInfoResponseList matches = mapReduceState.findById(jobId);
    if (matches == null) {
        matches = new MapReduceInfoResponseList();
    }
    int matchCount = (matches.getResults() == null) ? 0 : matches.getResults().size();
    if (matchCount == 0) {
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.NO_QUERY_OBJECT_MATCH);
        matches.addException(qe);
        throw new NotFoundException(qe, matches);
    }
    if (matchCount > 1) {
        // More than one persisted entry for a single job id is unexpected.
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_MAPREDUCE_OBJECT_MATCHES);
        matches.addException(qe);
        throw new NotFoundException(qe, matches);
    }
    return matches;
}
Use of datawave.webservice.results.mr.MapReduceInfoResponseList in project datawave by NationalSecurityAgency.
Example from the class MapReduceBean, method restart().
/**
 * Kill any job running associated with the BulkResults id and start a new job.
 *
 * @param jobId
 *            id of the MapReduce job to restart
 * @return {@code datawave.webservice.result.GenericResponse<String>}
 * @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
 * @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
 * @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
 * @HTTP 200 success
 * @HTTP 404 if jobId is invalid or cannot be found
 * @HTTP 500 error restarting the job
 */
@PUT
@Produces({ "application/xml", "text/xml", "application/json", "text/yaml", "text/x-yaml", "application/x-yaml", "application/x-protobuf", "application/x-protostuff" })
@javax.ws.rs.Path("/{jobId}/restart")
@GZIP
public GenericResponse<String> restart(@PathParam("jobId") String jobId) {
    GenericResponse<String> response = new GenericResponse<>();
    // Locate the persisted state for this job id; exactly one match is expected.
    MapReduceInfoResponseList matches = mapReduceState.findById(jobId);
    int matchCount = matches.getResults().size();
    if (matchCount == 0) {
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.NO_MAPREDUCE_OBJECT_MATCH);
        response.addException(qe);
        throw new NotFoundException(qe, response);
    }
    if (matchCount > 1) {
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_MAPREDUCE_OBJECT_MATCHES);
        response.addException(qe);
        throw new NotFoundException(qe, response);
    }
    MapReduceInfoResponse priorJob = matches.getResults().get(0);
    // Kill any running jobs for this id and remove the results directory.
    cancel(jobId);
    // Re-submit under the same job name, appending JOB_ID to the runtime
    // parameters to signal that this submission is a restart.
    String jobName = priorJob.getJobName();
    return submit(jobName, priorJob.getRuntimeParameters() + PARAMETER_SEPARATOR + JOB_ID + PARAMETER_NAME_VALUE_SEPARATOR + jobId);
}
Use of datawave.webservice.results.mr.MapReduceInfoResponseList in project datawave by NationalSecurityAgency.
Example from the class MapReduceBean, method getResultFile().
/**
 * Returns the contents of a result file. The list of resulting output files from the MapReduce job is listed in the response object of the status
 * operation.
 *
 * @param jobId
 *            id of the MapReduce job whose results directory contains the file
 * @param fileName
 *            name of the file to stream, relative to the job results directory
 * @return file contents
 * @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
 * @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
 * @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
 * @HTTP 200 success
 * @HTTP 404 if jobId is invalid or cannot be found
 * @HTTP 500 unexpected server-side error
 */
@GET
@Produces("*/*")
@javax.ws.rs.Path("/{jobId}/getFile/{fileName}")
@GZIP
public StreamingOutput getResultFile(@PathParam("jobId") String jobId, @PathParam("fileName") String fileName) {
    // list() throws unless exactly one match exists, so get(0) is safe here.
    MapReduceInfoResponseList response = list(jobId);
    MapReduceInfoResponse result = response.getResults().get(0);
    String hdfs = result.getHdfs();
    String resultsDir = result.getResultsDirectory();
    final FileSystem fs = getFS(hdfs, response);
    final Path resultFile = new Path(resultsDir, fileName);
    FSDataInputStream fis = null;
    try {
        if (!fs.exists(resultFile) || !fs.isFile(resultFile)) {
            NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.FILE_NOT_FOUND, MessageFormat.format("{0} at path {1}", fileName, resultsDir));
            response.addException(qe);
            throw new NotFoundException(qe, response);
        }
        fis = fs.open(resultFile);
    } catch (IOException e1) {
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.RESULT_FILE_ACCESS_ERROR, e1, MessageFormat.format("{0}", resultFile.toString()));
        log.error(qe);
        response.addException(qe);
        throw new NotFoundException(qe, response);
    } finally {
        // Bug fix: the FileSystem previously leaked on every exception path
        // above (it was only closed inside the returned StreamingOutput).
        // Close it here unless the stream was opened successfully, in which
        // case ownership passes to the StreamingOutput below.
        if (null == fis) {
            try {
                fs.close();
            } catch (IOException e) {
                log.error("Error closing HDFS client", e);
            }
        }
    }
    // Effectively-final copy for capture by the lambda below
    final FSDataInputStream in = fis;
    return output -> {
        byte[] buf = new byte[BUFFER_SIZE];
        int read;
        try {
            read = in.read(buf);
            while (read != -1) {
                output.write(buf, 0, read);
                read = in.read(buf);
            }
        } catch (Exception e) {
            log.error("Error writing result file to output", e);
            throw new WebApplicationException(e);
        } finally {
            // The lambda owns both the stream and the client once it is returned.
            try {
                in.close();
            } catch (IOException e) {
                log.error("Error closing FSDataInputStream for file: " + resultFile, e);
            }
            try {
                fs.close();
            } catch (IOException e) {
                log.error("Error closing HDFS client", e);
            }
        }
    };
}
Use of datawave.webservice.results.mr.MapReduceInfoResponseList in project datawave by NationalSecurityAgency.
Example from the class MapReduceBean, method getResultFiles().
/**
 * Returns a tar file where each tar entry is a result file.
 *
 * @param jobId
 *            id of the MapReduce job whose result files are bundled
 * @param httpResponse
 *            servlet response used to set the Content-Disposition header
 * @return tar file
 * @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
 * @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
 * @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
 * @HTTP 200 success
 * @HTTP 404 if jobId is invalid or cannot be found
 * @HTTP 500 unexpected server-side error
 */
@GET
@Produces("*/*")
@javax.ws.rs.Path("/{jobId}/getAllFiles")
@GZIP
public StreamingOutput getResultFiles(@Required("jobId") @PathParam("jobId") final String jobId, @Context HttpServletResponse httpResponse) {
    // list() throws unless exactly one match exists, so get(0) is safe here.
    MapReduceInfoResponseList response = list(jobId);
    MapReduceInfoResponse result = response.getResults().get(0);
    String hdfs = result.getHdfs();
    String resultsDir = result.getResultsDirectory();
    final FileSystem fs = getFS(hdfs, response);
    final Path jobDirectory = new Path(resultsDir);
    final int jobDirectoryPathLength = jobDirectory.toUri().getPath().length();
    List<FileStatus> resultFiles = new ArrayList<>();
    try {
        // Validate that the job results directory exists and is a directory.
        try {
            if (!fs.exists(jobDirectory) || !fs.getFileStatus(jobDirectory).isDirectory()) {
                NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.JOB_DIRECTORY_NOT_FOUND, MessageFormat.format("{0} at path {1}", jobId, jobDirectory));
                response.addException(qe);
                throw new NotFoundException(qe, response);
            }
        } catch (IOException e1) {
            NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.RESULT_DIRECTORY_ACCESS_ERROR, e1, MessageFormat.format("{0}", resultsDir));
            log.error(qe);
            response.addException(qe);
            throw new NotFoundException(qe, response);
        }
        // Breadth-first walk of the directory tree collecting every regular file.
        try {
            Queue<FileStatus> fileQueue = new LinkedList<>();
            fileQueue.add(fs.getFileStatus(jobDirectory));
            while (!fileQueue.isEmpty()) {
                FileStatus currentFileStatus = fileQueue.remove();
                if (currentFileStatus.isFile()) {
                    resultFiles.add(currentFileStatus);
                } else {
                    FileStatus[] dirList = fs.listStatus(currentFileStatus.getPath());
                    Collections.addAll(fileQueue, dirList);
                }
            }
        } catch (IOException e) {
            QueryException qe = new QueryException(DatawaveErrorCode.DFS_DIRECTORY_LISTING_ERROR, e, MessageFormat.format("directory: {0}", resultsDir));
            log.error(qe);
            response.addException(qe);
            throw new DatawaveWebApplicationException(qe, response);
        }
    } catch (RuntimeException e) {
        // Bug fix: the FileSystem previously leaked whenever validation or the
        // directory listing failed; close it before propagating the error.
        try {
            fs.close();
        } catch (IOException ioe) {
            log.error("Error closing HDFS client", ioe);
        }
        throw e;
    }
    String filename = jobId + "-files.tar";
    httpResponse.addHeader("Content-Disposition", "attachment; filename=\"" + filename + "\"");
    // Make final references for use in anonymous class
    final List<FileStatus> paths = resultFiles;
    return output -> {
        TarArchiveOutputStream tos = new TarArchiveOutputStream(output);
        tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
        try {
            for (FileStatus fileStatus : paths) {
                // paths only contains regular files, but guard defensively.
                if (fileStatus.isDirectory())
                    continue;
                // The archive entry will be started when the first (and possibly only) chunk is
                // written out. It is done this way because we need to know the size of the file
                // for the archive entry, and don't want to scan twice to get that info (once
                // here and again in streamFile).
                String fileName = fileStatus.getPath().toUri().getPath().substring(jobDirectoryPathLength + 1);
                TarArchiveEntry entry = new TarArchiveEntry(jobId + "/" + fileName, false);
                entry.setSize(fileStatus.getLen());
                tos.putArchiveEntry(entry);
                FSDataInputStream fis = fs.open(fileStatus.getPath());
                byte[] buf = new byte[BUFFER_SIZE];
                int read;
                try {
                    read = fis.read(buf);
                    while (read != -1) {
                        tos.write(buf, 0, read);
                        read = fis.read(buf);
                    }
                } catch (Exception e) {
                    log.error("Error writing result file to output", e);
                    throw new WebApplicationException(e);
                } finally {
                    try {
                        if (null != fis)
                            fis.close();
                    } catch (IOException e) {
                        log.error("Error closing FSDataInputStream for file: " + fileStatus.getPath().getName(), e);
                    }
                }
                tos.closeArchiveEntry();
            }
            tos.finish();
        } catch (Exception e) {
            // NOTE(review): errors here (including the WebApplicationException
            // rethrown above) are logged and swallowed, presumably because the
            // response headers have already been sent — confirm this is intended.
            log.error(e.getMessage(), e);
        } finally {
            try {
                tos.close();
            } catch (IOException ioe) {
                log.error("Error closing TarArchiveOutputStream", ioe);
            }
            try {
                fs.close();
            } catch (IOException ioe) {
                log.error("Error closing HDFS client", ioe);
            }
        }
    };
}
Aggregations