Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.
Class MapReduceBean, method getResultFiles.
/**
* Returns a tar file in which each entry is a result file.
*
* @param jobId the id of the MapReduce job whose result files should be returned
* @return tar file
* @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
* @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
* @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
* @HTTP 200 success
* @HTTP 404 if jobId is invalid or cannot be found
* @HTTP 500 error retrieving the result files
*/
@GET
@Produces("*/*")
@javax.ws.rs.Path("/{jobId}/getAllFiles")
@GZIP
public StreamingOutput getResultFiles(@Required("jobId") @PathParam("jobId") final String jobId, @Context HttpServletResponse httpResponse) {
MapReduceInfoResponseList response = list(jobId);
MapReduceInfoResponse result = response.getResults().get(0);
String hdfs = result.getHdfs();
String resultsDir = result.getResultsDirectory();
final FileSystem fs = getFS(hdfs, response);
final Path jobDirectory = new Path(resultsDir);
final int jobDirectoryPathLength = jobDirectory.toUri().getPath().length();
try {
if (!fs.exists(jobDirectory) || !fs.getFileStatus(jobDirectory).isDirectory()) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.JOB_DIRECTORY_NOT_FOUND, MessageFormat.format("{0} at path {1}", jobId, jobDirectory));
response.addException(qe);
throw new NotFoundException(qe, response);
}
} catch (IOException e1) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.RESULT_DIRECTORY_ACCESS_ERROR, e1, MessageFormat.format("{0}", resultsDir));
log.error(qe);
response.addException(qe);
throw new NotFoundException(qe, response);
}
// Get the children
List<FileStatus> resultFiles = new ArrayList<>();
try {
// recurse through the directory to find all files
Queue<FileStatus> fileQueue = new LinkedList<>();
fileQueue.add(fs.getFileStatus(jobDirectory));
while (!fileQueue.isEmpty()) {
FileStatus currentFileStatus = fileQueue.remove();
if (currentFileStatus.isFile()) {
resultFiles.add(currentFileStatus);
} else {
FileStatus[] dirList = fs.listStatus(currentFileStatus.getPath());
Collections.addAll(fileQueue, dirList);
}
}
} catch (IOException e) {
QueryException qe = new QueryException(DatawaveErrorCode.DFS_DIRECTORY_LISTING_ERROR, e, MessageFormat.format("directory: {0}", resultsDir));
log.error(qe);
response.addException(qe);
throw new DatawaveWebApplicationException(qe, response);
}
String filename = jobId + "-files.tar";
httpResponse.addHeader("Content-Disposition", "attachment; filename=\"" + filename + "\"");
// Make a final reference for use in the lambda below
final List<FileStatus> paths = resultFiles;
return output -> {
TarArchiveOutputStream tos = new TarArchiveOutputStream(output);
tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
try {
for (FileStatus fileStatus : paths) {
if (fileStatus.isDirectory())
continue;
// The entry size comes from the FileStatus, so the archive entry header can be
// written before the file is read and each file only has to be scanned once.
String fileName = fileStatus.getPath().toUri().getPath().substring(jobDirectoryPathLength + 1);
TarArchiveEntry entry = new TarArchiveEntry(jobId + "/" + fileName, false);
entry.setSize(fileStatus.getLen());
tos.putArchiveEntry(entry);
FSDataInputStream fis = fs.open(fileStatus.getPath());
byte[] buf = new byte[BUFFER_SIZE];
int read;
try {
read = fis.read(buf);
while (read != -1) {
tos.write(buf, 0, read);
read = fis.read(buf);
}
} catch (Exception e) {
log.error("Error writing result file to output", e);
throw new WebApplicationException(e);
} finally {
try {
if (null != fis)
fis.close();
} catch (IOException e) {
log.error("Error closing FSDataInputStream for file: " + fileStatus.getPath().getName(), e);
}
}
tos.closeArchiveEntry();
}
tos.finish();
} catch (Exception e) {
log.error(e.getMessage(), e);
} finally {
try {
if (null != tos)
tos.close();
} catch (IOException ioe) {
log.error("Error closing TarArchiveOutputStream", ioe);
}
try {
if (null != fs)
fs.close();
} catch (IOException ioe) {
log.error("Error closing HDFS client", ioe);
}
}
};
}
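For callers of this endpoint, a minimal client-side sketch follows. It is an illustration only: the base URL, job id, and output directory are hypothetical placeholders, and any client certificates or proxied-entity headers a real deployment requires are omitted. Only the /{jobId}/getAllFiles path and the tar payload are taken from the method above; the download uses java.net.HttpURLConnection and Apache Commons Compress.

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class ResultTarDownloader {
    public static void main(String[] args) throws Exception {
        String jobId = "1234"; // hypothetical job id
        // Hypothetical base URL; substitute the real host and add any required authentication.
        URL url = new URL("https://localhost:8443/DataWave/MapReduce/" + jobId + "/getAllFiles");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        Path targetDir = Paths.get("results");
        Files.createDirectories(targetDir);
        try (InputStream in = conn.getInputStream();
             TarArchiveInputStream tar = new TarArchiveInputStream(in)) {
            TarArchiveEntry entry;
            while ((entry = tar.getNextTarEntry()) != null) {
                if (entry.isDirectory()) {
                    continue;
                }
                // Entry names are of the form jobId/<path relative to the results directory>.
                Path out = targetDir.resolve(entry.getName()).normalize();
                Files.createDirectories(out.getParent());
                // The tar stream reports EOF at the end of the current entry, so this copies one file.
                Files.copy(tar, out);
            }
        } finally {
            conn.disconnect();
        }
    }
}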
Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.
Class MapReduceBean, method remove.
/**
* Removes the MapReduce entry and associated data
*
* @param jobId the id of the MapReduce job to remove
* @return datawave.webservice.result.VoidResponse
* @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
* @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
* @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
* @HTTP 200 success
* @HTTP 404 if jobId is invalid or cannot be found
* @HTTP 500 error removing the job
*/
@DELETE
@Produces({ "application/xml", "text/xml", "application/json", "text/yaml", "text/x-yaml", "application/x-yaml", "application/x-protobuf", "application/x-protostuff" })
@javax.ws.rs.Path("/{jobId}/remove")
@GZIP
public VoidResponse remove(@PathParam("jobId") String jobId) {
VoidResponse response = new VoidResponse();
// Call cancel which will kill any running jobs and remove the results directory in HDFS.
cancel(jobId);
// Remove the working directory from HDFS
MapReduceInfoResponseList list = list(jobId);
MapReduceInfoResponse result = list.getResults().get(0);
String hdfs = result.getHdfs();
String wdir = result.getWorkingDirectory();
Path p = new Path(wdir);
try {
FileSystem fs = getFS(hdfs, response);
if (fs.exists(p) && !fs.delete(p, true)) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.WORKING_DIRECTORY_DELETE_ERROR, MessageFormat.format("{0}", wdir));
log.error(qe);
response.addException(qe);
throw new NotFoundException(qe, response);
}
} catch (IOException e) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.WORKING_DIRECTORY_DELETE_ERROR, e, MessageFormat.format("{0}", wdir));
log.error(qe);
response.addException(qe);
throw new NotFoundException(qe, response);
}
// Remove any persisted state information
try {
mapReduceState.remove(jobId);
} catch (QueryException e) {
log.error("Error removing job state information", e);
response.addException(e.getBottomQueryException());
throw new DatawaveWebApplicationException(e, response);
}
return response;
}
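A similarly hedged sketch of invoking this removal endpoint from Java is shown below; the host and port are placeholders, and real deployments also need client certificates or the proxied-entity headers described above. Only the /{jobId}/remove path and the documented status codes come from the method itself.

import java.net.HttpURLConnection;
import java.net.URL;

public class RemoveJobClient {
    public static void main(String[] args) throws Exception {
        String jobId = "1234"; // hypothetical job id
        // Hypothetical base URL for the DataWave web service.
        URL url = new URL("https://localhost:8443/DataWave/MapReduce/" + jobId + "/remove");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("DELETE");
        conn.setRequestProperty("Accept", "application/xml");
        // Per the javadoc: 200 on success, 404 for an unknown job id, 500 if removal fails.
        System.out.println("Remove returned HTTP " + conn.getResponseCode());
        conn.disconnect();
    }
}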
Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.
Class MapReduceStatePersisterBean, method populateResponse.
private MapReduceInfoResponse populateResponse(Iterable<Entry<Key, Value>> data) throws IOException {
MapReduceInfoResponse result = null;
String hdfs = null;
TreeSet<JobExecution> jobs = null;
for (Entry<Key, Value> entry : data) {
if (null == result)
result = new MapReduceInfoResponse();
result.setId(entry.getKey().getRow().toString());
String colq = entry.getKey().getColumnQualifier().toString();
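// The column qualifier identifies which field of the response this entry populates.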
if (colq.equals(WORKING_DIRECTORY)) {
result.setWorkingDirectory(new String(entry.getValue().get()));
} else if (colq.equals(RESULTS_LOCATION)) {
if (null != entry.getValue() && entry.getValue().get().length > 0) {
result.setResultsDirectory(new String(entry.getValue().get()));
}
} else if (colq.equals(PARAMS)) {
result.setRuntimeParameters(new String(entry.getValue().get()));
} else if (colq.equals(HDFS)) {
result.setHdfs(new String(entry.getValue().get()));
hdfs = new String(entry.getValue().get());
} else if (colq.equals(JT)) {
result.setJobTracker(new String(entry.getValue().get()));
} else if (colq.startsWith(STATE)) {
if (null == jobs)
jobs = new TreeSet<>();
JobExecution job = new JobExecution();
job.setMapReduceJobId(colq.substring(STATE.length() + 1));
job.setState(new String(entry.getValue().get()));
job.setTimestamp(entry.getKey().getTimestamp());
jobs.add(job);
} else if (colq.equals(NAME)) {
result.setJobName(new String(entry.getValue().get()));
}
}
if (null != jobs)
result.setJobExecutions(new ArrayList<>(jobs));
try {
if (null != hdfs && !hdfs.isEmpty() && null != result.getResultsDirectory()) {
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfs);
// If we can't talk to HDFS then I want to fail fast, default is to retry 10 times.
conf.setInt("ipc.client.connect.max.retries", 0);
Path resultDirectoryPath = new Path(result.getResultsDirectory());
int resultDirectoryPathLength = resultDirectoryPath.toUri().getPath().length();
FileSystem fs = FileSystem.get(resultDirectoryPath.toUri(), conf);
List<FileStatus> stats = new ArrayList<>();
// recurse through the directory to find all files
Queue<FileStatus> fileQueue = new LinkedList<>();
fileQueue.add(fs.getFileStatus(resultDirectoryPath));
while (!fileQueue.isEmpty()) {
FileStatus currentFileStatus = fileQueue.remove();
if (currentFileStatus.isFile()) {
stats.add(currentFileStatus);
} else {
FileStatus[] dirList = fs.listStatus(currentFileStatus.getPath());
Collections.addAll(fileQueue, dirList);
}
}
if (!stats.isEmpty()) {
List<ResultFile> resultFiles = new ArrayList<>();
for (FileStatus f : stats) {
if (!f.isDirectory()) {
ResultFile rf = new ResultFile();
String fullPath = f.getPath().toUri().getPath().substring(resultDirectoryPathLength + 1);
rf.setFileName(fullPath);
rf.setLength(f.getLen());
resultFiles.add(rf);
}
}
result.setResultFiles(resultFiles);
}
}
} catch (IOException e) {
log.warn("Unable to populate result files portion of response", e);
}
return result;
}
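The queue-based directory walk above (and the equivalent one in getResultFiles) can also be written with Hadoop's built-in recursive listing. A minimal sketch, assuming only a reachable FileSystem and an existing results directory, is:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class ResultFileLister {
    // Returns the paths of all regular files under resultsDir, relative to resultsDir.
    public static List<String> listResultFiles(FileSystem fs, Path resultsDir) throws IOException {
        int prefixLength = resultsDir.toUri().getPath().length();
        List<String> names = new ArrayList<>();
        // listFiles(..., true) recurses and yields only files, so no manual queue is needed.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(resultsDir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            names.add(status.getPath().toUri().getPath().substring(prefixLength + 1));
        }
        return names;
    }
}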
Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.
Class MapReduceStatePersisterTest, method testFindById.
@Test
public void testFindById() throws Exception {
// create the initial entry
testPersistentCreate();
PowerMock.resetAll();
EasyMock.expect(ctx.getCallerPrincipal()).andReturn(principal);
HashMap<String, String> trackingMap = new HashMap<>();
expect(connectionFactory.getTrackingMap(EasyMock.anyObject())).andReturn(trackingMap);
expect(connectionFactory.getConnection(EasyMock.eq(AccumuloConnectionFactory.Priority.ADMIN), EasyMock.eq(trackingMap))).andReturn(connection);
connectionFactory.returnConnection(connection);
replayAll();
MapReduceInfoResponseList result = bean.findById(id);
verifyAll();
assertEquals(1, result.getResults().size());
assertNull(result.getExceptions());
MapReduceInfoResponse response = result.getResults().get(0);
assertEquals(id, response.getId());
assertEquals(hdfs, response.getHdfs());
assertEquals(jt, response.getJobTracker());
assertEquals(jobName, response.getJobName());
assertEquals(workingDirectory, response.getWorkingDirectory());
assertEquals(resultsDirectory, response.getResultsDirectory());
assertEquals(runtimeParameters, response.getRuntimeParameters());
assertEquals(1, response.getJobExecutions().size());
assertEquals(mapReduceJobId, response.getJobExecutions().get(0).getMapReduceJobId());
assertEquals(MapReduceState.STARTED.toString(), response.getJobExecutions().get(0).getState());
}
Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.
Class MapReduceBean, method cancel.
/**
* Cancels any MapReduce jobs with the specified jobId and clears out the results directory
*
* @param jobId the id of the MapReduce job to cancel
* @return {@code datawave.webservice.result.GenericResponse<Boolean>}
* @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
* @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
* @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
* @HTTP 200 success
* @HTTP 404 if jobId is invalid or cannot be found
* @HTTP 500 error killing the job
*/
@PUT
@Produces({ "application/xml", "text/xml", "application/json", "text/yaml", "text/x-yaml", "application/x-yaml", "application/x-protobuf", "application/x-protostuff" })
@javax.ws.rs.Path("/{jobId}/cancel")
@GZIP
public GenericResponse<Boolean> cancel(@PathParam("jobId") String jobId) {
GenericResponse<Boolean> response = new GenericResponse<>();
// Find all potential running jobs
MapReduceInfoResponseList list = mapReduceState.findById(jobId);
List<String> jobIdsToKill = new ArrayList<>();
// Should contain zero or one bulk result job
if (list.getResults().isEmpty()) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.NO_MAPREDUCE_OBJECT_MATCH);
response.addException(qe);
throw new NotFoundException(qe, response);
} else if (list.getResults().size() > 1) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_MAPREDUCE_OBJECT_MATCHES);
response.addException(qe);
throw new NotFoundException(qe, response);
} else {
MapReduceInfoResponse thisJob = list.getResults().get(0);
// Get all the executions for this job
String prevId = null;
String prevState = null;
// If an execution was left in the STARTED state and a later execution has a different job id, add the earlier id to the kill list
for (JobExecution ex : thisJob.getJobExecutions()) {
if (prevId != null) {
if (prevState.equals(MapReduceState.STARTED.toString()) && !ex.getMapReduceJobId().equals(prevId))
jobIdsToKill.add(prevId);
}
prevId = ex.getMapReduceJobId();
prevState = ex.getState();
}
// Get the last one
if (MapReduceState.STARTED.toString().equals(prevState))
jobIdsToKill.add(prevId);
FileSystem hdfs = null;
try {
hdfs = getFS(thisJob.getHdfs(), response);
Path resultsDir = new Path(thisJob.getResultsDirectory());
hdfs.getConf().set("mapreduce.jobtracker.address", thisJob.getJobTracker());
// Create a JobClient to look up and kill the running jobs
try (JobClient job = new JobClient(new JobConf(hdfs.getConf()))) {
for (String killId : jobIdsToKill) {
try {
JobID jid = JobID.forName(killId);
RunningJob rj = job.getJob(new org.apache.hadoop.mapred.JobID(jid.getJtIdentifier(), jid.getId()));
// job.getJob(jid);
if (null != rj)
rj.killJob();
else
mapReduceState.updateState(killId, MapReduceState.KILLED);
} catch (IOException | QueryException e) {
QueryException qe = new QueryException(DatawaveErrorCode.MAPREDUCE_JOB_KILL_ERROR, e, MessageFormat.format("job_id: {0}", killId));
log.error(qe);
response.addException(qe.getBottomQueryException());
throw new DatawaveWebApplicationException(qe, response);
}
}
}
// Delete the contents of the results directory
if (hdfs.exists(resultsDir) && !hdfs.delete(resultsDir, true)) {
QueryException qe = new QueryException(DatawaveErrorCode.MAPRED_RESULTS_DELETE_ERROR, MessageFormat.format("directory: {0}", resultsDir.toString()));
log.error(qe);
response.addException(qe);
throw new DatawaveWebApplicationException(qe, response);
}
response.setResult(true);
return response;
} catch (IOException e) {
QueryException qe = new QueryException(DatawaveErrorCode.JOBTRACKER_CONNECTION_ERROR, e, MessageFormat.format("JobTracker: {0}", thisJob.getJobTracker()));
log.error(qe);
response.addException(qe);
throw new DatawaveWebApplicationException(qe, response);
} finally {
if (null != hdfs) {
try {
hdfs.close();
} catch (IOException e) {
log.error("Error closing HDFS client", e);
}
}
}
}
}
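The kill loop above uses the older org.apache.hadoop.mapred JobClient API. A rough sketch of the same operation with the newer org.apache.hadoop.mapreduce.Cluster API is shown below; the configuration is assumed to already point at the right cluster, and the state bookkeeping the bean performs for jobs that are no longer running is omitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class JobKillSketch {
    public static void killIfRunning(Configuration conf, String jobIdString) throws Exception {
        // Connect to the cluster described by the configuration.
        Cluster cluster = new Cluster(conf);
        try {
            Job job = cluster.getJob(JobID.forName(jobIdString));
            // Only attempt the kill if the cluster still knows the job and it has not completed.
            if (job != null && !job.isComplete()) {
                job.killJob();
            }
        } finally {
            cluster.close();
        }
    }
}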