Use of datawave.webservice.results.mr.JobExecution in project datawave by NationalSecurityAgency.
The class MapReduceStatePersisterBean, method remove:
/**
 * Removes the Bulk Results information for the given job id.
 *
 * @param id
 *            the bulk results id
 * @throws QueryException
 *             if no job information matches the id or the state entries cannot be removed
 */
public void remove(String id) throws QueryException {
    // Find out who/what called this method
    Principal p = ctx.getCallerPrincipal();
    String sid = p.getName();
    if (p instanceof DatawavePrincipal) {
        DatawavePrincipal dp = (DatawavePrincipal) p;
        sid = dp.getShortName();
    }
    MapReduceInfoResponseList results = findById(id);
    if (null == results)
        throw new NotFoundQueryException(DatawaveErrorCode.NO_QUERY_OBJECT_MATCH);
    if (results.getResults().size() > 1)
        throw new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_QUERY_OBJECT_MATCHES);
    else {
        MapReduceInfoResponse r = results.getResults().get(0);
        List<Mutation> indexEntries = new ArrayList<>();
        Mutation m = new Mutation(r.getId());
        m.putDelete(sid, WORKING_DIRECTORY);
        m.putDelete(sid, HDFS);
        m.putDelete(sid, JT);
        m.putDelete(sid, NAME);
        m.putDelete(sid, RESULTS_LOCATION);
        m.putDelete(sid, PARAMS);
        for (JobExecution job : r.getJobExecutions()) {
            m.putDelete(sid, STATE + NULL + job.getMapReduceJobId());
            Mutation i = new Mutation(job.getMapReduceJobId());
            i.putDelete(sid, r.getId());
            indexEntries.add(i);
        }
        Connector c = null;
        try {
            Map<String, String> trackingMap = connectionFactory.getTrackingMap(Thread.currentThread().getStackTrace());
            c = connectionFactory.getConnection(AccumuloConnectionFactory.Priority.ADMIN, trackingMap);
            tableCheck(c);
            // using BatchWriter instead of MultiTableBatchWriter because Mock CB does not support
            // MultiTableBatchWriter
            BatchWriterConfig bwCfg = new BatchWriterConfig().setMaxLatency(10, TimeUnit.SECONDS).setMaxMemory(10240L).setMaxWriteThreads(1);
            try (BatchWriter tableWriter = c.createBatchWriter(TABLE_NAME, bwCfg);
                            BatchWriter indexWriter = c.createBatchWriter(INDEX_TABLE_NAME, bwCfg)) {
                tableWriter.addMutation(m);
                for (Mutation i : indexEntries) {
                    indexWriter.addMutation(i);
                }
            }
        } catch (RuntimeException re) {
            throw re;
        } catch (Exception e) {
            QueryException qe = new QueryException(DatawaveErrorCode.JOB_STATE_UPDATE_ERROR, e, MessageFormat.format("job_id: {0}", id));
            log.error(qe);
            throw new QueryException(qe);
        } finally {
            try {
                connectionFactory.returnConnection(c);
            } catch (Exception e) {
log.error("Error creating query", e);
}
}
}
}
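For orientation, the sketch below mirrors the entries that remove() deletes: one row per MapReduce id in the state table, with the caller's short name as the column family, one column per piece of job metadata, a STATE-prefixed column per Hadoop job id, and one inverse entry per Hadoop job id in the index table. The class name, qualifier strings, and sample values are placeholders standing in for the WORKING_DIRECTORY, HDFS, JT, NAME, RESULTS_LOCATION, PARAMS, STATE, and NULL constants referenced above; this is an illustration of the layout, not code from the project.

// Hypothetical sketch of the layout remove() tears down. Qualifier strings
// below are placeholders; the real values come from MapReduceStatePersisterBean.
import java.nio.charset.StandardCharsets;

import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;

public class StateLayoutSketch {

    private static Value value(String s) {
        return new Value(s.getBytes(StandardCharsets.UTF_8));
    }

    /** One state-table row per MapReduce id; remove() deletes these columns one by one. */
    public static Mutation stateEntry(String id, String sid, String hadoopJobId) {
        Mutation m = new Mutation(id);
        m.put(sid, "DIR", value("hdfs://namenode:8020/BulkResults/" + id));                 // WORKING_DIRECTORY (placeholder qualifier)
        m.put(sid, "HDFS", value("hdfs://namenode:8020"));                                  // HDFS
        m.put(sid, "JT", value("jobtracker-host:8021"));                                    // JT
        m.put(sid, "NAME", value("BulkResultsJob"));                                        // NAME
        m.put(sid, "RESULTS", value("hdfs://namenode:8020/BulkResults/" + id + "/results")); // RESULTS_LOCATION
        m.put(sid, "PARAMS", value("param1=value1;param2=value2"));                         // PARAMS
        m.put(sid, "STATE\0" + hadoopJobId, value("STARTED")); // STATE + NULL + Hadoop job id; NULL is a one-character separator
        return m;
    }

    /** One index-table row per Hadoop job id, pointing back at the MapReduce id. */
    public static Mutation indexEntry(String hadoopJobId, String sid, String id) {
        Mutation i = new Mutation(hadoopJobId);
        i.put(sid, id, value(""));
        return i;
    }
}

Writing stateEntry(...) and indexEntry(...) through a BatchWriter would produce exactly the columns that remove() later deletes with putDelete.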
Use of datawave.webservice.results.mr.JobExecution in project datawave by NationalSecurityAgency.
The class MapReduceStatePersisterBean, method populateResponse:
private MapReduceInfoResponse populateResponse(Iterable<Entry<Key, Value>> data) throws IOException {
    MapReduceInfoResponse result = null;
    String hdfs = null;
    TreeSet<JobExecution> jobs = null;
    for (Entry<Key, Value> entry : data) {
        if (null == result)
            result = new MapReduceInfoResponse();
        result.setId(entry.getKey().getRow().toString());
        String colq = entry.getKey().getColumnQualifier().toString();
        if (colq.equals(WORKING_DIRECTORY)) {
            result.setWorkingDirectory(new String(entry.getValue().get()));
        } else if (colq.equals(RESULTS_LOCATION)) {
            if (null != entry.getValue() && entry.getValue().get().length > 0) {
                result.setResultsDirectory(new String(entry.getValue().get()));
            }
        } else if (colq.equals(PARAMS)) {
            result.setRuntimeParameters(new String(entry.getValue().get()));
        } else if (colq.equals(HDFS)) {
            result.setHdfs(new String(entry.getValue().get()));
            hdfs = new String(entry.getValue().get());
        } else if (colq.equals(JT)) {
            result.setJobTracker(new String(entry.getValue().get()));
        } else if (colq.startsWith(STATE)) {
            if (null == jobs)
                jobs = new TreeSet<>();
            JobExecution job = new JobExecution();
            job.setMapReduceJobId(colq.substring(STATE.length() + 1));
            job.setState(new String(entry.getValue().get()));
            job.setTimestamp(entry.getKey().getTimestamp());
            jobs.add(job);
        } else if (colq.equals(NAME)) {
            result.setJobName(new String(entry.getValue().get()));
        }
    }
    if (null != jobs)
        result.setJobExecutions(new ArrayList<>(jobs));
    try {
        if (null != hdfs && !hdfs.isEmpty() && null != result.getResultsDirectory()) {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", hdfs);
            // If we can't talk to HDFS then I want to fail fast, default is to retry 10 times.
            conf.setInt("ipc.client.connect.max.retries", 0);
            Path resultDirectoryPath = new Path(result.getResultsDirectory());
            int resultDirectoryPathLength = resultDirectoryPath.toUri().getPath().length();
            FileSystem fs = FileSystem.get(resultDirectoryPath.toUri(), conf);
            List<FileStatus> stats = new ArrayList<>();
            // recurse through the directory to find all files
            Queue<FileStatus> fileQueue = new LinkedList<>();
            fileQueue.add(fs.getFileStatus(resultDirectoryPath));
            while (!fileQueue.isEmpty()) {
                FileStatus currentFileStatus = fileQueue.remove();
                if (currentFileStatus.isFile()) {
                    stats.add(currentFileStatus);
                } else {
                    FileStatus[] dirList = fs.listStatus(currentFileStatus.getPath());
                    Collections.addAll(fileQueue, dirList);
                }
            }
            if (!stats.isEmpty()) {
                List<ResultFile> resultFiles = new ArrayList<>();
                for (FileStatus f : stats) {
                    if (!f.isDirectory()) {
                        ResultFile rf = new ResultFile();
                        String fullPath = f.getPath().toUri().getPath().substring(resultDirectoryPathLength + 1);
                        rf.setFileName(fullPath);
                        rf.setLength(f.getLen());
                        resultFiles.add(rf);
                    }
                }
                result.setResultFiles(resultFiles);
            }
        }
    } catch (IOException e) {
        log.warn("Unable to populate result files portion of response", e);
    }
    return result;
}
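populateResponse expects the entries of a single state-table row: the row is the MapReduce id, the column family is the caller's short name, and each qualifier carries one field, with STATE-prefixed qualifiers collected into JobExecution entries via the TreeSet. A hypothetical caller might feed it a Scanner as sketched below; the table name and surrounding plumbing are assumptions, and only the Iterable<Entry<Key, Value>> contract comes from the method signature above.

// Hypothetical caller, imagined alongside populateResponse() in the same class.
// Requires org.apache.accumulo.core.client.{Connector, Scanner, TableNotFoundException},
// org.apache.accumulo.core.data.Range, org.apache.accumulo.core.security.Authorizations,
// and org.apache.hadoop.io.Text.
private MapReduceInfoResponse lookupOne(Connector connector, String id, String sid) throws TableNotFoundException, IOException {
    Scanner scanner = connector.createScanner("BulkResultsState", Authorizations.EMPTY); // table name is a placeholder
    scanner.setRange(Range.exact(id));        // row = MapReduce id
    scanner.fetchColumnFamily(new Text(sid)); // column family = caller's short name
    return populateResponse(scanner);         // Scanner is an Iterable<Entry<Key, Value>>
}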
Use of datawave.webservice.results.mr.JobExecution in project datawave by NationalSecurityAgency.
The class MapReduceBean, method cancel:
/**
 * Cancels any MapReduce jobs with the specified jobId and clears out the results directory.
 *
 * @param jobId
 *            the id of the MapReduce job to cancel
 * @return {@code datawave.webservice.result.GenericResponse<Boolean>}
 * @RequestHeader X-ProxiedEntitiesChain use when proxying a request for a user by specifying a chain of DNs of the identities to proxy
 * @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain; specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
 * @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation; does not account for network time or result serialization
 * @HTTP 200 success
 * @HTTP 404 if jobId is invalid or cannot be found
 * @HTTP 500 error killing the job
 */
@PUT
@Produces({ "application/xml", "text/xml", "application/json", "text/yaml", "text/x-yaml", "application/x-yaml", "application/x-protobuf", "application/x-protostuff" })
@javax.ws.rs.Path("/{jobId}/cancel")
@GZIP
public GenericResponse<Boolean> cancel(@PathParam("jobId") String jobId) {
    GenericResponse<Boolean> response = new GenericResponse<>();
    // Find all potential running jobs
    MapReduceInfoResponseList list = mapReduceState.findById(jobId);
    List<String> jobIdsToKill = new ArrayList<>();
    // Should contain zero or one bulk result job
    if (list.getResults().isEmpty()) {
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.NO_MAPREDUCE_OBJECT_MATCH);
        response.addException(qe);
        throw new NotFoundException(qe, response);
    } else if (list.getResults().size() > 1) {
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_MAPREDUCE_OBJECT_MATCHES);
        response.addException(qe);
        throw new NotFoundException(qe, response);
    } else {
        MapReduceInfoResponse thisJob = list.getResults().get(0);
        // Get all the executions for this job
        String prevId = null;
        String prevState = null;
        // add it to the kill list
        for (JobExecution ex : thisJob.getJobExecutions()) {
            if (prevId != null) {
                if (prevState.equals(MapReduceState.STARTED.toString()) && !ex.getMapReduceJobId().equals(prevId))
                    jobIdsToKill.add(prevId);
            }
            prevId = ex.getMapReduceJobId();
            prevState = ex.getState();
        }
        // Get the last one
        if (MapReduceState.STARTED.toString().equals(prevState))
            jobIdsToKill.add(prevId);
        FileSystem hdfs = null;
        try {
            hdfs = getFS(thisJob.getHdfs(), response);
            Path resultsDir = new Path(thisJob.getResultsDirectory());
            hdfs.getConf().set("mapreduce.jobtracker.address", thisJob.getJobTracker());
            // Create a Job object
            try (JobClient job = new JobClient(new JobConf(hdfs.getConf()))) {
                for (String killId : jobIdsToKill) {
                    try {
                        JobID jid = JobID.forName(killId);
                        RunningJob rj = job.getJob(new org.apache.hadoop.mapred.JobID(jid.getJtIdentifier(), jid.getId()));
                        // job.getJob(jid);
                        if (null != rj)
                            rj.killJob();
                        else
                            mapReduceState.updateState(killId, MapReduceState.KILLED);
                    } catch (IOException | QueryException e) {
                        QueryException qe = new QueryException(DatawaveErrorCode.MAPREDUCE_JOB_KILL_ERROR, e, MessageFormat.format("job_id: {0}", killId));
                        log.error(qe);
                        response.addException(qe.getBottomQueryException());
                        throw new DatawaveWebApplicationException(qe, response);
                    }
                }
            }
            // Delete the contents of the results directory
            if (hdfs.exists(resultsDir) && !hdfs.delete(resultsDir, true)) {
                QueryException qe = new QueryException(DatawaveErrorCode.MAPRED_RESULTS_DELETE_ERROR, MessageFormat.format("directory: {0}", resultsDir.toString()));
                log.error(qe);
                response.addException(qe);
                throw new DatawaveWebApplicationException(qe, response);
            }
            response.setResult(true);
            return response;
        } catch (IOException e) {
            QueryException qe = new QueryException(DatawaveErrorCode.JOBTRACKER_CONNECTION_ERROR, e, MessageFormat.format("JobTracker: {0}", thisJob.getJobTracker()));
            log.error(qe);
            response.addException(qe);
            throw new DatawaveWebApplicationException(qe, response);
        } finally {
            if (null != hdfs) {
                try {
                    hdfs.close();
                } catch (IOException e) {
                    log.error("Error closing HDFS client", e);
                }
            }
        }
    }
}
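For completeness, here is a client-side sketch of invoking this endpoint with the standard JAX-RS 2.0 client API. The base URI (including the class-level @Path prefix of MapReduceBean) is a placeholder; only the {jobId}/cancel suffix, the PUT verb, and the produced media types come from the annotations above.

// Hypothetical client call for the cancel endpoint above.
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Entity;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

public class CancelMapReduceJob {
    public static void main(String[] args) {
        String jobId = args[0];
        Client client = ClientBuilder.newClient();
        try {
            Response response = client.target("https://webservice.example.com/DataWave/MapReduce") // placeholder base URI
                            .path(jobId)
                            .path("cancel")
                            .request(MediaType.APPLICATION_XML)    // any of the @Produces types works here
                            .put(Entity.text(""));                 // the endpoint takes no request body
            // 200 -> GenericResponse<Boolean> with result=true
            // 404 -> jobId not found (NO_MAPREDUCE_OBJECT_MATCH / TOO_MANY_MAPREDUCE_OBJECT_MATCHES)
            // 500 -> error killing the job or deleting the results directory
            System.out.println("HTTP " + response.getStatus());
            System.out.println(response.readEntity(String.class));
        } finally {
            client.close();
        }
    }
}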