Use of MapReduceInfoResponseList in project datawave by NationalSecurityAgency.
The class MapReduceStatePersisterBean, method find.
* Returns all MapReduce jobs for the current user
* @return list of map reduce information
// Scans TABLE_NAME for the calling user's entries (column family = caller id), groups the
// scanned entries by row, and hands each group to populateResponse.
// NOTE(review): this listing appears to have lost lines during extraction (closing braces and the
// statements that should follow each `if (null != response)`); the comments below describe only
// what is visible here.
public MapReduceInfoResponseList find() {
// Find out who/what called this method
Principal p = ctx.getCallerPrincipal();
// Default caller id is the principal name; replaced by the short name for a DatawavePrincipal.
String sid = p.getName();
Set<Authorizations> auths = new HashSet<>();
if (p instanceof DatawavePrincipal) {
DatawavePrincipal dp = (DatawavePrincipal) p;
sid = dp.getShortName();
// One Authorizations object is built per collection returned by getAuthorizations().
for (Collection<String> cbAuths : dp.getAuthorizations()) auths.add(new Authorizations(cbAuths.toArray(new String[cbAuths.size()])));
log.trace(sid + " has authorizations " + auths);
MapReduceInfoResponseList result = new MapReduceInfoResponseList();
Connector c = null;
try {
// The tracking map is derived from the current thread's stack trace; the connection is requested at ADMIN priority.
Map<String, String> trackingMap = connectionFactory.getTrackingMap(Thread.currentThread().getStackTrace());
c = connectionFactory.getConnection(AccumuloConnectionFactory.Priority.ADMIN, trackingMap);
try (Scanner scanner = ScannerHelper.createScanner(c, TABLE_NAME, auths)) {
// Only the column family named after the caller is fetched.
scanner.fetchColumnFamily(new Text(sid));
// We need to create a response for each job
// previousRow is seeded with sid and updated to the row of each entry added to the batch.
String previousRow = sid;
Map<Key, Value> batch = new HashMap<>();
for (Entry<Key, Value> entry : scanner) {
// A change of row with a non-empty batch triggers building a response from the batch.
if (!previousRow.equals(entry.getKey().getRow().toString()) && !batch.isEmpty()) {
MapReduceInfoResponse response = populateResponse(batch.entrySet());
// NOTE(review): the statement guarded by this null check is not visible; presumably the
// response is added to result and the batch is reset - confirm against the upstream source.
if (null != response)
} else {
batch.put(entry.getKey(), entry.getValue());
previousRow = entry.getKey().getRow().toString();
// NOTE(review): looks like the flush of the final batch after the loop - the loop's closing
// braces are missing from this listing, so confirm placement.
if (!batch.isEmpty()) {
MapReduceInfoResponse response = populateResponse(batch.entrySet());
// NOTE(review): guarded statement missing here as well.
if (null != response)
return result;
} catch (IOException ioe) {
// NOTE(review): qe is constructed but no visible line logs or throws it; result is returned as-is.
QueryException qe = new QueryException(DatawaveErrorCode.RESPONSE_POPULATION_ERROR, ioe);
return result;
} catch (Exception e) {
// NOTE(review): same pattern - qe is built and not visibly used before returning result.
QueryException qe = new QueryException(DatawaveErrorCode.QUERY_SETUP_ERROR, e);
return result;
} finally {
// NOTE(review): the try body is empty in this listing; judging by the log message below, the
// call that returns c to the connection pool was presumably dropped - confirm.
try {
} catch (Exception e) {
log.error("Error returning connection to connection pool", e);
Use of MapReduceInfoResponseList in project datawave by NationalSecurityAgency.
The class MapReduceStatePersisterBean, method findById.
* Information for a specific map reduce id
* @param id
* map reduce id
* @return list of map reduce information
// Looks up the MapReduce record for the given id on behalf of the calling user by scanning
// TABLE_NAME with the caller's column family and passing the scanner to populateResponse.
// NOTE(review): this listing appears to have lost lines during extraction (closing braces and
// several statements); the comments below describe only what is visible here.
public MapReduceInfoResponseList findById(String id) {
// Find out who/what called this method
Principal p = ctx.getCallerPrincipal();
// Default caller id is the principal name; replaced by the short name for a DatawavePrincipal.
String sid = p.getName();
Set<Authorizations> auths = new HashSet<>();
if (p instanceof DatawavePrincipal) {
DatawavePrincipal dp = (DatawavePrincipal) p;
sid = dp.getShortName();
// One Authorizations object is built per collection returned by getAuthorizations().
for (Collection<String> cbAuths : dp.getAuthorizations()) auths.add(new Authorizations(cbAuths.toArray(new String[cbAuths.size()])));
log.trace(sid + " has authorizations " + auths);
MapReduceInfoResponseList result = new MapReduceInfoResponseList();
Connector c = null;
try {
// The tracking map is derived from the current thread's stack trace; the connection is requested at ADMIN priority.
Map<String, String> trackingMap = connectionFactory.getTrackingMap(Thread.currentThread().getStackTrace());
c = connectionFactory.getConnection(AccumuloConnectionFactory.Priority.ADMIN, trackingMap);
try (Scanner scanner = ScannerHelper.createScanner(c, TABLE_NAME, auths)) {
// NOTE(review): range is constructed from id but never applied to the scanner in the visible
// lines; presumably a scanner.setRange(range) call was dropped - confirm, otherwise the scan
// is not limited to the requested id.
Range range = new Range(id);
// Only the column family named after the caller is fetched.
scanner.fetchColumnFamily(new Text(sid));
// The scanner itself is handed to populateResponse to build the single response.
MapReduceInfoResponse response = populateResponse(scanner);
// NOTE(review): the statement guarded by this null check is not visible; presumably the
// response is added to result - confirm against the upstream source.
if (null != response)
return result;
} catch (IOException ioe) {
// NOTE(review): qe is constructed but no visible line logs or throws it; result is returned as-is.
QueryException qe = new QueryException(DatawaveErrorCode.RESPONSE_POPULATION_ERROR, ioe);
return result;
} catch (Exception e) {
// NOTE(review): same pattern - qe is built and not visibly used before returning result.
QueryException qe = new QueryException(DatawaveErrorCode.QUERY_SETUP_ERROR, e);
return result;
} finally {
// NOTE(review): the try body is empty in this listing; judging by the log message below, the
// call that returns c to the connection pool was presumably dropped - confirm.
try {
} catch (Exception e) {
log.error("Error returning connection to connection pool", e);
Use of MapReduceInfoResponseList in project datawave by NationalSecurityAgency.
The class MapReduceStatePersisterBean, method remove.
* Removes Bulk Results information and related directory in HDFS for the given job id.
* @param id
* bulk results id
// Builds delete mutations for the record identified by id (and an index mutation per job
// execution) and writes index mutations through a BatchWriter on INDEX_TABLE_NAME.
// NOTE(review): this listing appears to have lost lines during extraction (closing braces and
// several statements); the comments below describe only what is visible here.
public void remove(String id) throws QueryException {
// Find out who/what called this method
Principal p = ctx.getCallerPrincipal();
// Default caller id is the principal name; replaced by the short name for a DatawavePrincipal.
String sid = p.getName();
if (p instanceof DatawavePrincipal) {
DatawavePrincipal dp = (DatawavePrincipal) p;
sid = dp.getShortName();
MapReduceInfoResponseList results = findById(id);
if (null == results)
throw new NotFoundQueryException(DatawaveErrorCode.NO_QUERY_OBJECT_MATCH);
if (results.getResults().size() > 1)
throw new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_QUERY_OBJECT_MATCHES);
else {
// NOTE(review): there is no visible guard for an empty result list before get(0); if
// getResults() can be empty this would throw - confirm and consider an explicit not-found error.
MapReduceInfoResponse r = results.getResults().get(0);
List<Mutation> indexEntries = new ArrayList<>();
// m is keyed by the record id and deletes each of the caller's columns listed below.
Mutation m = new Mutation(r.getId());
m.putDelete(sid, WORKING_DIRECTORY);
m.putDelete(sid, HDFS);
m.putDelete(sid, JT);
m.putDelete(sid, NAME);
m.putDelete(sid, RESULTS_LOCATION);
m.putDelete(sid, PARAMS);
for (JobExecution job : r.getJobExecutions()) {
// Each job execution contributes a delete of its state column (STATE + NULL + job id) on m...
m.putDelete(sid, STATE + NULL + job.getMapReduceJobId());
// ...and a separate mutation keyed by the job id that deletes the (sid, record id) column.
// NOTE(review): no visible line adds i to indexEntries; presumably that statement was dropped - confirm.
Mutation i = new Mutation(job.getMapReduceJobId());
i.putDelete(sid, r.getId());
Connector c = null;
try {
// The tracking map is derived from the current thread's stack trace; the connection is requested at ADMIN priority.
Map<String, String> trackingMap = connectionFactory.getTrackingMap(Thread.currentThread().getStackTrace());
c = connectionFactory.getConnection(AccumuloConnectionFactory.Priority.ADMIN, trackingMap);
// using BatchWriter instead of MultiTableBatchWriter because Mock CB does not support
// MultiTableBatchWriter
// Writer settings: 10 second max latency, 10240 max memory, a single write thread.
BatchWriterConfig bwCfg = new BatchWriterConfig().setMaxLatency(10, TimeUnit.SECONDS).setMaxMemory(10240L).setMaxWriteThreads(1);
// Both writers are opened in one try-with-resources statement.
// NOTE(review): only the index mutations are visibly written; no visible line adds m to
// tableWriter - presumably dropped, confirm.
try (BatchWriter tableWriter = c.createBatchWriter(TABLE_NAME, bwCfg);
BatchWriter indexWriter = c.createBatchWriter(INDEX_TABLE_NAME, bwCfg)) {
for (Mutation i : indexEntries) indexWriter.addMutation(i);
} catch (RuntimeException re) {
// Runtime exceptions are rethrown unchanged.
throw re;
} catch (Exception e) {
// Any other exception is wrapped with JOB_STATE_UPDATE_ERROR and the job id, then wrapped again in a new QueryException.
QueryException qe = new QueryException(DatawaveErrorCode.JOB_STATE_UPDATE_ERROR, e, MessageFormat.format("job_id: {0}", id));
throw new QueryException(qe);
} finally {
// NOTE(review): the try body is empty in this listing; presumably the call that returns c to
// the connection pool was dropped - confirm.
try {
} catch (Exception e) {
log.error("Error creating query", e);
Use of MapReduceInfoResponseList in project datawave by NationalSecurityAgency.
The class MapReduceBean, method restart.
* Kill any job running associated with the BulkResults id and start a new job.
* @param jobId
* @return {@code datawave.webservice.result.GenericResponse<String>}
* @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
* @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
* @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
* @HTTP 200 success
* @HTTP 404 if jobId is invalid or cannot be found
* @HTTP 500 error restarting the job
// NOTE(review): the annotation line below is garbled - the trailing "/{jobId}/restart") looks
// like the remainder of a path annotation whose start was dropped during extraction - confirm.
@Produces({ "application/xml", "text/xml", "application/json", "text/yaml", "text/x-yaml", "application/x-yaml", "application/x-protobuf", "application/x-protostuff" })"/{jobId}/restart")
// Looks up the job by id, rejects zero or multiple matches with a NotFoundException, and
// otherwise re-submits the job with the original job id appended to its runtime parameters.
public GenericResponse<String> restart(@PathParam("jobId") String jobId) {
GenericResponse<String> response = new GenericResponse<>();
// Find all potential running jobs
MapReduceInfoResponseList list = mapReduceState.findById(jobId);
// Should contain zero or one job
if (list.getResults().isEmpty()) {
// No match: the query exception is wrapped in a NotFoundException together with the (empty) response.
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.NO_MAPREDUCE_OBJECT_MATCH);
throw new NotFoundException(qe, response);
} else if (list.getResults().size() > 1) {
// More than one match is reported the same way with a different error code.
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_MAPREDUCE_OBJECT_MATCHES);
throw new NotFoundException(qe, response);
} else {
MapReduceInfoResponse thisJob = list.getResults().get(0);
// Call cancel for this job. This will kill any running jobs and remove the results directory
// NOTE(review): no cancel call is visible in this listing; presumably it was dropped - confirm.
// Now re-submit this job after adding the JOB_ID to the runtime parameters to signal that this job has been restarted
String jobName = thisJob.getJobName();
// Now call submit
// The appended parameter is PARAMETER_SEPARATOR + JOB_ID + PARAMETER_NAME_VALUE_SEPARATOR + jobId.
return submit(jobName, thisJob.getRuntimeParameters() + PARAMETER_SEPARATOR + JOB_ID + PARAMETER_NAME_VALUE_SEPARATOR + jobId);
Use of MapReduceInfoResponseList in project datawave by NationalSecurityAgency.
The class MapReduceBean, method getResultFile.
* Returns the contents of a result file. The list of resulting output files from the MapReduce job is listed in the response object of the status
* operation.
* @param jobId
* @param fileName
* @return file contents
* @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
* @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
* @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
* @HTTP 200 success
* @HTTP 404 if jobId is invalid or cannot be found
* @HTTP 500
// Resolves fileName inside the job's results directory, verifies it exists and is a regular
// file, and returns a StreamingOutput that copies the file to the response in BUFFER_SIZE chunks.
// NOTE(review): this listing appears to have lost tokens and lines during extraction (closing
// braces, right-hand sides of assignments, a parameter type); comments describe only what is visible.
public StreamingOutput getResultFile(@PathParam("jobId") String jobId, @PathParam("fileName") String fileName) {
MapReduceInfoResponseList response = list(jobId);
// NOTE(review): no visible guard for an empty result list before get(0) - confirm whether
// list(jobId) guarantees at least one result.
MapReduceInfoResponse result = response.getResults().get(0);
String hdfs = result.getHdfs();
String resultsDir = result.getResultsDirectory();
final FileSystem fs = getFS(hdfs, response);
// NOTE(review): fileName comes straight from the request path and is joined to resultsDir with
// no visible validation - confirm that it cannot escape the results directory.
final Path resultFile = new Path(resultsDir, fileName);
FSDataInputStream fis;
try {
// A missing path or a non-file path is reported as FILE_NOT_FOUND via NotFoundException.
if (!fs.exists(resultFile) || !fs.isFile(resultFile)) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.FILE_NOT_FOUND, MessageFormat.format("{0} at path {1}", fileName, resultsDir));
throw new NotFoundException(qe, response);
// NOTE(review): the right-hand side of this assignment is missing; presumably the result file
// is opened on fs here - confirm against the upstream source.
fis =;
} catch (IOException e1) {
// I/O failures while checking/opening the file are reported as RESULT_FILE_ACCESS_ERROR via NotFoundException.
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.RESULT_FILE_ACCESS_ERROR, e1, MessageFormat.format("{0}", resultFile.toString()));
throw new NotFoundException(qe, response);
// Make a final reference to the fis for referencing inside the inner class
final FSDataInputStream fiz = fis;
return new StreamingOutput() {
private Logger log = Logger.getLogger(this.getClass());
// NOTE(review): the parameter type of output is missing from this listing.
public void write( output) throws IOException, WebApplicationException {
byte[] buf = new byte[BUFFER_SIZE];
int read;
try {
// NOTE(review): both `read =;` assignments have lost their right-hand side; presumably each
// reads from fiz into buf - confirm.
read =;
// Copy loop: write the bytes just read, then read again, until -1 is returned.
while (read != -1) {
output.write(buf, 0, read);
read =;
} catch (Exception e) {
// Any failure while copying is logged and rethrown as a WebApplicationException.
log.error("Error writing result file to output", e);
throw new WebApplicationException(e);
} finally {
// NOTE(review): the statements guarded by the two null checks below are missing; judging by
// the log messages they presumably close fiz and fs - confirm.
try {
if (null != fiz)
} catch (IOException e) {
log.error("Error closing FSDataInputStream for file: " + resultFile, e);
try {
if (null != fs)
} catch (IOException e) {
log.error("Error closing HDFS client", e);