
Example 1 with JobExecution

Use of datawave.webservice.results.mr.JobExecution in the datawave project by NationalSecurityAgency.

From the class MapReduceStatePersisterBean, method remove.

/**
 * Removes bulk results information and the related directory in HDFS for the given job id.
 *
 * @param id
 *            bulk results id
 * @throws QueryException
 *             if no matching job is found or the state entries cannot be removed
 */
public void remove(String id) throws QueryException {
    // Find out who/what called this method
    Principal p = ctx.getCallerPrincipal();
    String sid = p.getName();
    if (p instanceof DatawavePrincipal) {
        DatawavePrincipal dp = (DatawavePrincipal) p;
        sid = dp.getShortName();
    }
    MapReduceInfoResponseList results = findById(id);
    if (null == results)
        throw new NotFoundQueryException(DatawaveErrorCode.NO_QUERY_OBJECT_MATCH);
    if (results.getResults().size() > 1)
        throw new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_QUERY_OBJECT_MATCHES);
    else {
        MapReduceInfoResponse r = results.getResults().get(0);
        List<Mutation> indexEntries = new ArrayList<>();
        Mutation m = new Mutation(r.getId());
        m.putDelete(sid, WORKING_DIRECTORY);
        m.putDelete(sid, HDFS);
        m.putDelete(sid, JT);
        m.putDelete(sid, NAME);
        m.putDelete(sid, RESULTS_LOCATION);
        m.putDelete(sid, PARAMS);
        for (JobExecution job : r.getJobExecutions()) {
            m.putDelete(sid, STATE + NULL + job.getMapReduceJobId());
            Mutation i = new Mutation(job.getMapReduceJobId());
            i.putDelete(sid, r.getId());
            indexEntries.add(i);
        }
        Connector c = null;
        try {
            Map<String, String> trackingMap = connectionFactory.getTrackingMap(Thread.currentThread().getStackTrace());
            c = connectionFactory.getConnection(AccumuloConnectionFactory.Priority.ADMIN, trackingMap);
            tableCheck(c);
            // using BatchWriter instead of MultiTableBatchWriter because Mock CB does not support
            // MultiTableBatchWriter
            BatchWriterConfig bwCfg = new BatchWriterConfig().setMaxLatency(10, TimeUnit.SECONDS).setMaxMemory(10240L).setMaxWriteThreads(1);
            try (BatchWriter tableWriter = c.createBatchWriter(TABLE_NAME, bwCfg);
                BatchWriter indexWriter = c.createBatchWriter(INDEX_TABLE_NAME, bwCfg)) {
                tableWriter.addMutation(m);
                for (Mutation i : indexEntries) indexWriter.addMutation(i);
            }
        } catch (RuntimeException re) {
            throw re;
        } catch (Exception e) {
            QueryException qe = new QueryException(DatawaveErrorCode.JOB_STATE_UPDATE_ERROR, e, MessageFormat.format("job_id: {0}", id));
            log.error(qe);
            throw new QueryException(qe);
        } finally {
            try {
                connectionFactory.returnConnection(c);
            } catch (Exception e) {
                log.error("Error creating query", e);
            }
        }
    }
}
Also used: Connector (org.apache.accumulo.core.client.Connector), ArrayList (java.util.ArrayList), MapReduceInfoResponseList (datawave.webservice.results.mr.MapReduceInfoResponseList), DatawavePrincipal (datawave.security.authorization.DatawavePrincipal), NotFoundQueryException (datawave.webservice.query.exception.NotFoundQueryException), TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException), TableExistsException (org.apache.accumulo.core.client.TableExistsException), AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException), IOException (java.io.IOException), AccumuloException (org.apache.accumulo.core.client.AccumuloException), QueryException (datawave.webservice.query.exception.QueryException), JobExecution (datawave.webservice.results.mr.JobExecution), MapReduceInfoResponse (datawave.webservice.results.mr.MapReduceInfoResponse), BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig), Mutation (org.apache.accumulo.core.data.Mutation), BatchWriter (org.apache.accumulo.core.client.BatchWriter), Principal (java.security.Principal)
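At its core, the remove method above uses Accumulo's delete-marker pattern: one Mutation.putDelete per column, flushed through a short-lived BatchWriter. Below is a minimal standalone sketch of that pattern, assuming an already-built Connector; the class name and the table, row, and column arguments are hypothetical placeholders, not DATAWAVE's actual values.

import java.util.concurrent.TimeUnit;

import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Mutation;

public class DeleteRowSketch {

    /**
     * Writes a delete marker for every listed column of one row, mirroring the
     * putDelete calls in MapReduceStatePersisterBean.remove.
     */
    public static void deleteColumns(Connector connector, String table, String row, String columnFamily, String... columnQualifiers)
                    throws TableNotFoundException, MutationsRejectedException {
        // Same conservative writer settings used in the example above
        BatchWriterConfig cfg = new BatchWriterConfig().setMaxLatency(10, TimeUnit.SECONDS).setMaxMemory(10240L).setMaxWriteThreads(1);
        try (BatchWriter writer = connector.createBatchWriter(table, cfg)) {
            Mutation m = new Mutation(row);
            for (String qualifier : columnQualifiers) {
                // A delete marker hides all earlier versions of this column
                m.putDelete(columnFamily, qualifier);
            }
            writer.addMutation(m);
        }
        // Closing the writer (via try-with-resources) flushes the pending delete markers
    }
}

In the example above, the caller's short name plays the role of the column family and the attribute names (WORKING_DIRECTORY, HDFS, JT, and so on) are the qualifiers.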

Example 2 with JobExecution

Use of datawave.webservice.results.mr.JobExecution in the datawave project by NationalSecurityAgency.

From the class MapReduceStatePersisterBean, method populateResponse.

private MapReduceInfoResponse populateResponse(Iterable<Entry<Key, Value>> data) throws IOException {
    MapReduceInfoResponse result = null;
    String hdfs = null;
    TreeSet<JobExecution> jobs = null;
    for (Entry<Key, Value> entry : data) {
        if (null == result)
            result = new MapReduceInfoResponse();
        result.setId(entry.getKey().getRow().toString());
        String colq = entry.getKey().getColumnQualifier().toString();
        if (colq.equals(WORKING_DIRECTORY)) {
            result.setWorkingDirectory(new String(entry.getValue().get()));
        } else if (colq.equals(RESULTS_LOCATION)) {
            if (null != entry.getValue() && entry.getValue().get().length > 0) {
                result.setResultsDirectory(new String(entry.getValue().get()));
            }
        } else if (colq.equals(PARAMS)) {
            result.setRuntimeParameters(new String(entry.getValue().get()));
        } else if (colq.equals(HDFS)) {
            result.setHdfs(new String(entry.getValue().get()));
            hdfs = new String(entry.getValue().get());
        } else if (colq.equals(JT)) {
            result.setJobTracker(new String(entry.getValue().get()));
        } else if (colq.startsWith(STATE)) {
            if (null == jobs)
                jobs = new TreeSet<>();
            JobExecution job = new JobExecution();
            job.setMapReduceJobId(colq.substring(STATE.length() + 1));
            job.setState(new String(entry.getValue().get()));
            job.setTimestamp(entry.getKey().getTimestamp());
            jobs.add(job);
        } else if (colq.equals(NAME)) {
            result.setJobName(new String(entry.getValue().get()));
        }
    }
    if (null != jobs)
        result.setJobExecutions(new ArrayList<>(jobs));
    try {
        if (null != hdfs && !hdfs.isEmpty() && null != result.getResultsDirectory()) {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", hdfs);
            // If we can't talk to HDFS then I want to fail fast, default is to retry 10 times.
            conf.setInt("ipc.client.connect.max.retries", 0);
            Path resultDirectoryPath = new Path(result.getResultsDirectory());
            int resultDirectoryPathLength = resultDirectoryPath.toUri().getPath().length();
            FileSystem fs = FileSystem.get(resultDirectoryPath.toUri(), conf);
            List<FileStatus> stats = new ArrayList<>();
            // recurse through the directory to find all files
            Queue<FileStatus> fileQueue = new LinkedList<>();
            fileQueue.add(fs.getFileStatus(resultDirectoryPath));
            while (!fileQueue.isEmpty()) {
                FileStatus currentFileStatus = fileQueue.remove();
                if (currentFileStatus.isFile()) {
                    stats.add(currentFileStatus);
                } else {
                    FileStatus[] dirList = fs.listStatus(currentFileStatus.getPath());
                    Collections.addAll(fileQueue, dirList);
                }
            }
            if (!stats.isEmpty()) {
                List<ResultFile> resultFiles = new ArrayList<>();
                for (FileStatus f : stats) {
                    if (!f.isDirectory()) {
                        ResultFile rf = new ResultFile();
                        String fullPath = f.getPath().toUri().getPath().substring(resultDirectoryPathLength + 1);
                        rf.setFileName(fullPath);
                        rf.setLength(f.getLen());
                        resultFiles.add(rf);
                    }
                }
                result.setResultFiles(resultFiles);
            }
        }
    } catch (IOException e) {
        log.warn("Unable to populate result files portion of response", e);
    }
    return result;
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), IOException (java.io.IOException), ResultFile (datawave.webservice.results.mr.ResultFile), LinkedList (java.util.LinkedList), JobExecution (datawave.webservice.results.mr.JobExecution), MapReduceInfoResponse (datawave.webservice.results.mr.MapReduceInfoResponse), FileSystem (org.apache.hadoop.fs.FileSystem), Value (org.apache.accumulo.core.data.Value), Key (org.apache.accumulo.core.data.Key)
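The result-file discovery in populateResponse is a plain breadth-first walk over HDFS driven by a queue of FileStatus entries. Below is a minimal standalone version of that traversal, assuming the root path is a directory; the class name, namenode URI, and results directory are hypothetical.

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import java.util.Queue;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsListingSketch {

    /**
     * Breadth-first listing of every regular file under a directory, the same
     * queue-based walk used in populateResponse. Returns paths relative to the root.
     */
    public static List<String> listFiles(FileSystem fs, Path root) throws IOException {
        int rootLength = root.toUri().getPath().length();
        List<String> files = new ArrayList<>();
        Queue<FileStatus> queue = new ArrayDeque<>();
        queue.add(fs.getFileStatus(root)); // root is assumed to be a directory
        while (!queue.isEmpty()) {
            FileStatus current = queue.remove();
            if (current.isFile()) {
                files.add(current.getPath().toUri().getPath().substring(rootLength + 1));
            } else {
                for (FileStatus child : fs.listStatus(current.getPath())) {
                    queue.add(child);
                }
            }
        }
        return files;
    }

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://namenode:8020"); // hypothetical namenode
        conf.setInt("ipc.client.connect.max.retries", 0); // fail fast if HDFS is unreachable
        Path root = new Path("/results/job-12345"); // hypothetical results directory
        try (FileSystem fs = FileSystem.get(root.toUri(), conf)) {
            listFiles(fs, root).forEach(System.out::println);
        }
    }
}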

Example 3 with JobExecution

Use of datawave.webservice.results.mr.JobExecution in the datawave project by NationalSecurityAgency.

From the class MapReduceBean, method cancel.

/**
 * Cancels any MapReduce jobs with the specified jobId and clears out the results directory
 *
 * @param jobId the id of the MapReduce job to cancel
 * @return {@code datawave.webservice.result.GenericResponse<Boolean>}
 * @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
 * @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
 * @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
 * @HTTP 200 success
 * @HTTP 404 if jobId is invalid or cannot be found
 * @HTTP 500 error killing the job
 */
@PUT
@Produces({ "application/xml", "text/xml", "application/json", "text/yaml", "text/x-yaml", "application/x-yaml", "application/x-protobuf", "application/x-protostuff" })
@javax.ws.rs.Path("/{jobId}/cancel")
@GZIP
public GenericResponse<Boolean> cancel(@PathParam("jobId") String jobId) {
    GenericResponse<Boolean> response = new GenericResponse<>();
    // Find all potential running jobs
    MapReduceInfoResponseList list = mapReduceState.findById(jobId);
    List<String> jobIdsToKill = new ArrayList<>();
    // Should contain zero or one bulk result job
    if (list.getResults().isEmpty()) {
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.NO_MAPREDUCE_OBJECT_MATCH);
        response.addException(qe);
        throw new NotFoundException(qe, response);
    } else if (list.getResults().size() > 1) {
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_MAPREDUCE_OBJECT_MATCHES);
        response.addException(qe);
        throw new NotFoundException(qe, response);
    } else {
        MapReduceInfoResponse thisJob = list.getResults().get(0);
        // Get all the executions for this job
        String prevId = null;
        String prevState = null;
        // If a STARTED execution was superseded by a later job id, add it to the kill list
        for (JobExecution ex : thisJob.getJobExecutions()) {
            if (prevId != null) {
                if (prevState.equals(MapReduceState.STARTED.toString()) && !ex.getMapReduceJobId().equals(prevId))
                    jobIdsToKill.add(prevId);
            }
            prevId = ex.getMapReduceJobId();
            prevState = ex.getState();
        }
        // Get the last one
        if (MapReduceState.STARTED.toString().equals(prevState))
            jobIdsToKill.add(prevId);
        FileSystem hdfs = null;
        try {
            hdfs = getFS(thisJob.getHdfs(), response);
            Path resultsDir = new Path(thisJob.getResultsDirectory());
            hdfs.getConf().set("mapreduce.jobtracker.address", thisJob.getJobTracker());
            // Create a Job object
            try (JobClient job = new JobClient(new JobConf(hdfs.getConf()))) {
                for (String killId : jobIdsToKill) {
                    try {
                        JobID jid = JobID.forName(killId);
                        RunningJob rj = job.getJob(new org.apache.hadoop.mapred.JobID(jid.getJtIdentifier(), jid.getId()));
                        // job.getJob(jid);
                        if (null != rj)
                            rj.killJob();
                        else
                            mapReduceState.updateState(killId, MapReduceState.KILLED);
                    } catch (IOException | QueryException e) {
                        QueryException qe = new QueryException(DatawaveErrorCode.MAPREDUCE_JOB_KILL_ERROR, e, MessageFormat.format("job_id: {0}", killId));
                        log.error(qe);
                        response.addException(qe.getBottomQueryException());
                        throw new DatawaveWebApplicationException(qe, response);
                    }
                }
            }
            // Delete the contents of the results directory
            if (hdfs.exists(resultsDir) && !hdfs.delete(resultsDir, true)) {
                QueryException qe = new QueryException(DatawaveErrorCode.MAPRED_RESULTS_DELETE_ERROR, MessageFormat.format("directory: {0}", resultsDir.toString()));
                log.error(qe);
                response.addException(qe);
                throw new DatawaveWebApplicationException(qe, response);
            }
            response.setResult(true);
            return response;
        } catch (IOException e) {
            QueryException qe = new QueryException(DatawaveErrorCode.JOBTRACKER_CONNECTION_ERROR, e, MessageFormat.format("JobTracker: {0}", thisJob.getJobTracker()));
            log.error(qe);
            response.addException(qe);
            throw new DatawaveWebApplicationException(qe, response);
        } finally {
            if (null != hdfs) {
                try {
                    hdfs.close();
                } catch (IOException e) {
                    log.error("Error closing HDFS client", e);
                }
            }
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), GenericResponse (datawave.webservice.result.GenericResponse), ArrayList (java.util.ArrayList), NotFoundException (datawave.webservice.common.exception.NotFoundException), MapReduceInfoResponseList (datawave.webservice.results.mr.MapReduceInfoResponseList), IOException (java.io.IOException), JobClient (org.apache.hadoop.mapred.JobClient), NotFoundQueryException (datawave.webservice.query.exception.NotFoundQueryException), JobExecution (datawave.webservice.results.mr.JobExecution), QueryException (datawave.webservice.query.exception.QueryException), UnauthorizedQueryException (datawave.webservice.query.exception.UnauthorizedQueryException), BadRequestQueryException (datawave.webservice.query.exception.BadRequestQueryException), MapReduceInfoResponse (datawave.webservice.results.mr.MapReduceInfoResponse), FileSystem (org.apache.hadoop.fs.FileSystem), RunningJob (org.apache.hadoop.mapred.RunningJob), DatawaveWebApplicationException (datawave.webservice.common.exception.DatawaveWebApplicationException), JobConf (org.apache.hadoop.mapred.JobConf), JobID (org.apache.hadoop.mapreduce.JobID), Produces (javax.ws.rs.Produces), GZIP (org.jboss.resteasy.annotations.GZIP), PUT (javax.ws.rs.PUT)
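Stripped of the JAX-RS and state-persistence plumbing, the kill step in cancel is a JobClient lookup followed by RunningJob.killJob. Below is a minimal sketch of just that step, assuming the cluster is reachable; the class name, jobtracker address, and job id are hypothetical.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;

public class KillJobSketch {

    /**
     * Looks up a MapReduce job by id and kills it if the cluster still knows
     * about it; mirrors the kill loop in MapReduceBean.cancel.
     */
    public static boolean killIfRunning(JobConf conf, String jobId) throws IOException {
        try (JobClient client = new JobClient(conf)) {
            RunningJob running = client.getJob(JobID.forName(jobId));
            if (running == null) {
                return false; // job is no longer tracked by the cluster
            }
            running.killJob();
            return true;
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf();
        conf.set("mapreduce.jobtracker.address", "jobtracker.example.com:8021"); // hypothetical address
        killIfRunning(conf, "job_201801011200_0042"); // hypothetical job id
    }
}

As in the original method, a null RunningJob means the id is no longer known to the cluster, which is why cancel falls back to marking the state KILLED in that branch.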

Aggregations

JobExecution (datawave.webservice.results.mr.JobExecution): 3 usages
MapReduceInfoResponse (datawave.webservice.results.mr.MapReduceInfoResponse): 3 usages
IOException (java.io.IOException): 3 usages
ArrayList (java.util.ArrayList): 3 usages
NotFoundQueryException (datawave.webservice.query.exception.NotFoundQueryException): 2 usages
QueryException (datawave.webservice.query.exception.QueryException): 2 usages
MapReduceInfoResponseList (datawave.webservice.results.mr.MapReduceInfoResponseList): 2 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 2 usages
Path (org.apache.hadoop.fs.Path): 2 usages
DatawavePrincipal (datawave.security.authorization.DatawavePrincipal): 1 usage
DatawaveWebApplicationException (datawave.webservice.common.exception.DatawaveWebApplicationException): 1 usage
NotFoundException (datawave.webservice.common.exception.NotFoundException): 1 usage
BadRequestQueryException (datawave.webservice.query.exception.BadRequestQueryException): 1 usage
UnauthorizedQueryException (datawave.webservice.query.exception.UnauthorizedQueryException): 1 usage
GenericResponse (datawave.webservice.result.GenericResponse): 1 usage
ResultFile (datawave.webservice.results.mr.ResultFile): 1 usage
Principal (java.security.Principal): 1 usage
LinkedList (java.util.LinkedList): 1 usage
PUT (javax.ws.rs.PUT): 1 usage
Produces (javax.ws.rs.Produces): 1 usage