Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.
Class MapReduceBean, method getResultFiles.
/**
* Returns a tar file in which each entry is a result file.
*
* @param jobId the id of the MapReduce job whose result files should be returned
* @return tar file
* @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
* @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
* @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
* @HTTP 200 success
* @HTTP 404 if jobId is invalid or cannot be found
* @HTTP 500 error retrieving the result files
*/
@GET
@Produces("*/*")
@javax.ws.rs.Path("/{jobId}/getAllFiles")
@GZIP
public StreamingOutput getResultFiles(@Required("jobId") @PathParam("jobId") final String jobId, @Context HttpServletResponse httpResponse) {
MapReduceInfoResponseList response = list(jobId);
MapReduceInfoResponse result = response.getResults().get(0);
String hdfs = result.getHdfs();
String resultsDir = result.getResultsDirectory();
final FileSystem fs = getFS(hdfs, response);
final Path jobDirectory = new Path(resultsDir);
final int jobDirectoryPathLength = jobDirectory.toUri().getPath().length();
try {
if (!fs.exists(jobDirectory) || !fs.getFileStatus(jobDirectory).isDirectory()) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.JOB_DIRECTORY_NOT_FOUND, MessageFormat.format("{0} at path {1}", jobId, jobDirectory));
response.addException(qe);
throw new NotFoundException(qe, response);
}
} catch (IOException e1) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.RESULT_DIRECTORY_ACCESS_ERROR, e1, MessageFormat.format("{0}", resultsDir));
log.error(qe);
response.addException(qe);
throw new NotFoundException(qe, response);
}
// Get the children
List<FileStatus> resultFiles = new ArrayList<>();
try {
// recurse through the directory to find all files
Queue<FileStatus> fileQueue = new LinkedList<>();
fileQueue.add(fs.getFileStatus(jobDirectory));
while (!fileQueue.isEmpty()) {
FileStatus currentFileStatus = fileQueue.remove();
if (currentFileStatus.isFile()) {
resultFiles.add(currentFileStatus);
} else {
FileStatus[] dirList = fs.listStatus(currentFileStatus.getPath());
Collections.addAll(fileQueue, dirList);
}
}
} catch (IOException e) {
QueryException qe = new QueryException(DatawaveErrorCode.DFS_DIRECTORY_LISTING_ERROR, e, MessageFormat.format("directory: {0}", resultsDir));
log.error(qe);
response.addException(qe);
throw new DatawaveWebApplicationException(qe, response);
}
String filename = jobId + "-files.tar";
httpResponse.addHeader("Content-Disposition", "attachment; filename=\"" + filename + "\"");
// Make a final reference for use in the lambda below
final List<FileStatus> paths = resultFiles;
return output -> {
TarArchiveOutputStream tos = new TarArchiveOutputStream(output);
tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
try {
for (FileStatus fileStatus : paths) {
if (fileStatus.isDirectory())
continue;
// The entry size comes from the FileStatus, so the archive entry header can be
// written before the file is read and each file only has to be scanned once.
String fileName = fileStatus.getPath().toUri().getPath().substring(jobDirectoryPathLength + 1);
TarArchiveEntry entry = new TarArchiveEntry(jobId + "/" + fileName, false);
entry.setSize(fileStatus.getLen());
tos.putArchiveEntry(entry);
FSDataInputStream fis = fs.open(fileStatus.getPath());
byte[] buf = new byte[BUFFER_SIZE];
int read;
try {
read = fis.read(buf);
while (read != -1) {
tos.write(buf, 0, read);
read = fis.read(buf);
}
} catch (Exception e) {
log.error("Error writing result file to output", e);
throw new WebApplicationException(e);
} finally {
try {
if (null != fis)
fis.close();
} catch (IOException e) {
log.error("Error closing FSDataInputStream for file: " + fileStatus.getPath().getName(), e);
}
}
tos.closeArchiveEntry();
}
tos.finish();
} catch (Exception e) {
log.error(e.getMessage(), e);
} finally {
try {
if (null != tos)
tos.close();
} catch (IOException ioe) {
log.error("Error closing TarArchiveOutputStream", ioe);
}
try {
if (null != fs)
fs.close();
} catch (IOException ioe) {
log.error("Error closing HDFS client", ioe);
}
}
};
}
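For callers of this endpoint, a minimal client-side sketch follows. It is an illustration only: the base URL, job id, and output directory are hypothetical placeholders, and any client certificates or proxied-entity headers a real deployment requires are omitted. Only the /{jobId}/getAllFiles path and the tar payload are taken from the method above; the download uses java.net.HttpURLConnection and Apache Commons Compress.

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class ResultTarDownloader {
    public static void main(String[] args) throws Exception {
        String jobId = "1234"; // hypothetical job id
        // Hypothetical base URL; substitute the real host and add any required authentication.
        URL url = new URL("https://localhost:8443/DataWave/MapReduce/" + jobId + "/getAllFiles");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        Path targetDir = Paths.get("results");
        Files.createDirectories(targetDir);
        try (InputStream in = conn.getInputStream();
             TarArchiveInputStream tar = new TarArchiveInputStream(in)) {
            TarArchiveEntry entry;
            while ((entry = tar.getNextTarEntry()) != null) {
                if (entry.isDirectory()) {
                    continue;
                }
                // Entry names are of the form jobId/<path relative to the results directory>.
                Path out = targetDir.resolve(entry.getName()).normalize();
                Files.createDirectories(out.getParent());
                // The tar stream reports EOF at the end of the current entry, so this copies one file.
                Files.copy(tar, out);
            }
        } finally {
            conn.disconnect();
        }
    }
}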
Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.
Class MapReduceBean, method remove.
/**
* Removes the MapReduce entry and associated data
*
* @param jobId the id of the MapReduce job to remove
* @return datawave.webservice.result.VoidResponse
* @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
* @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
* @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
* @HTTP 200 success
* @HTTP 404 if jobId is invalid or cannot be found
* @HTTP 500 error removing the job
*/
@DELETE
@Produces({ "application/xml", "text/xml", "application/json", "text/yaml", "text/x-yaml", "application/x-yaml", "application/x-protobuf", "application/x-protostuff" })
@javax.ws.rs.Path("/{jobId}/remove")
@GZIP
public VoidResponse remove(@PathParam("jobId") String jobId) {
VoidResponse response = new VoidResponse();
// Call cancel which will kill any running jobs and remove the results directory in HDFS.
cancel(jobId);
// Remove the working directory from HDFS
MapReduceInfoResponseList list = list(jobId);
MapReduceInfoResponse result = list.getResults().get(0);
String hdfs = result.getHdfs();
String wdir = result.getWorkingDirectory();
Path p = new Path(wdir);
try {
FileSystem fs = getFS(hdfs, response);
if (fs.exists(p) && !fs.delete(p, true)) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.WORKING_DIRECTORY_DELETE_ERROR, MessageFormat.format("{0}", wdir));
log.error(qe);
response.addException(qe);
throw new NotFoundException(qe, response);
}
} catch (IOException e) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.WORKING_DIRECTORY_DELETE_ERROR, e, MessageFormat.format("{0}", wdir));
log.error(qe);
response.addException(qe);
throw new NotFoundException(qe, response);
}
// Remove any persisted state information
try {
mapReduceState.remove(jobId);
} catch (QueryException e) {
log.error("Error removing job state information", e);
response.addException(e.getBottomQueryException());
throw new DatawaveWebApplicationException(e, response);
}
return response;
}
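A similarly hedged sketch of invoking this removal endpoint from Java is shown below; the host and port are placeholders, and real deployments also need client certificates or the proxied-entity headers described above. Only the /{jobId}/remove path and the documented status codes come from the method itself.

import java.net.HttpURLConnection;
import java.net.URL;

public class RemoveJobClient {
    public static void main(String[] args) throws Exception {
        String jobId = "1234"; // hypothetical job id
        // Hypothetical base URL for the DataWave web service.
        URL url = new URL("https://localhost:8443/DataWave/MapReduce/" + jobId + "/remove");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("DELETE");
        conn.setRequestProperty("Accept", "application/xml");
        // Per the javadoc: 200 on success, 404 for an unknown job id, 500 if removal fails.
        System.out.println("Remove returned HTTP " + conn.getResponseCode());
        conn.disconnect();
    }
}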
Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.
Class MapReduceStatePersisterBean, method populateResponse.
private MapReduceInfoResponse populateResponse(Iterable<Entry<Key, Value>> data) throws IOException {
MapReduceInfoResponse result = null;
String hdfs = null;
TreeSet<JobExecution> jobs = null;
for (Entry<Key, Value> entry : data) {
if (null == result)
result = new MapReduceInfoResponse();
result.setId(entry.getKey().getRow().toString());
String colq = entry.getKey().getColumnQualifier().toString();
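// The column qualifier identifies which field of the response this entry populates.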
if (colq.equals(WORKING_DIRECTORY)) {
result.setWorkingDirectory(new String(entry.getValue().get()));
} else if (colq.equals(RESULTS_LOCATION)) {
if (null != entry.getValue() && entry.getValue().get().length > 0) {
result.setResultsDirectory(new String(entry.getValue().get()));
}
} else if (colq.equals(PARAMS)) {
result.setRuntimeParameters(new String(entry.getValue().get()));
} else if (colq.equals(HDFS)) {
result.setHdfs(new String(entry.getValue().get()));
hdfs = new String(entry.getValue().get());
} else if (colq.equals(JT)) {
result.setJobTracker(new String(entry.getValue().get()));
} else if (colq.startsWith(STATE)) {
if (null == jobs)
jobs = new TreeSet<>();
JobExecution job = new JobExecution();
job.setMapReduceJobId(colq.substring(STATE.length() + 1));
job.setState(new String(entry.getValue().get()));
job.setTimestamp(entry.getKey().getTimestamp());
jobs.add(job);
} else if (colq.equals(NAME)) {
result.setJobName(new String(entry.getValue().get()));
}
}
if (null != jobs)
result.setJobExecutions(new ArrayList<>(jobs));
try {
if (null != hdfs && !hdfs.isEmpty() && null != result.getResultsDirectory()) {
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfs);
// If we can't talk to HDFS then I want to fail fast, default is to retry 10 times.
conf.setInt("ipc.client.connect.max.retries", 0);
Path resultDirectoryPath = new Path(result.getResultsDirectory());
int resultDirectoryPathLength = resultDirectoryPath.toUri().getPath().length();
FileSystem fs = FileSystem.get(resultDirectoryPath.toUri(), conf);
List<FileStatus> stats = new ArrayList<>();
// recurse through the directory to find all files
Queue<FileStatus> fileQueue = new LinkedList<>();
fileQueue.add(fs.getFileStatus(resultDirectoryPath));
while (!fileQueue.isEmpty()) {
FileStatus currentFileStatus = fileQueue.remove();
if (currentFileStatus.isFile()) {
stats.add(currentFileStatus);
} else {
FileStatus[] dirList = fs.listStatus(currentFileStatus.getPath());
Collections.addAll(fileQueue, dirList);
}
}
if (!stats.isEmpty()) {
List<ResultFile> resultFiles = new ArrayList<>();
for (FileStatus f : stats) {
if (!f.isDirectory()) {
ResultFile rf = new ResultFile();
String fullPath = f.getPath().toUri().getPath().substring(resultDirectoryPathLength + 1);
rf.setFileName(fullPath);
rf.setLength(f.getLen());
resultFiles.add(rf);
}
}
result.setResultFiles(resultFiles);
}
}
} catch (IOException e) {
log.warn("Unable to populate result files portion of response", e);
}
return result;
}
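The queue-based directory walk above (and the equivalent one in getResultFiles) can also be written with Hadoop's built-in recursive listing. A minimal sketch, assuming only a reachable FileSystem and an existing results directory, is:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class ResultFileLister {
    // Returns the paths of all regular files under resultsDir, relative to resultsDir.
    public static List<String> listResultFiles(FileSystem fs, Path resultsDir) throws IOException {
        int prefixLength = resultsDir.toUri().getPath().length();
        List<String> names = new ArrayList<>();
        // listFiles(..., true) recurses and yields only files, so no manual queue is needed.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(resultsDir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            names.add(status.getPath().toUri().getPath().substring(prefixLength + 1));
        }
        return names;
    }
}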
Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.
Class MapReduceStatePersisterTest, method testFindById.
@Test
public void testFindById() throws Exception {
// create the initial entry
testPersistentCreate();
PowerMock.resetAll();
EasyMock.expect(ctx.getCallerPrincipal()).andReturn(principal);
HashMap<String, String> trackingMap = new HashMap<>();
expect(connectionFactory.getTrackingMap(EasyMock.anyObject())).andReturn(trackingMap);
expect(connectionFactory.getConnection(EasyMock.eq(AccumuloConnectionFactory.Priority.ADMIN), EasyMock.eq(trackingMap))).andReturn(connection);
connectionFactory.returnConnection(connection);
replayAll();
MapReduceInfoResponseList result = bean.findById(id);
verifyAll();
assertEquals(1, result.getResults().size());
assertNull(result.getExceptions());
MapReduceInfoResponse response = result.getResults().get(0);
assertEquals(id, response.getId());
assertEquals(hdfs, response.getHdfs());
assertEquals(jt, response.getJobTracker());
assertEquals(jobName, response.getJobName());
assertEquals(workingDirectory, response.getWorkingDirectory());
assertEquals(resultsDirectory, response.getResultsDirectory());
assertEquals(runtimeParameters, response.getRuntimeParameters());
assertEquals(1, response.getJobExecutions().size());
assertEquals(mapReduceJobId, response.getJobExecutions().get(0).getMapReduceJobId());
assertEquals(MapReduceState.STARTED.toString(), response.getJobExecutions().get(0).getState());
}
Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.
Class MapReduceBean, method cancel.
/**
* Cancels any MapReduce jobs with the specified jobId and clears out the results directory
*
* @param jobId the id of the MapReduce job to cancel
* @return {@code datawave.webservice.result.GenericResponse<Boolean>}
* @RequestHeader X-ProxiedEntitiesChain use when proxying request for user by specifying a chain of DNs of the identities to proxy
* @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain, specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
* @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation, does not account for network or result serialization
* @HTTP 200 success
* @HTTP 404 if jobId is invalid or cannot be found
* @HTTP 500 error killing the job
*/
@PUT
@Produces({ "application/xml", "text/xml", "application/json", "text/yaml", "text/x-yaml", "application/x-yaml", "application/x-protobuf", "application/x-protostuff" })
@javax.ws.rs.Path("/{jobId}/cancel")
@GZIP
public GenericResponse<Boolean> cancel(@PathParam("jobId") String jobId) {
GenericResponse<Boolean> response = new GenericResponse<>();
// Find all potential running jobs
MapReduceInfoResponseList list = mapReduceState.findById(jobId);
List<String> jobIdsToKill = new ArrayList<>();
// Should contain zero or one bulk result job
if (list.getResults().isEmpty()) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.NO_MAPREDUCE_OBJECT_MATCH);
response.addException(qe);
throw new NotFoundException(qe, response);
} else if (list.getResults().size() > 1) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_MAPREDUCE_OBJECT_MATCHES);
response.addException(qe);
throw new NotFoundException(qe, response);
} else {
MapReduceInfoResponse thisJob = list.getResults().get(0);
// Get all the executions for this job
String prevId = null;
String prevState = null;
// If an execution was left in the STARTED state and a later execution has a different job id, add the earlier id to the kill list
for (JobExecution ex : thisJob.getJobExecutions()) {
if (prevId != null) {
if (prevState.equals(MapReduceState.STARTED.toString()) && !ex.getMapReduceJobId().equals(prevId))
jobIdsToKill.add(prevId);
}
prevId = ex.getMapReduceJobId();
prevState = ex.getState();
}
// Get the last one
if (MapReduceState.STARTED.toString().equals(prevState))
jobIdsToKill.add(prevId);
FileSystem hdfs = null;
try {
hdfs = getFS(thisJob.getHdfs(), response);
Path resultsDir = new Path(thisJob.getResultsDirectory());
hdfs.getConf().set("mapreduce.jobtracker.address", thisJob.getJobTracker());
// Create a JobClient to look up and kill the running jobs
try (JobClient job = new JobClient(new JobConf(hdfs.getConf()))) {
for (String killId : jobIdsToKill) {
try {
JobID jid = JobID.forName(killId);
RunningJob rj = job.getJob(new org.apache.hadoop.mapred.JobID(jid.getJtIdentifier(), jid.getId()));
// job.getJob(jid);
if (null != rj)
rj.killJob();
else
mapReduceState.updateState(killId, MapReduceState.KILLED);
} catch (IOException | QueryException e) {
QueryException qe = new QueryException(DatawaveErrorCode.MAPREDUCE_JOB_KILL_ERROR, e, MessageFormat.format("job_id: {0}", killId));
log.error(qe);
response.addException(qe.getBottomQueryException());
throw new DatawaveWebApplicationException(qe, response);
}
}
}
// Delete the contents of the results directory
if (hdfs.exists(resultsDir) && !hdfs.delete(resultsDir, true)) {
QueryException qe = new QueryException(DatawaveErrorCode.MAPRED_RESULTS_DELETE_ERROR, MessageFormat.format("directory: {0}", resultsDir.toString()));
log.error(qe);
response.addException(qe);
throw new DatawaveWebApplicationException(qe, response);
}
response.setResult(true);
return response;
} catch (IOException e) {
QueryException qe = new QueryException(DatawaveErrorCode.JOBTRACKER_CONNECTION_ERROR, e, MessageFormat.format("JobTracker: {0}", thisJob.getJobTracker()));
log.error(qe);
response.addException(qe);
throw new DatawaveWebApplicationException(qe, response);
} finally {
if (null != hdfs) {
try {
hdfs.close();
} catch (IOException e) {
log.error("Error closing HDFS client", e);
}
}
}
}
}
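The kill loop above uses the older org.apache.hadoop.mapred JobClient API. A rough sketch of the same operation with the newer org.apache.hadoop.mapreduce.Cluster API is shown below; the configuration is assumed to already point at the right cluster, and the state bookkeeping the bean performs for jobs that are no longer running is omitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class JobKillSketch {
    public static void killIfRunning(Configuration conf, String jobIdString) throws Exception {
        // Connect to the cluster described by the configuration.
        Cluster cluster = new Cluster(conf);
        try {
            Job job = cluster.getJob(JobID.forName(jobIdString));
            // Only attempt the kill if the cluster still knows the job and it has not completed.
            if (job != null && !job.isComplete()) {
                job.killJob();
            }
        } finally {
            cluster.close();
        }
    }
}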