
Example 1 with MapReduceInfoResponse

Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.

The class MapReduceStatePersisterBean, method find.

/**
 * Returns all MapReduce jobs for the current user
 *
 * @return list of map reduce information
 */
public MapReduceInfoResponseList find() {
    // Find out who/what called this method
    Principal p = ctx.getCallerPrincipal();
    String sid = p.getName();
    Set<Authorizations> auths = new HashSet<>();
    if (p instanceof DatawavePrincipal) {
        DatawavePrincipal dp = (DatawavePrincipal) p;
        sid = dp.getShortName();
        for (Collection<String> cbAuths : dp.getAuthorizations()) {
            auths.add(new Authorizations(cbAuths.toArray(new String[cbAuths.size()])));
        }
    }
    log.trace(sid + " has authorizations " + auths);
    MapReduceInfoResponseList result = new MapReduceInfoResponseList();
    Connector c = null;
    try {
        Map<String, String> trackingMap = connectionFactory.getTrackingMap(Thread.currentThread().getStackTrace());
        c = connectionFactory.getConnection(AccumuloConnectionFactory.Priority.ADMIN, trackingMap);
        tableCheck(c);
        try (Scanner scanner = ScannerHelper.createScanner(c, TABLE_NAME, auths)) {
            scanner.fetchColumnFamily(new Text(sid));
            // We need to create a response for each job
            String previousRow = sid;
            Map<Key, Value> batch = new HashMap<>();
            for (Entry<Key, Value> entry : scanner) {
                if (!previousRow.equals(entry.getKey().getRow().toString()) && !batch.isEmpty()) {
                    MapReduceInfoResponse response = populateResponse(batch.entrySet());
                    if (null != response)
                        result.getResults().add(response);
                    batch.clear();
                }
                // Always record the current entry in the (possibly fresh) batch so the
                // first entry of each new row is kept rather than dropped.
                batch.put(entry.getKey(), entry.getValue());
                previousRow = entry.getKey().getRow().toString();
            }
            if (!batch.isEmpty()) {
                MapReduceInfoResponse response = populateResponse(batch.entrySet());
                if (null != response)
                    result.getResults().add(response);
                batch.clear();
            }
            return result;
        }
    } catch (IOException ioe) {
        QueryException qe = new QueryException(DatawaveErrorCode.RESPONSE_POPULATION_ERROR, ioe);
        log.error(qe);
        result.addException(qe);
        return result;
    } catch (Exception e) {
        QueryException qe = new QueryException(DatawaveErrorCode.QUERY_SETUP_ERROR, e);
        log.error(qe);
        result.addException(qe.getBottomQueryException());
        return result;
    } finally {
        try {
            connectionFactory.returnConnection(c);
        } catch (Exception e) {
            log.error("Error returning connection to connection pool", e);
        }
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Scanner(org.apache.accumulo.core.client.Scanner) Authorizations(org.apache.accumulo.core.security.Authorizations) HashMap(java.util.HashMap) Text(org.apache.hadoop.io.Text) MapReduceInfoResponseList(datawave.webservice.results.mr.MapReduceInfoResponseList) IOException(java.io.IOException) DatawavePrincipal(datawave.security.authorization.DatawavePrincipal) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) NotFoundQueryException(datawave.webservice.query.exception.NotFoundQueryException) TableExistsException(org.apache.accumulo.core.client.TableExistsException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) QueryException(datawave.webservice.query.exception.QueryException) MapReduceInfoResponse(datawave.webservice.results.mr.MapReduceInfoResponse) Value(org.apache.accumulo.core.data.Value) Principal(java.security.Principal) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)
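
The loop in find() groups scanner entries by row id and emits one MapReduceInfoResponse per job. A minimal standalone sketch of that group-by-row pattern follows; RowBatcher and the handler callback are illustrative names, not part of DataWave.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.function.Consumer;

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;

public class RowBatcher {
    // Flush the batch whenever the row id changes, then start the new batch
    // with the current entry so nothing at a row boundary is lost.
    public static void forEachRowBatch(Iterable<Entry<Key, Value>> entries, Consumer<Map<Key, Value>> handler) {
        Map<Key, Value> batch = new LinkedHashMap<>();
        String previousRow = null;
        for (Entry<Key, Value> entry : entries) {
            String row = entry.getKey().getRow().toString();
            if (previousRow != null && !previousRow.equals(row) && !batch.isEmpty()) {
                handler.accept(new LinkedHashMap<>(batch));
                batch.clear();
            }
            batch.put(entry.getKey(), entry.getValue());
            previousRow = row;
        }
        if (!batch.isEmpty()) {
            handler.accept(batch); // the last row has no successor to trigger a flush
        }
    }
}

Because an Accumulo Scanner is an Iterable of Key/Value entries, a caller could invoke forEachRowBatch(scanner, batch -> ...) directly.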

Example 2 with MapReduceInfoResponse

Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.

The class MapReduceStatePersisterBean, method findById.

/**
 * Returns information for a specific map reduce id
 *
 * @param id
 *            map reduce id
 * @return list of map reduce information
 */
public MapReduceInfoResponseList findById(String id) {
    // Find out who/what called this method
    Principal p = ctx.getCallerPrincipal();
    String sid = p.getName();
    Set<Authorizations> auths = new HashSet<>();
    if (p instanceof DatawavePrincipal) {
        DatawavePrincipal dp = (DatawavePrincipal) p;
        sid = dp.getShortName();
        for (Collection<String> cbAuths : dp.getAuthorizations()) {
            auths.add(new Authorizations(cbAuths.toArray(new String[cbAuths.size()])));
        }
    }
    log.trace(sid + " has authorizations " + auths);
    MapReduceInfoResponseList result = new MapReduceInfoResponseList();
    Connector c = null;
    try {
        Map<String, String> trackingMap = connectionFactory.getTrackingMap(Thread.currentThread().getStackTrace());
        c = connectionFactory.getConnection(AccumuloConnectionFactory.Priority.ADMIN, trackingMap);
        tableCheck(c);
        try (Scanner scanner = ScannerHelper.createScanner(c, TABLE_NAME, auths)) {
            Range range = new Range(id);
            scanner.setRange(range);
            scanner.fetchColumnFamily(new Text(sid));
            MapReduceInfoResponse response = populateResponse(scanner);
            if (null != response)
                result.getResults().add(response);
            return result;
        }
    } catch (IOException ioe) {
        QueryException qe = new QueryException(DatawaveErrorCode.RESPONSE_POPULATION_ERROR, ioe);
        log.error(qe);
        result.addException(qe);
        return result;
    } catch (Exception e) {
        QueryException qe = new QueryException(DatawaveErrorCode.QUERY_SETUP_ERROR, e);
        log.error(qe);
        result.addException(qe.getBottomQueryException());
        return result;
    } finally {
        try {
            connectionFactory.returnConnection(c);
        } catch (Exception e) {
            log.error("Error returning connection to connection pool", e);
        }
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Scanner(org.apache.accumulo.core.client.Scanner) Authorizations(org.apache.accumulo.core.security.Authorizations) Text(org.apache.hadoop.io.Text) MapReduceInfoResponseList(datawave.webservice.results.mr.MapReduceInfoResponseList) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) DatawavePrincipal(datawave.security.authorization.DatawavePrincipal) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) NotFoundQueryException(datawave.webservice.query.exception.NotFoundQueryException) TableExistsException(org.apache.accumulo.core.client.TableExistsException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) QueryException(datawave.webservice.query.exception.QueryException) MapReduceInfoResponse(datawave.webservice.results.mr.MapReduceInfoResponse) Principal(java.security.Principal) HashSet(java.util.HashSet)
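
On the caller side, findById returns a list holding zero or one response for the caller's own jobs. A hedged usage sketch, assuming an injected MapReduceStatePersisterBean named persister and an illustrative job id:

import datawave.webservice.results.mr.MapReduceInfoResponse;
import datawave.webservice.results.mr.MapReduceInfoResponseList;

// "persister" and the id value are assumptions for illustration only.
MapReduceInfoResponseList list = persister.findById("20150102_123456");
if (list.getResults().isEmpty()) {
    System.out.println("no stored state for that id");
} else {
    MapReduceInfoResponse job = list.getResults().get(0);
    System.out.println(job.getJobName() + " results in " + job.getResultsDirectory());
}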

Example 3 with MapReduceInfoResponse

Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.

The class MapReduceStatePersisterBean, method remove.

/**
 * Removes Bulk Results information and the related directory in HDFS for the given job id.
 *
 * @param id
 *            bulk results id
 */
public void remove(String id) throws QueryException {
    // Find out who/what called this method
    Principal p = ctx.getCallerPrincipal();
    String sid = p.getName();
    if (p instanceof DatawavePrincipal) {
        DatawavePrincipal dp = (DatawavePrincipal) p;
        sid = dp.getShortName();
    }
    MapReduceInfoResponseList results = findById(id);
    // findById returns an empty list (never null) when no match exists, so guard both cases
    if (null == results || results.getResults().isEmpty())
        throw new NotFoundQueryException(DatawaveErrorCode.NO_QUERY_OBJECT_MATCH);
    if (results.getResults().size() > 1)
        throw new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_QUERY_OBJECT_MATCHES);
    else {
        MapReduceInfoResponse r = results.getResults().get(0);
        List<Mutation> indexEntries = new ArrayList<>();
        Mutation m = new Mutation(r.getId());
        m.putDelete(sid, WORKING_DIRECTORY);
        m.putDelete(sid, HDFS);
        m.putDelete(sid, JT);
        m.putDelete(sid, NAME);
        m.putDelete(sid, RESULTS_LOCATION);
        m.putDelete(sid, PARAMS);
        for (JobExecution job : r.getJobExecutions()) {
            m.putDelete(sid, STATE + NULL + job.getMapReduceJobId());
            Mutation i = new Mutation(job.getMapReduceJobId());
            i.putDelete(sid, r.getId());
            indexEntries.add(i);
        }
        Connector c = null;
        try {
            Map<String, String> trackingMap = connectionFactory.getTrackingMap(Thread.currentThread().getStackTrace());
            c = connectionFactory.getConnection(AccumuloConnectionFactory.Priority.ADMIN, trackingMap);
            tableCheck(c);
            // using BatchWriter instead of MultiTableBatchWriter because Mock CB does not support
            // MultiTableBatchWriter
            BatchWriterConfig bwCfg = new BatchWriterConfig().setMaxLatency(10, TimeUnit.SECONDS).setMaxMemory(10240L).setMaxWriteThreads(1);
            try (BatchWriter tableWriter = c.createBatchWriter(TABLE_NAME, bwCfg);
                BatchWriter indexWriter = c.createBatchWriter(INDEX_TABLE_NAME, bwCfg)) {
                tableWriter.addMutation(m);
                for (Mutation i : indexEntries) {
                    indexWriter.addMutation(i);
                }
            }
        } catch (RuntimeException re) {
            throw re;
        } catch (Exception e) {
            QueryException qe = new QueryException(DatawaveErrorCode.JOB_STATE_UPDATE_ERROR, e, MessageFormat.format("job_id: {0}", id));
            log.error(qe);
            throw new QueryException(qe);
        } finally {
            try {
                connectionFactory.returnConnection(c);
            } catch (Exception e) {
            log.error("Error returning connection to connection pool", e);
            }
        }
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) ArrayList(java.util.ArrayList) MapReduceInfoResponseList(datawave.webservice.results.mr.MapReduceInfoResponseList) DatawavePrincipal(datawave.security.authorization.DatawavePrincipal) NotFoundQueryException(datawave.webservice.query.exception.NotFoundQueryException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) TableExistsException(org.apache.accumulo.core.client.TableExistsException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) QueryException(datawave.webservice.query.exception.QueryException) JobExecution(datawave.webservice.results.mr.JobExecution) MapReduceInfoResponse(datawave.webservice.results.mr.MapReduceInfoResponse) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) Mutation(org.apache.accumulo.core.data.Mutation) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Principal(java.security.Principal)
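
Note that remove() deletes individual cells with Mutation.putDelete rather than dropping the whole row, so other column families in the same row survive. A minimal sketch of that delete pattern, assuming only the Accumulo client API; the table and cell names are placeholders:

import java.util.concurrent.TimeUnit;

import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.data.Mutation;

public class CellDeleteSketch {
    // Tombstones a single cell (row, column family, column qualifier) in a table.
    static void deleteCell(Connector connector, String table, String row, String cf, String cq) throws Exception {
        BatchWriterConfig cfg = new BatchWriterConfig()
                .setMaxLatency(10, TimeUnit.SECONDS)
                .setMaxMemory(10240L)
                .setMaxWriteThreads(1);
        try (BatchWriter writer = connector.createBatchWriter(table, cfg)) {
            Mutation m = new Mutation(row);
            m.putDelete(cf, cq); // deletes only this cell, not the whole row
            writer.addMutation(m);
        }
    }
}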

Example 4 with MapReduceInfoResponse

Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.

The class MapReduceBean, method restart.

/**
 * Kill any running job associated with the BulkResults id and start a new job.
 *
 * @param jobId
 *            map reduce job id
 * @return {@code datawave.webservice.result.GenericResponse<String>}
 * @RequestHeader X-ProxiedEntitiesChain use when proxying a request for a user by specifying a chain of DNs of the identities to proxy
 * @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain; specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
 * @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation; does not account for network time or result serialization
 * @HTTP 200 success
 * @HTTP 404 if jobId is invalid or cannot be found
 * @HTTP 500 error restarting the job
 */
@PUT
@Produces({ "application/xml", "text/xml", "application/json", "text/yaml", "text/x-yaml", "application/x-yaml", "application/x-protobuf", "application/x-protostuff" })
@javax.ws.rs.Path("/{jobId}/restart")
@GZIP
public GenericResponse<String> restart(@PathParam("jobId") String jobId) {
    GenericResponse<String> response = new GenericResponse<>();
    // Find all potential running jobs
    MapReduceInfoResponseList list = mapReduceState.findById(jobId);
    // Should contain zero or one job
    if (list.getResults().isEmpty()) {
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.NO_MAPREDUCE_OBJECT_MATCH);
        response.addException(qe);
        throw new NotFoundException(qe, response);
    } else if (list.getResults().size() > 1) {
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.TOO_MANY_MAPREDUCE_OBJECT_MATCHES);
        response.addException(qe);
        throw new NotFoundException(qe, response);
    } else {
        MapReduceInfoResponse thisJob = list.getResults().get(0);
        // Call cancel for this job. This will kill any running jobs and remove the results directory
        cancel(jobId);
        // Now re-submit this job after adding the JOB_ID to the runtime parameters to signal that this job has been restarted
        String jobName = thisJob.getJobName();
        // Now call submit
        return submit(jobName, thisJob.getRuntimeParameters() + PARAMETER_SEPARATOR + JOB_ID + PARAMETER_NAME_VALUE_SEPARATOR + jobId);
    }
}
Also used : GenericResponse(datawave.webservice.result.GenericResponse) MapReduceInfoResponse(datawave.webservice.results.mr.MapReduceInfoResponse) NotFoundException(datawave.webservice.common.exception.NotFoundException) MapReduceInfoResponseList(datawave.webservice.results.mr.MapReduceInfoResponseList) NotFoundQueryException(datawave.webservice.query.exception.NotFoundQueryException) Produces(javax.ws.rs.Produces) GZIP(org.jboss.resteasy.annotations.GZIP) PUT(javax.ws.rs.PUT)
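
The restart is cancel-then-submit, with the old job id appended to the runtime parameters so the new run is marked as a restart. A sketch of that parameter assembly; the separator characters and JOB_ID value below are assumptions, the real constants live in MapReduceBean:

// Hypothetical constant values, for illustration only.
static final String PARAMETER_SEPARATOR = ";";
static final String PARAMETER_NAME_VALUE_SEPARATOR = ":";
static final String JOB_ID = "jobId";

// Appends JOB_ID=<previous id> so the resubmitted job is recognizable as a restart.
static String restartParameters(String runtimeParameters, String previousJobId) {
    return runtimeParameters + PARAMETER_SEPARATOR + JOB_ID + PARAMETER_NAME_VALUE_SEPARATOR + previousJobId;
}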

Example 5 with MapReduceInfoResponse

Use of datawave.webservice.results.mr.MapReduceInfoResponse in project datawave by NationalSecurityAgency.

The class MapReduceBean, method getResultFile.

/**
 * Returns the contents of a result file. The output files produced by the MapReduce job are listed in the response object of the status
 * operation.
 *
 * @param jobId
 *            map reduce job id
 * @param fileName
 *            name of the result file to return
 * @return file contents
 * @RequestHeader X-ProxiedEntitiesChain use when proxying a request for a user by specifying a chain of DNs of the identities to proxy
 * @RequestHeader X-ProxiedIssuersChain required when using X-ProxiedEntitiesChain; specify one issuer DN per subject DN listed in X-ProxiedEntitiesChain
 * @ResponseHeader X-OperationTimeInMS time spent on the server performing the operation; does not account for network time or result serialization
 * @HTTP 200 success
 * @HTTP 404 if jobId is invalid or cannot be found
 * @HTTP 500 error retrieving the result file
 */
@GET
@Produces("*/*")
@javax.ws.rs.Path("/{jobId}/getFile/{fileName}")
@GZIP
public StreamingOutput getResultFile(@PathParam("jobId") String jobId, @PathParam("fileName") String fileName) {
    MapReduceInfoResponseList response = list(jobId);
    MapReduceInfoResponse result = response.getResults().get(0);
    String hdfs = result.getHdfs();
    String resultsDir = result.getResultsDirectory();
    final FileSystem fs = getFS(hdfs, response);
    final Path resultFile = new Path(resultsDir, fileName);
    FSDataInputStream fis;
    try {
        if (!fs.exists(resultFile) || !fs.isFile(resultFile)) {
            NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.FILE_NOT_FOUND, MessageFormat.format("{0} at path {1}", fileName, resultsDir));
            response.addException(qe);
            throw new NotFoundException(qe, response);
        }
        fis = fs.open(resultFile);
    } catch (IOException e1) {
        NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.RESULT_FILE_ACCESS_ERROR, e1, MessageFormat.format("{0}", resultFile.toString()));
        log.error(qe);
        response.addException(qe);
        throw new NotFoundException(qe, response);
    }
    // Make a final reference to the fis for referencing inside the inner class
    final FSDataInputStream fiz = fis;
    return new StreamingOutput() {

        private Logger log = Logger.getLogger(this.getClass());

        @Override
        public void write(java.io.OutputStream output) throws IOException, WebApplicationException {
            byte[] buf = new byte[BUFFER_SIZE];
            int read;
            try {
                read = fiz.read(buf);
                while (read != -1) {
                    output.write(buf, 0, read);
                    read = fiz.read(buf);
                }
            } catch (Exception e) {
                log.error("Error writing result file to output", e);
                throw new WebApplicationException(e);
            } finally {
                try {
                    if (null != fiz)
                        fiz.close();
                } catch (IOException e) {
                    log.error("Error closing FSDataInputStream for file: " + resultFile, e);
                }
                try {
                    if (null != fs)
                        fs.close();
                } catch (IOException e) {
                    log.error("Error closing HDFS client", e);
                }
            }
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) DatawaveWebApplicationException(datawave.webservice.common.exception.DatawaveWebApplicationException) WebApplicationException(javax.ws.rs.WebApplicationException) TarArchiveOutputStream(org.apache.commons.compress.archivers.tar.TarArchiveOutputStream) NotFoundException(datawave.webservice.common.exception.NotFoundException) StreamingOutput(javax.ws.rs.core.StreamingOutput) MapReduceInfoResponseList(datawave.webservice.results.mr.MapReduceInfoResponseList) IOException(java.io.IOException) Logger(org.apache.log4j.Logger) NotFoundQueryException(datawave.webservice.query.exception.NotFoundQueryException) QueryException(datawave.webservice.query.exception.QueryException) BadRequestException(datawave.webservice.common.exception.BadRequestException) UnauthorizedQueryException(datawave.webservice.query.exception.UnauthorizedQueryException) UnauthorizedException(datawave.webservice.common.exception.UnauthorizedException) BadRequestQueryException(datawave.webservice.query.exception.BadRequestQueryException) MapReduceInfoResponse(datawave.webservice.results.mr.MapReduceInfoResponse) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET) GZIP(org.jboss.resteasy.annotations.GZIP)
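
The anonymous StreamingOutput above closes the stream and the HDFS client by hand in a finally block. An equivalent sketch in the JAX-RS 2 lambda form using try-with-resources; fs and resultFile are assumed to be in scope, as in getResultFile:

import java.io.IOException;

import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.StreamingOutput;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;

StreamingOutput stream = output -> {
    byte[] buf = new byte[4096]; // stands in for BUFFER_SIZE; the value here is illustrative
    // Resources close in reverse order of declaration, even if the client disconnects mid-write.
    try (FileSystem toClose = fs; FSDataInputStream in = fs.open(resultFile)) {
        int read;
        while ((read = in.read(buf)) != -1) {
            output.write(buf, 0, read);
        }
    } catch (IOException e) {
        throw new WebApplicationException(e);
    }
};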

Aggregations

MapReduceInfoResponse (datawave.webservice.results.mr.MapReduceInfoResponse): 10
MapReduceInfoResponseList (datawave.webservice.results.mr.MapReduceInfoResponseList): 9
NotFoundQueryException (datawave.webservice.query.exception.NotFoundQueryException): 8
IOException (java.io.IOException): 8
QueryException (datawave.webservice.query.exception.QueryException): 7
NotFoundException (datawave.webservice.common.exception.NotFoundException): 5
Produces (javax.ws.rs.Produces): 5
GZIP (org.jboss.resteasy.annotations.GZIP): 5
DatawavePrincipal (datawave.security.authorization.DatawavePrincipal): 4
DatawaveWebApplicationException (datawave.webservice.common.exception.DatawaveWebApplicationException): 4
BadRequestQueryException (datawave.webservice.query.exception.BadRequestQueryException): 4
UnauthorizedQueryException (datawave.webservice.query.exception.UnauthorizedQueryException): 4
JobExecution (datawave.webservice.results.mr.JobExecution): 4
ArrayList (java.util.ArrayList): 4
FileSystem (org.apache.hadoop.fs.FileSystem): 4
Path (org.apache.hadoop.fs.Path): 4
GenericResponse (datawave.webservice.result.GenericResponse): 3
Principal (java.security.Principal): 3
HashMap (java.util.HashMap): 3
PUT (javax.ws.rs.PUT): 3