
Example 1 with SerializationFormat

Use of datawave.webservice.mr.bulkresults.map.SerializationFormat in the datawave project by NationalSecurityAgency.

From the class BulkResultsJobConfiguration, the method _initializeConfiguration reads the runtime parameters, resolves the query, and then configures the MapReduce job either for file output to HDFS or for direct writes to an Accumulo table:

@Override
public void _initializeConfiguration(Job job, Path jobDir, String jobId, Map<String, String> runtimeParameters, DatawavePrincipal serverPrincipal) throws IOException, QueryException {
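    // Pull the caller-supplied runtime parameters: "queryId" identifies the query to run,
    // and "format" must name a SerializationFormat constant (valueOf fails on null or unknown values)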
    String queryId = runtimeParameters.get("queryId");
    SerializationFormat format = SerializationFormat.valueOf(runtimeParameters.get("format"));
    String outputFormatParameter = runtimeParameters.get("outputFormat");
    if (outputFormatParameter != null && outputFormatParameter.equalsIgnoreCase("TEXT")) {
        this.outputFormatClass = TextOutputFormat.class;
    }
    if (runtimeParameters.containsKey("outputTableName"))
        this.tableName = runtimeParameters.get("outputTableName");
    // Initialize the Query
    QueryLogic<?> logic;
    GenericQueryConfiguration queryConfig;
    String base64EncodedQuery;
    Class<? extends Query> queryImplClass;
    Set<Authorizations> runtimeQueryAuthorizations;
    try {
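        // NOTE: sid and principal are not defined in this excerpt; they are
        // presumably fields of the enclosing BulkResultsJobConfiguration class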
        QuerySettings settings = setupQuery(sid, queryId, principal);
        logic = settings.getLogic();
        queryConfig = settings.getQueryConfig();
        base64EncodedQuery = settings.getBase64EncodedQuery();
        queryImplClass = settings.getQueryImplClass();
        runtimeQueryAuthorizations = settings.getRuntimeQueryAuthorizations();
    } catch (QueryException qe) {
        log.error("Error getting Query for id: " + queryId, qe);
        throw qe;
    } catch (Exception e) {
        log.error("Error setting up Query for id: " + queryId, e);
        throw new QueryException(e);
    }
    // Setup and run the MapReduce job
    try {
        setupJob(job, jobDir, queryConfig, logic, base64EncodedQuery, queryImplClass, runtimeQueryAuthorizations, serverPrincipal);
        if (null == this.tableName) {
            // Setup job for output to HDFS
            // set the mapper
            job.setMapperClass(BulkResultsFileOutputMapper.class);
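            // tell BulkResultsFileOutputMapper which serialization format to apply to each result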
            job.getConfiguration().set(BulkResultsFileOutputMapper.RESULT_SERIALIZATION_FORMAT, format.name());
            // Setup the output
            job.setOutputFormatClass(outputFormatClass);
            job.setOutputKeyClass(Key.class);
            job.setOutputValueClass(Value.class);
            if (this.outputFormatClass.equals(SequenceFileOutputFormat.class)) {
                SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
            } else if (this.outputFormatClass.equals(TextOutputFormat.class)) {
                // if we are writing Text output to hdfs, we don't want to write key-tab-value, we want just the value
                // this property gets fetched in the Mapper to skip writing the key
                job.setOutputKeyClass(NullWritable.class);
            }
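            // map-only job: the mappers write results directly to the output format, so no reducers are needed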
            job.setNumReduceTasks(0);
            SequenceFileOutputFormat.setOutputPath(job, new Path(this.getResultsDir()));
        } else {
            // Setup job for output to table.
            // set the mapper
            job.setMapperClass(BulkResultsTableOutputMapper.class);
            job.getConfiguration().set(BulkResultsTableOutputMapper.TABLE_NAME, tableName);
            job.getConfiguration().set(BulkResultsFileOutputMapper.RESULT_SERIALIZATION_FORMAT, format.name());
            // Setup the output
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Mutation.class);
            job.setNumReduceTasks(0);
            job.setOutputFormatClass(AccumuloOutputFormat.class);
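            // Connect to the target Accumulo instance; instanceName, zookeepers, user, and
            // password are presumably fields of the enclosing class (not shown in this excerpt)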
            AccumuloOutputFormat.setZooKeeperInstance(job, ClientConfiguration.loadDefault().withInstance(instanceName).withZkHosts(zookeepers));
            AccumuloOutputFormat.setConnectorInfo(job, user, new PasswordToken(password));
            AccumuloOutputFormat.setCreateTables(job, true);
            AccumuloOutputFormat.setDefaultTableName(job, tableName);
            // Tune the writer: log level, plus BatchWriterConfig limits for
            // max latency, max memory, and write threads
            AccumuloOutputFormat.setLogLevel(job, Level.INFO);
            AccumuloOutputFormat.setBatchWriterOptions(job, new BatchWriterConfig().setMaxLatency(30, TimeUnit.SECONDS).setMaxMemory(10485760).setMaxWriteThreads(2));
        }
    } catch (WebApplicationException wex) {
        throw wex;
    } catch (Exception e) {
        log.error("Error starting job", e);
        throw new QueryException(DatawaveErrorCode.JOB_STARTING_ERROR, e);
    }
}
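For orientation, here is a minimal sketch of how a caller might drive this method. The parameter keys (queryId, format, outputFormat, outputTableName) come from the code above; the jobConfiguration, queryId, jobId, and serverPrincipal names and the Job/Path setup are assumptions for illustration, not part of the project.

// Hypothetical driver; jobConfiguration, queryId, jobId, and serverPrincipal are assumed to exist
Map<String, String> runtimeParameters = new HashMap<>();
runtimeParameters.put("queryId", queryId); // id of a previously defined query
runtimeParameters.put("format", SerializationFormat.values()[0].name()); // any constant the enum defines
runtimeParameters.put("outputFormat", "TEXT"); // optional: plain text instead of SequenceFile output
// runtimeParameters.put("outputTableName", "myResultsTable"); // optional: write to an Accumulo table instead of HDFS

Job job = Job.getInstance(new Configuration(), "BulkResults-" + jobId);
jobConfiguration._initializeConfiguration(job, new Path("/jobs/" + jobId), jobId, runtimeParameters, serverPrincipal);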
Also used : Path (org.apache.hadoop.fs.Path)
Authorizations (org.apache.accumulo.core.security.Authorizations)
WebApplicationException (javax.ws.rs.WebApplicationException)
NullWritable (org.apache.hadoop.io.NullWritable)
GenericQueryConfiguration (datawave.webservice.query.configuration.GenericQueryConfiguration)
NoResultsException (datawave.webservice.common.exception.NoResultsException)
AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)
IOException (java.io.IOException)
QueryException (datawave.webservice.query.exception.QueryException)
PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken)
SerializationFormat (datawave.webservice.mr.bulkresults.map.SerializationFormat)
TextOutputFormat (org.apache.hadoop.mapreduce.lib.output.TextOutputFormat)
BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig)

Aggregations

NoResultsException (datawave.webservice.common.exception.NoResultsException) 1
SerializationFormat (datawave.webservice.mr.bulkresults.map.SerializationFormat) 1
GenericQueryConfiguration (datawave.webservice.query.configuration.GenericQueryConfiguration) 1
QueryException (datawave.webservice.query.exception.QueryException) 1
IOException (java.io.IOException) 1
WebApplicationException (javax.ws.rs.WebApplicationException) 1
AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException) 1
BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig) 1
PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken) 1
Authorizations (org.apache.accumulo.core.security.Authorizations) 1
Path (org.apache.hadoop.fs.Path) 1
NullWritable (org.apache.hadoop.io.NullWritable) 1
TextOutputFormat (org.apache.hadoop.mapreduce.lib.output.TextOutputFormat) 1
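One caveat worth noting: SerializationFormat.valueOf throws IllegalArgumentException for an unrecognized name and NullPointerException when the parameter is absent. A caller can validate the value up front; parseFormat below is a hypothetical helper, not part of the project.

static SerializationFormat parseFormat(String name) {
    if (name == null) {
        throw new IllegalArgumentException("Missing required runtime parameter: format");
    }
    try {
        return SerializationFormat.valueOf(name.toUpperCase());
    } catch (IllegalArgumentException e) {
        throw new IllegalArgumentException("Unknown serialization format: " + name, e);
    }
}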