Use of datawave.webservice.mr.bulkresults.map.SerializationFormat in project datawave by NationalSecurityAgency.
The example below is the _initializeConfiguration method of the BulkResultsJobConfiguration class. It reads the bulk-results runtime parameters and configures the MapReduce job either for file output to HDFS or, when an outputTableName parameter is supplied, for direct output to an Accumulo table.
@Override
public void _initializeConfiguration(Job job, Path jobDir, String jobId, Map<String, String> runtimeParameters, DatawavePrincipal serverPrincipal) throws IOException, QueryException {
    String queryId = runtimeParameters.get("queryId");
    SerializationFormat format = SerializationFormat.valueOf(runtimeParameters.get("format"));
    String outputFormatParameter = runtimeParameters.get("outputFormat");
    if (outputFormatParameter != null && outputFormatParameter.equalsIgnoreCase("TEXT")) {
        this.outputFormatClass = TextOutputFormat.class;
    }
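    // If no "outputFormat" override is supplied, outputFormatClass presumably keeps its class-level default;
    // the unconditional SequenceFileOutputFormat handling further below suggests that default is SequenceFileOutputFormat.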
    if (runtimeParameters.containsKey("outputTableName"))
        this.tableName = runtimeParameters.get("outputTableName");

    // Initialize the Query
    QueryLogic<?> logic;
    GenericQueryConfiguration queryConfig;
    String base64EncodedQuery;
    Class<? extends Query> queryImplClass;
    Set<Authorizations> runtimeQueryAuthorizations;
    try {
        // sid and principal are not declared in this snippet; they appear to be supplied by the enclosing class.
        QuerySettings settings = setupQuery(sid, queryId, principal);
        logic = settings.getLogic();
        queryConfig = settings.getQueryConfig();
        base64EncodedQuery = settings.getBase64EncodedQuery();
        queryImplClass = settings.getQueryImplClass();
        runtimeQueryAuthorizations = settings.getRuntimeQueryAuthorizations();
    } catch (QueryException qe) {
        log.error("Error getting Query for id: " + queryId, qe);
        throw qe;
    } catch (Exception e) {
        log.error("Error setting up Query for id: " + queryId, e);
        throw new QueryException(e);
    }
    // Setup and run the MapReduce job
    try {
        setupJob(job, jobDir, queryConfig, logic, base64EncodedQuery, queryImplClass, runtimeQueryAuthorizations, serverPrincipal);

        if (null == this.tableName) {
            // Setup job for output to HDFS
            // set the mapper
            job.setMapperClass(BulkResultsFileOutputMapper.class);
            job.getConfiguration().set(BulkResultsFileOutputMapper.RESULT_SERIALIZATION_FORMAT, format.name());
            // Setup the output
            job.setOutputFormatClass(outputFormatClass);
            job.setOutputKeyClass(Key.class);
            job.setOutputValueClass(Value.class);
            if (this.outputFormatClass.equals(SequenceFileOutputFormat.class)) {
                SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
            } else if (this.outputFormatClass.equals(TextOutputFormat.class)) {
                // if we are writing Text output to HDFS, we don't want to write key-tab-value, we want just the value
                // this property gets fetched in the Mapper to skip writing the key
                job.setOutputKeyClass(NullWritable.class);
            }
            job.setNumReduceTasks(0);
            SequenceFileOutputFormat.setOutputPath(job, new Path(this.getResultsDir()));
        } else {
            // Setup job for output to table.
            // set the mapper
            job.setMapperClass(BulkResultsTableOutputMapper.class);
            job.getConfiguration().set(BulkResultsTableOutputMapper.TABLE_NAME, tableName);
            job.getConfiguration().set(BulkResultsFileOutputMapper.RESULT_SERIALIZATION_FORMAT, format.name());
            // Setup the output
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Mutation.class);
            job.setNumReduceTasks(0);
            job.setOutputFormatClass(AccumuloOutputFormat.class);
            AccumuloOutputFormat.setZooKeeperInstance(job, ClientConfiguration.loadDefault().withInstance(instanceName).withZkHosts(zookeepers));
            AccumuloOutputFormat.setConnectorInfo(job, user, new PasswordToken(password));
            AccumuloOutputFormat.setCreateTables(job, true);
            AccumuloOutputFormat.setDefaultTableName(job, tableName);
            // AccumuloOutputFormat.loglevel
            AccumuloOutputFormat.setLogLevel(job, Level.INFO);
            // AccumuloOutputFormat.maxlatency
            // AccumuloOutputFormat.maxmemory
            // AccumuloOutputFormat.writethreads
            AccumuloOutputFormat.setBatchWriterOptions(job, new BatchWriterConfig().setMaxLatency(30, TimeUnit.SECONDS).setMaxMemory(10485760).setMaxWriteThreads(2));
        }
    } catch (WebApplicationException wex) {
        throw wex;
    } catch (Exception e) {
        log.error("Error starting job", e);
        throw new QueryException(DatawaveErrorCode.JOB_STARTING_ERROR, e);
    }
}
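For context, here is a minimal, hypothetical sketch of the runtime parameters the method reads. Only the keys ("queryId", "format", "outputFormat", "outputTableName") are taken from the code above; every value shown is an assumption.

import java.util.HashMap;
import java.util.Map;

// Hypothetical caller-side setup; the keys match the lookups in _initializeConfiguration above.
Map<String, String> runtimeParameters = new HashMap<>();
runtimeParameters.put("queryId", "20240101_123456");      // id of a previously defined query (example value)
runtimeParameters.put("format", "JSON");                   // must name a SerializationFormat enum constant (assumed value)
runtimeParameters.put("outputFormat", "TEXT");             // optional: "TEXT" switches HDFS output to TextOutputFormat
runtimeParameters.put("outputTableName", "bulkResults");   // optional: when present, results go to this Accumulo table

Whether the HDFS branch or the Accumulo-table branch of the method runs depends entirely on the presence of the optional outputTableName entry.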