use of uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.factory.SampleDataForSplitPointsJobFactory in project Gaffer by gchq.
the class SampleDataAndCreateSplitsFileTool method run.
@Override
public int run(final String[] strings) throws OperationException {
try {
LOGGER.info("Creating job using SampleDataForSplitPointsJobFactory");
job = new SampleDataForSplitPointsJobFactory().createJob(operation, store);
} catch (final IOException e) {
LOGGER.error("Failed to create Hadoop job: {}", e.getMessage());
throw new OperationException("Failed to create the Hadoop job: " + e.getMessage(), e);
}
try {
LOGGER.info("Running SampleDataForSplitPoints job (job name is {})", job.getJobName());
job.waitForCompletion(true);
} catch (final IOException | InterruptedException | ClassNotFoundException e) {
LOGGER.error("Exception running job: {}", e.getMessage());
throw new OperationException("Error while waiting for job to complete: " + e.getMessage(), e);
}
try {
if (!job.isSuccessful()) {
LOGGER.error("Job was not successful (job name is {})", job.getJobName());
throw new OperationException("Error running job");
}
} catch (final IOException e) {
LOGGER.error("Exception running job: {}", e.getMessage());
throw new OperationException("Error running job" + e.getMessage(), e);
}
// Find the number of records output
// NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
// mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
// versions of Hadoop.
Counter counter;
try {
counter = job.getCounters().findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
LOGGER.info("Number of records output = {}", counter);
} catch (final IOException e) {
LOGGER.error("Failed to get counter org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS from job: {}", e.getMessage());
throw new OperationException("Failed to get counter: " + Task.Counter.REDUCE_OUTPUT_RECORDS, e);
}
int numberTabletServers;
try {
numberTabletServers = store.getConnection().instanceOperations().getTabletServers().size();
LOGGER.info("Number of tablet servers is {}", numberTabletServers);
} catch (final StoreException e) {
LOGGER.error("Exception thrown getting number of tablet servers: {}", e.getMessage());
throw new OperationException(e.getMessage(), e);
}
long outputEveryNthRecord = counter.getValue() / (numberTabletServers - 1);
final Path resultsFile = new Path(operation.getOutputPath(), "part-r-00000");
LOGGER.info("Will output every {}-th record from {}", outputEveryNthRecord, resultsFile);
// Read through resulting file, pick out the split points and write to file.
final Configuration conf = getConf();
final FileSystem fs;
try {
fs = FileSystem.get(conf);
} catch (final IOException e) {
LOGGER.error("Exception getting filesystem: {}", e.getMessage());
throw new OperationException("Failed to get filesystem from configuration: " + e.getMessage(), e);
}
LOGGER.info("Writing splits to {}", operation.getResultingSplitsFilePath());
final Key key = new Key();
final Value value = new Value();
long count = 0;
int numberSplitPointsOutput = 0;
try (final SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
final PrintStream splitsWriter = new PrintStream(new BufferedOutputStream(fs.create(new Path(operation.getResultingSplitsFilePath()), true)), false, CommonConstants.UTF_8)) {
while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
count++;
if (count % outputEveryNthRecord == 0) {
LOGGER.debug("Outputting split point number {} ({})", numberSplitPointsOutput, Base64.encodeBase64(key.getRow().getBytes()));
numberSplitPointsOutput++;
splitsWriter.println(new String(Base64.encodeBase64(key.getRow().getBytes()), CommonConstants.UTF_8));
}
}
LOGGER.info("Total number of records read was {}", count);
} catch (final IOException e) {
LOGGER.error("Exception reading results file and outputting split points: {}", e.getMessage());
throw new OperationException(e.getMessage(), e);
}
try {
fs.delete(resultsFile, true);
LOGGER.info("Deleted the results file {}", resultsFile);
} catch (final IOException e) {
LOGGER.error("Failed to delete the results file {}", resultsFile);
throw new OperationException("Failed to delete the results file: " + e.getMessage(), e);
}
return SUCCESS_RESPONSE;
}
Aggregations