Use of com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig in project pinot by linkedin.
The run method of the class AggregationPhaseJob:
public Job run() throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJobName(name);
  job.setJarByClass(AggregationPhaseJob.class);

  FileSystem fs = FileSystem.get(getConf());
  Configuration configuration = job.getConfiguration();

  // Properties
  LOGGER.info("Properties {}", props);

  // Input Path
  String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
  LOGGER.info("Input path dir: " + inputPathDir);
  for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
    LOGGER.info("Adding input:" + inputPath);
    Path input = new Path(inputPath);
    FileInputFormat.addInputPath(job, input);
  }

  // Output path
  Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
  LOGGER.info("Output path dir: " + outputPath.toString());
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  FileOutputFormat.setOutputPath(job, outputPath);

  // Schema
  Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
  LOGGER.info("Schema : {}", avroSchema.toString(true));
  job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());

  // ThirdEyeConfig
  String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
  props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
  ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
  LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
  job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

  // Map config
  job.setMapperClass(AggregationMapper.class);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(BytesWritable.class);

  // Reduce config
  job.setReducerClass(AggregationReducer.class);
  job.setOutputKeyClass(AvroKey.class);
  job.setOutputValueClass(NullWritable.class);
  AvroJob.setOutputKeySchema(job, avroSchema);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
  LOGGER.info("Num Reducers : {}", numReducers);
  if (StringUtils.isNotBlank(numReducers)) {
    job.setNumReduceTasks(Integer.valueOf(numReducers));
    LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
  }

  job.waitForCompletion(true);

  Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
  LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
  if (counter.getValue() == 0) {
    throw new IllegalStateException("No input records in " + inputPathDir);
  }
  counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
  LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
  for (String metric : thirdeyeConfig.getMetricNames()) {
    counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
  }

  return job;
}
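All three phase jobs on this page push their required settings into the Hadoop Configuration through a small getAndSetConfiguration helper, which reads a property and mirrors it into the job configuration under the same key. The helper itself is not shown on this page; the following is only a minimal sketch of that pattern. The enum type name AggregationPhaseConstants, the use of props as the source, and the use of the constant's toString() as the property key are assumptions, not the verified ThirdEye implementation.

private String getAndSetConfiguration(Configuration configuration, AggregationPhaseConstants constant) {
  // Look up the property by the constant's string form; treat a missing value as a hard error.
  String value = props.getProperty(constant.toString());
  if (value == null) {
    throw new IllegalArgumentException("Missing required property: " + constant.toString());
  }
  // Mirror the value into the job configuration so map and reduce tasks can read it.
  configuration.set(constant.toString(), value);
  return value;
}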
Use of com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig in project pinot by linkedin.
The run method of the class SegmentCreationPhaseJob:
public Job run() throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJarByClass(SegmentCreationPhaseJob.class);
  job.setJobName(name);

  FileSystem fs = FileSystem.get(getConf());
  Configuration configuration = job.getConfiguration();

  String inputSegmentDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_INPUT_PATH);
  LOGGER.info("Input path : {}", inputSegmentDir);
  Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputSegmentDir);
  LOGGER.info("Schema : {}", avroSchema);
  String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
  props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
  ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
  LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());
  String outputDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_OUTPUT_PATH);
  LOGGER.info("Output path : {}", outputDir);
  Path stagingDir = new Path(outputDir, TEMP);
  LOGGER.info("Staging dir : {}", stagingDir);
  String segmentWallClockStart = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_START_TIME);
  LOGGER.info("Segment wallclock start time : {}", segmentWallClockStart);
  String segmentWallClockEnd = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_END_TIME);
  LOGGER.info("Segment wallclock end time : {}", segmentWallClockEnd);
  String schedule = getAndSetConfiguration(configuration, SEGMENT_CREATION_SCHEDULE);
  LOGGER.info("Segment schedule : {}", schedule);
  String isBackfill = props.getProperty(SEGMENT_CREATION_BACKFILL.toString(), DEFAULT_BACKFILL);
  configuration.set(SEGMENT_CREATION_BACKFILL.toString(), isBackfill);
  LOGGER.info("Is Backfill : {}", configuration.get(SEGMENT_CREATION_BACKFILL.toString()));

  // Create temporary directory
  if (fs.exists(stagingDir)) {
    LOGGER.warn("Found the temp folder, deleting it");
    fs.delete(stagingDir, true);
  }
  fs.mkdirs(stagingDir);
  fs.mkdirs(new Path(stagingDir + "/input/"));

  // Create output directory
  if (fs.exists(new Path(outputDir))) {
    LOGGER.warn("Found the output folder deleting it");
    fs.delete(new Path(outputDir), true);
  }
  fs.mkdirs(new Path(outputDir));

  // Read input files
  List<FileStatus> inputDataFiles = new ArrayList<>();
  for (String input : inputSegmentDir.split(",")) {
    Path inputPathPattern = new Path(input);
    inputDataFiles.addAll(Arrays.asList(fs.listStatus(inputPathPattern)));
  }
  LOGGER.info("size {}", inputDataFiles.size());

  try {
    for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
      FileStatus file = inputDataFiles.get(seqId);
      String completeFilePath = " " + file.getPath().toString() + " " + seqId;
      Path newOutPutFile = new Path((stagingDir + "/input/"
          + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
      FSDataOutputStream stream = fs.create(newOutPutFile);
      LOGGER.info("wrote {}", completeFilePath);
      stream.writeUTF(completeFilePath);
      stream.flush();
      stream.close();
    }
  } catch (Exception e) {
    LOGGER.error("Exception while reading input files ", e);
  }

  job.setMapperClass(SegmentCreationPhaseMapReduceJob.SegmentCreationMapper.class);

  if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
    job.getConfiguration().set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
  }

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(Text.class);

  FileInputFormat.addInputPath(job, new Path(stagingDir + "/input/"));
  FileOutputFormat.setOutputPath(job, new Path(stagingDir + "/output/"));

  job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
  job.getConfiguration().set(SEGMENT_CREATION_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

  job.setMaxReduceAttempts(1);
  job.setMaxMapAttempts(0);
  job.setNumReduceTasks(0);

  for (Object key : props.keySet()) {
    job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
  }

  job.waitForCompletion(true);
  if (!job.isSuccessful()) {
    throw new RuntimeException("Job failed : " + job);
  }

  LOGGER.info("Moving Segment Tar files from {} to: {}", stagingDir + "/output/segmentTar", outputDir);
  FileStatus[] segmentArr = fs.listStatus(new Path(stagingDir + "/output/segmentTar"));
  for (FileStatus segment : segmentArr) {
    fs.rename(segment.getPath(), new Path(outputDir, segment.getPath().getName()));
  }

  // Delete temporary directory.
  LOGGER.info("Cleanup the working directory.");
  LOGGER.info("Deleting the dir: {}", stagingDir);
  fs.delete(stagingDir, true);

  return job;
}
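A caller of this job has to populate the SEGMENT_CREATION_* and THIRDEYE_* properties that run() reads before launching it. The sketch below is a hypothetical driver, not code from this page: the (name, Properties) constructor, the use of the constants' toString() values as property keys, and the example paths and values are all assumptions. A real invocation would also need the remaining THIRDEYE_* properties that ThirdEyeConfig.fromProperties expects (collection name, dimension names, time column, and so on).

// Hypothetical driver: property keys, values, and the constructor signature are assumptions.
Properties props = new Properties();
props.setProperty(SEGMENT_CREATION_INPUT_PATH.toString(), "/thirdeye/aggregation/output");
props.setProperty(SEGMENT_CREATION_OUTPUT_PATH.toString(), "/thirdeye/segments");
props.setProperty(SEGMENT_CREATION_WALLCLOCK_START_TIME.toString(), "1464710400000");
props.setProperty(SEGMENT_CREATION_WALLCLOCK_END_TIME.toString(), "1464796800000");
props.setProperty(SEGMENT_CREATION_SCHEDULE.toString(), "DAILY");
// Metric names feed ThirdEyeConfig.fromProperties(props) inside run(); metric types are
// filled in by ThirdeyeAvroUtils.getMetricTypesProperty from the Avro schema.
props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "pageViews,clicks");

SegmentCreationPhaseJob segmentCreationJob = new SegmentCreationPhaseJob("segment_creation_job", props);
Job job = segmentCreationJob.run();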
Use of com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig in project pinot by linkedin.
The run method of the class TopKPhaseJob:
public Job run() throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJobName(name);
  job.setJarByClass(TopKPhaseJob.class);

  Configuration configuration = job.getConfiguration();
  FileSystem fs = FileSystem.get(configuration);

  // Properties
  LOGGER.info("Properties {}", props);

  // Input Path
  String inputPathDir = getAndSetConfiguration(configuration, TOPK_PHASE_INPUT_PATH);
  LOGGER.info("Input path dir: " + inputPathDir);
  for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
    LOGGER.info("Adding input:" + inputPath);
    Path input = new Path(inputPath);
    FileInputFormat.addInputPath(job, input);
  }

  // Output path
  Path outputPath = new Path(getAndSetConfiguration(configuration, TOPK_PHASE_OUTPUT_PATH));
  LOGGER.info("Output path dir: " + outputPath.toString());
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  FileOutputFormat.setOutputPath(job, outputPath);

  // Schema
  Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
  LOGGER.info("Schema : {}", avroSchema.toString(true));

  // ThirdEyeConfig
  String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
  props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
  ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
  LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
  job.getConfiguration().set(TOPK_PHASE_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

  // Map config
  job.setMapperClass(TopKPhaseMapper.class);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(BytesWritable.class);

  // Combiner
  job.setCombinerClass(TopKPhaseCombiner.class);

  // Reduce config
  job.setReducerClass(TopKPhaseReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(1);

  job.waitForCompletion(true);
  return job;
}
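In each of these jobs, run() only serializes the ThirdEyeConfig as JSON into the job configuration; the map and reduce tasks must deserialize it on their side. The sketch below shows how a mapper in this phase could recover it in setup(). The actual TopKPhaseMapper implementation is not shown on this page, so treat this purely as an illustration of the handoff, assuming OBJECT_MAPPER and LOGGER fields like those used above.

// Illustrative only: recovering the serialized ThirdEyeConfig inside a mapper's setup().
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration configuration = context.getConfiguration();
  String configJson = configuration.get(TOPK_PHASE_THIRDEYE_CONFIG.toString());
  ThirdEyeConfig thirdeyeConfig = OBJECT_MAPPER.readValue(configJson, ThirdEyeConfig.class);
  // The metric (and dimension) names in the config drive the per-dimension top-k counts.
  for (String metricName : thirdeyeConfig.getMetricNames()) {
    LOGGER.info("Metric for top-k aggregation: {}", metricName);
  }
}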