Use of co.cask.cdap.data.stream.StreamInputSplitFinder in project cdap by caskdata.
The following example is the getSplitFinder method of the class HiveStreamInputFormat.
private StreamInputSplitFinder<InputSplit> getSplitFinder(JobConf conf) throws IOException {
  // First get the context we are in.
  ContextManager.Context context = ContextManager.getContext(conf);
  Preconditions.checkNotNull(context);

  StreamConfig streamConfig = context.getStreamConfig(getStreamId(conf));
  // Make sure we get the current generation so we don't read events that occurred before a truncate.
  Location streamPath = StreamUtils.createGenerationLocation(streamConfig.getLocation(),
                                                             StreamUtils.getGeneration(streamConfig));
  StreamInputSplitFinder.Builder builder = StreamInputSplitFinder.builder(streamPath.toURI());

  // Get the Hive table path for the InputSplit created. It is only there to satisfy Hive;
  // the InputFormat never uses it.
  JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(Job.getInstance(conf));
  final Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

  return setupBuilder(conf, streamConfig, builder).build(new StreamInputSplitFactory<InputSplit>() {
    @Override
    public InputSplit createSplit(Path eventPath, Path indexPath, long startTime, long endTime,
                                  long start, long length, @Nullable String[] locations) {
      return new StreamInputSplit(tablePaths[0], eventPath, indexPath, startTime, endTime,
                                  start, length, locations);
    }
  });
}
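
For context, a split finder built this way is typically consumed by the enclosing InputFormat when Hive asks it for splits. The snippet below is a minimal, hypothetical caller sketch, not quoted from the cdap source: it assumes StreamInputSplitFinder exposes a getSplits(Configuration) method that returns the List of splits produced by the factory passed to build above.

// Hypothetical caller sketch (assumption): how getSplits might use getSplitFinder.
// Verify the StreamInputSplitFinder.getSplits signature against the cdap source.
@Override
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
  List<InputSplit> splits = getSplitFinder(conf).getSplits(conf);
  // Hive's mapred API expects an array rather than a List.
  return splits.toArray(new InputSplit[splits.size()]);
}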