Use of org.apache.hyracks.hdfs.dataflow.InputSplitsFactory in project asterixdb by Apache: the configure method of the HDFSDataSourceFactory class.
@Override
public void configure(IServiceContext serviceCtx, Map<String, String> configuration) throws AsterixException {
    try {
        this.serviceCtx = serviceCtx;
        this.configuration = configuration;
        init((ICCServiceContext) serviceCtx);
        JobConf conf = HDFSUtils.configureHDFSJobConf(configuration);
        confFactory = new ConfFactory(conf);
        clusterLocations = getPartitionConstraint();
        int numPartitions = clusterLocations.getLocations().length;
        // if a files list was set, restrict the splits to that list
        InputSplit[] inputSplits;
        if (files == null) {
            inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
        } else {
            inputSplits = HDFSUtils.getSplits(conf, files);
        }
        // map each split to the cluster locations that will read it
        if (indexingOp) {
            readSchedule = indexingScheduler.getLocationConstraints(inputSplits);
        } else {
            readSchedule = hdfsScheduler.getLocationConstraints(inputSplits);
        }
        // wrap the splits so they can be shipped with the Hyracks job
        inputSplitsFactory = new InputSplitsFactory(inputSplits);
        // track which scheduled splits have already been consumed
        read = new boolean[readSchedule.length];
        Arrays.fill(read, false);
        String formatString = configuration.get(ExternalDataConstants.KEY_FORMAT);
        if (formatString == null || formatString.equals(ExternalDataConstants.FORMAT_HDFS_WRITABLE)) {
            // probe the input format for the concrete Writable value class
            RecordReader<?, ?> reader = conf.getInputFormat().getRecordReader(inputSplits[0], conf, Reporter.NULL);
            this.recordClass = reader.createValue().getClass();
            reader.close();
        } else {
            recordReaderClazz = StreamRecordReaderProvider.getRecordReaderClazz(configuration);
            this.recordClass = char[].class;
        }
    } catch (IOException e) {
        throw new AsterixException(e);
    }
}
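
The InputSplitsFactory construction is the step this page is about: a Hadoop mapred InputSplit is Writable but not java.io.Serializable, so the splits cannot travel inside a serialized job artifact as-is. The factory captures them as bytes at job-construction time so each node can rebuild them locally. The class below is a minimal hypothetical sketch of that pattern, not the actual Hyracks API; the name SplitsHolder, its methods, and the assumption that all splits share one concrete class (and that the array is non-empty) are illustrative.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.Serializable;

import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.util.ReflectionUtils;

// Hypothetical sketch: serialize splits via their Writable form so the
// holder itself is java.io.Serializable and can ship with a job spec.
public class SplitsHolder implements Serializable {
    private static final long serialVersionUID = 1L;
    private final byte[] splitBytes;
    private final String splitClassName;

    public SplitsHolder(InputSplit[] splits) throws IOException {
        // assumes a non-empty array whose splits all share one class
        this.splitClassName = splits[0].getClass().getName();
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (DataOutputStream dos = new DataOutputStream(bos)) {
            dos.writeInt(splits.length);
            for (InputSplit split : splits) {
                split.write(dos); // mapred InputSplit extends Writable
            }
        }
        this.splitBytes = bos.toByteArray();
    }

    public InputSplit[] getSplits() throws IOException, ClassNotFoundException {
        try (DataInputStream dis = new DataInputStream(new ByteArrayInputStream(splitBytes))) {
            int n = dis.readInt();
            Class<?> splitClass = Class.forName(splitClassName);
            InputSplit[] splits = new InputSplit[n];
            for (int i = 0; i < n; i++) {
                // instantiate an empty split, then restore its fields
                splits[i] = (InputSplit) ReflectionUtils.newInstance(splitClass, null);
                splits[i].readFields(dis);
            }
            return splits;
        }
    }
}

Under this sketch, each node would call getSplits() once at runtime and index into the resulting array according to the readSchedule computed in configure, marking entries in the read array as they are consumed.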