
Example 1 with InputSplitsFactory

Use of org.apache.hyracks.hdfs.dataflow.InputSplitsFactory in project asterixdb by apache.

The configure method of class HDFSDataSourceFactory:

@Override
public void configure(IServiceContext serviceCtx, Map<String, String> configuration) throws AsterixException {
    try {
        this.serviceCtx = serviceCtx;
        this.configuration = configuration;
        init((ICCServiceContext) serviceCtx);
        // build a Hadoop JobConf from the external-data configuration and wrap it so it can be shipped to worker nodes
        JobConf conf = HDFSUtils.configureHDFSJobConf(configuration);
        confFactory = new ConfFactory(conf);
        // one read partition per configured cluster location
        clusterLocations = getPartitionConstraint();
        int numPartitions = clusterLocations.getLocations().length;
        // if a files list was set, restrict the splits to that list; otherwise let the input format compute them
        InputSplit[] inputSplits;
        if (files == null) {
            inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
        } else {
            inputSplits = HDFSUtils.getSplits(conf, files);
        }
        // assign each split to a reading location, using the indexing-aware scheduler when indexing is enabled
        if (indexingOp) {
            readSchedule = indexingScheduler.getLocationConstraints(inputSplits);
        } else {
            readSchedule = hdfsScheduler.getLocationConstraints(inputSplits);
        }
        // wrap the splits in a serializable factory and track which of them have been read
        inputSplitsFactory = new InputSplitsFactory(inputSplits);
        read = new boolean[readSchedule.length];
        Arrays.fill(read, false);
        // for the default (Hadoop writable) format, probe the record value class via a record reader;
        // otherwise fall back to a stream record reader that produces char[] records
        String formatString = configuration.get(ExternalDataConstants.KEY_FORMAT);
        if (formatString == null || formatString.equals(ExternalDataConstants.FORMAT_HDFS_WRITABLE)) {
            RecordReader<?, ?> reader = conf.getInputFormat().getRecordReader(inputSplits[0], conf, Reporter.NULL);
            this.recordClass = reader.createValue().getClass();
            reader.close();
        } else {
            recordReaderClazz = StreamRecordReaderProvider.getRecordReaderClazz(configuration);
            this.recordClass = char[].class;
        }
    } catch (IOException e) {
        throw new AsterixException(e);
    }
}
Also used: ConfFactory (org.apache.hyracks.hdfs.dataflow.ConfFactory), AsterixException (org.apache.asterix.common.exceptions.AsterixException), IOException (java.io.IOException), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), InputSplitsFactory (org.apache.hyracks.hdfs.dataflow.InputSplitsFactory), AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint)
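To make the flow above concrete, the following is a minimal standalone sketch of the same split-computation and record-class-probing steps using the plain Hadoop mapred API. The input path, the TextInputFormat choice, and the number of partitions are placeholder assumptions for illustration; InputSplitsFactory is constructed exactly as in the example above, which does not itself show how the wrapped splits are later retrieved.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hyracks.hdfs.dataflow.InputSplitsFactory;

public class SplitProbeSketch {
    public static void main(String[] args) throws Exception {
        // hypothetical job configuration; the input format and path are placeholders
        JobConf conf = new JobConf();
        conf.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path("hdfs://localhost:9000/data/input"));

        // compute the splits, as in configure() above (numPartitions is an assumed value)
        int numPartitions = 4;
        InputSplit[] inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);

        // wrap the splits so they can be serialized and shipped, mirroring the example
        InputSplitsFactory inputSplitsFactory = new InputSplitsFactory(inputSplits);

        // probe the record value class from the first split, as the writable-format branch does
        RecordReader<?, ?> reader =
                conf.getInputFormat().getRecordReader(inputSplits[0], conf, Reporter.NULL);
        Class<?> recordClass = reader.createValue().getClass();
        reader.close();
        System.out.println("record class: " + recordClass.getName() + ", splits: " + inputSplits.length);
    }
}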

Aggregations

IOException (java.io.IOException) 1
AsterixException (org.apache.asterix.common.exceptions.AsterixException) 1
InputSplit (org.apache.hadoop.mapred.InputSplit) 1
JobConf (org.apache.hadoop.mapred.JobConf) 1
AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) 1
ConfFactory (org.apache.hyracks.hdfs.dataflow.ConfFactory) 1
InputSplitsFactory (org.apache.hyracks.hdfs.dataflow.InputSplitsFactory) 1