Example usage of org.apache.accumulo.core.util.PeekingIterator in the Apache Hive project:
the getRecordReader method of the HiveAccumuloTableInputFormat class.
/**
* Setup accumulo input format from conf properties. Delegates to final RecordReader from mapred
* package.
*
* @param inputSplit
* @param jobConf
* @param reporter
* @return RecordReader
* @throws IOException
*/
@Override
public RecordReader<Text, AccumuloHiveRow> getRecordReader(InputSplit inputSplit, final JobConf jobConf, final Reporter reporter) throws IOException {
final ColumnMapper columnMapper;
try {
columnMapper = getColumnMapper(jobConf);
} catch (TooManyAccumuloColumnsException e) {
throw new IOException(e);
}
try {
final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);
HiveAccumuloSplit hiveSplit = (HiveAccumuloSplit) inputSplit;
RangeInputSplit rangeSplit = hiveSplit.getSplit();
log.info("Split: " + rangeSplit);
// Should be fixed in Accumulo 1.5.2 and 1.6.1
if (null == rangeSplit.getIterators() || (rangeSplit.getIterators().isEmpty() && !iterators.isEmpty())) {
log.debug("Re-setting iterators on InputSplit due to Accumulo bug.");
rangeSplit.setIterators(iterators);
}
// but we want it to, so just re-set it if it's null.
if (null == getTableName(rangeSplit)) {
final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
log.debug("Re-setting table name on InputSplit due to Accumulo bug.");
setTableName(rangeSplit, accumuloParams.getAccumuloTableName());
}
final RecordReader<Text, PeekingIterator<Map.Entry<Key, Value>>> recordReader = accumuloInputFormat.getRecordReader(rangeSplit, jobConf, reporter);
return new HiveAccumuloRecordReader(recordReader, iterators.size());
} catch (SerDeException e) {
throw new IOException(StringUtils.stringifyException(e));
}
}
Aggregations