Use of org.apache.asterix.external.input.record.reader.hdfs.HDFSRecordReader in project asterixdb by apache.
The class RecordColumnarIndexer, method reset:
@Override
public void reset(IIndexingDatasource reader) throws HyracksDataException {
    try {
        // TODO: Make this more generic. Right now, it works because we only index HDFS files.
        @SuppressWarnings("unchecked")
        HDFSRecordReader<?, Writable> hdfsReader = (HDFSRecordReader<?, Writable>) reader;
        fileNumber.setValue(hdfsReader.getSnapshot().get(hdfsReader.getCurrentSplitIndex()).getFileNumber());
        recordReader = hdfsReader.getReader();
        offset.setValue(recordReader.getPos());
        nextOffset = offset.getLongValue();
        rowNumber.setValue(0);
    } catch (IOException e) {
        throw new HyracksDataException(e);
    }
}
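For context, reset() snapshots the reader's position so that each subsequent record can be indexed as a (file number, byte offset, row number) triple. Below is a minimal, self-contained sketch of that reset step; PositionedReader and ColumnarIndexerSketch are hypothetical stand-ins for illustration, not the actual AsterixDB types.

import java.io.IOException;

// Hypothetical stand-ins for illustration only; not the AsterixDB API.
interface PositionedReader {
    int currentFileNumber();          // which file the current split belongs to
    long getPos() throws IOException; // byte offset where the next record starts
}

class ColumnarIndexerSketch {
    private int fileNumber;
    private long nextOffset;
    private int rowNumber;

    // Mirrors the pattern in RecordColumnarIndexer.reset(): snapshot the file
    // and byte position, and restart the per-file row counter.
    void reset(PositionedReader reader) throws IOException {
        fileNumber = reader.currentFileNumber();
        nextOffset = reader.getPos();
        rowNumber = 0;
    }
}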
Use of org.apache.asterix.external.input.record.reader.hdfs.HDFSRecordReader in project asterixdb by apache.
The class HDFSDataSourceFactory, method createRecordReader:
/**
 * The HDFS datasource is a special case in two ways:
 * 1. It supports indexing.
 * 2. It returns input as a set of Writable objects that we sometimes internally transform into a byte stream.
 * Hence, it can produce:
 * 1. StreamRecordReader: when we transform the input into a byte stream.
 * 2. IndexingStreamRecordReader: when we transform the input into a byte stream and perform indexing.
 * 3. HDFSRecordReader: when we simply pass the Writable object as it is to the parser.
 */
@Override
public IRecordReader<? extends Object> createRecordReader(IHyracksTaskContext ctx, int partition)
        throws HyracksDataException {
    try {
        IExternalIndexer indexer = files == null ? null : ExternalIndexerProvider.getIndexer(configuration);
        if (recordReaderClazz != null) {
            StreamRecordReader streamReader = (StreamRecordReader) recordReaderClazz.getConstructor().newInstance();
            streamReader.configure(createInputStream(ctx, partition, indexer), configuration);
            if (indexer != null) {
                return new IndexingStreamRecordReader(streamReader, indexer);
            } else {
                return streamReader;
            }
        }
        restoreConfig(ctx);
        return new HDFSRecordReader<>(read, inputSplits, readSchedule, nodeName, conf, files, indexer);
    } catch (Exception e) {
        throw new HyracksDataException(e);
    }
}
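The factory's branching reduces to a three-way choice driven by two conditions: whether the input is transformed into a byte stream (recordReaderClazz != null) and whether indexing is requested (indexer != null). A compact, self-contained sketch of that decision, using hypothetical placeholder types rather than the real reader classes:

// Placeholder types for illustration; not the AsterixDB classes.
interface Reader {}

class ReaderFactorySketch {
    record StreamReader() implements Reader {}
    record IndexingStreamReader(StreamReader inner) implements Reader {}
    record RawHdfsReader() implements Reader {}

    // transformToStream ~ (recordReaderClazz != null); indexing ~ (indexer != null)
    Reader create(boolean transformToStream, boolean indexing) {
        if (transformToStream) {
            StreamReader stream = new StreamReader();
            // Case 2 wraps the stream reader with an indexer; case 1 returns it as is.
            return indexing ? new IndexingStreamReader(stream) : stream;
        }
        // Case 3: hand the Writable objects straight to the parser.
        return new RawHdfsReader();
    }
}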