Use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by apache.
The class CompliantTextRecordReader, method setup.
/**
 * Performs the initial setup required for the record reader.
 * Initializes the input stream, handling of the output record batch
 * and the actual reader to be used.
 * @param context operator context from which buffers will be allocated and managed
 * @param outputMutator used to create the schema in the output record batch
 * @throws ExecutionSetupException
 */
@SuppressWarnings("resource")
@Override
public void setup(OperatorContext context, OutputMutator outputMutator) throws ExecutionSetupException {
  oContext = context;
  // Note: DO NOT use managed buffers here. They remain in existence
  // until the fragment is shut down. The buffers here are large.
  // If we scan 1000 files, and allocate 1 MB for each, we end up
  // holding onto 1 GB of memory in managed buffers.
  // Instead, we allocate the buffers explicitly, and must free them.
  // readBuffer = context.getManagedBuffer(READ_BUFFER);
  // whitespaceBuffer = context.getManagedBuffer(WHITE_SPACE_BUFFER);
  readBuffer = context.getAllocator().buffer(READ_BUFFER);
  whitespaceBuffer = context.getAllocator().buffer(WHITE_SPACE_BUFFER);

  // set up the output, input, and reader
  try {
    TextOutput output = null;
    TextInput input = null;
    InputStream stream = null;

    // set up the output using the OutputMutator
    if (settings.isHeaderExtractionEnabled()) {
      // extract the header and use it to set up a set of VarCharVectors
      String[] fieldNames = extractHeader();
      output = new FieldVarCharOutput(outputMutator, fieldNames, getColumns(), isStarQuery());
    } else {
      // simply use a RepeatedVarCharVector
      output = new RepeatedVarCharOutput(outputMutator, getColumns(), isStarQuery());
    }

    // set up the input using the InputStream
    logger.trace("Opening file {}", split.getPath());
    stream = dfs.openPossiblyCompressedStream(split.getPath());
    input = new TextInput(settings, stream, readBuffer, split.getStart(), split.getStart() + split.getLength());

    // set up the reader using the input and output
    reader = new TextReader(settings, input, output, whitespaceBuffer);
    reader.start();
  } catch (SchemaChangeException | IOException e) {
    throw new ExecutionSetupException(String.format("Failure while setting up text reader for file %s", split.getPath()), e);
  } catch (IllegalArgumentException e) {
    throw UserException.dataReadError(e).addContext("File Path", split.getPath().toString()).build(logger);
  }
}
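Because the read and whitespace buffers above come straight from the operator allocator rather than as managed buffers, the reader itself must release them. A minimal sketch of the matching cleanup, assuming the field names from setup() above and a close() override like the one Drill's text reader provides (exact signature details are assumed):

@Override
public void close() {
  // Release the explicitly allocated buffers; managed buffers would be freed with
  // the fragment, but these will leak unless released here.
  if (readBuffer != null) {
    readBuffer.release();
    readBuffer = null;
  }
  if (whitespaceBuffer != null) {
    whitespaceBuffer.release();
    whitespaceBuffer = null;
  }
  try {
    if (reader != null) {
      reader.close();  // closes the underlying TextInput and stream
      reader = null;
    }
  } catch (IOException e) {
    logger.warn("Exception while closing text reader.", e);
  }
}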
Use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by apache.
The class KuduScanBatchCreator, method getBatch.
@Override
public ScanBatch getBatch(FragmentContext context, KuduSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
  Preconditions.checkArgument(children.isEmpty());
  List<RecordReader> readers = Lists.newArrayList();
  List<SchemaPath> columns = null;
  for (KuduSubScan.KuduSubScanSpec scanSpec : subScan.getTabletScanSpecList()) {
    try {
      if ((columns = subScan.getColumns()) == null) {
        columns = GroupScan.ALL_COLUMNS;
      }
      readers.add(new KuduRecordReader(subScan.getStorageEngine().getClient(), scanSpec, columns, context));
    } catch (Exception e1) {
      throw new ExecutionSetupException(e1);
    }
  }
  return new ScanBatch(subScan, context, readers.iterator());
}
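This is the common BatchCreator pattern: build one RecordReader per scan spec and convert any construction failure into an ExecutionSetupException so the fragment executor sees a uniform setup error. A hypothetical creator for a custom sub-scan, modeled on the Kudu snippet above; MySubScan, MyRecordReader, and getScanSpecs() are illustrative names, not Drill classes, and the BatchCreator generic signature is assumed from the snippets on this page:

// Hypothetical sketch only; imports mirror KuduScanBatchCreator (Guava Preconditions
// and Lists plus the Drill exec classes used above).
public class MyScanBatchCreator implements BatchCreator<MySubScan> {
  @Override
  public ScanBatch getBatch(FragmentContext context, MySubScan subScan, List<RecordBatch> children)
      throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    List<RecordReader> readers = Lists.newArrayList();
    for (MySubScan.ScanSpec spec : subScan.getScanSpecs()) {
      try {
        // any checked or runtime failure here becomes a setup failure for the fragment
        readers.add(new MyRecordReader(spec, subScan.getColumns(), context));
      } catch (Exception e) {
        throw new ExecutionSetupException("Failed to create reader for spec " + spec, e);
      }
    }
    return new ScanBatch(subScan, context, readers.iterator());
  }
}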
Use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by apache.
The class ParquetScanBatchCreator, method getBatch.
@Override
public ScanBatch getBatch(FragmentContext context, ParquetRowGroupScan rowGroupScan, List<RecordBatch> children) throws ExecutionSetupException {
  Preconditions.checkArgument(children.isEmpty());
  OperatorContext oContext = context.newOperatorContext(rowGroupScan);
  final ImplicitColumnExplorer columnExplorer = new ImplicitColumnExplorer(context, rowGroupScan.getColumns());
  if (!columnExplorer.isStarQuery()) {
    rowGroupScan = new ParquetRowGroupScan(rowGroupScan.getUserName(), rowGroupScan.getStorageEngine(),
        rowGroupScan.getRowGroupReadEntries(), columnExplorer.getTableColumns(),
        rowGroupScan.getSelectionRoot(), rowGroupScan.getFilter());
    rowGroupScan.setOperatorId(rowGroupScan.getOperatorId());
  }
  DrillFileSystem fs;
  try {
    boolean useAsyncPageReader = context.getOptions().getOption(ExecConstants.PARQUET_PAGEREADER_ASYNC).bool_val;
    if (useAsyncPageReader) {
      fs = oContext.newNonTrackingFileSystem(rowGroupScan.getStorageEngine().getFsConf());
    } else {
      fs = oContext.newFileSystem(rowGroupScan.getStorageEngine().getFsConf());
    }
  } catch (IOException e) {
    throw new ExecutionSetupException(String.format("Failed to create DrillFileSystem: %s", e.getMessage()), e);
  }
  Configuration conf = new Configuration(fs.getConf());
  conf.setBoolean(ENABLE_BYTES_READ_COUNTER, false);
  conf.setBoolean(ENABLE_BYTES_TOTAL_COUNTER, false);
  conf.setBoolean(ENABLE_TIME_READ_COUNTER, false);

  // keep footers in a map to avoid re-reading them
  Map<String, ParquetMetadata> footers = Maps.newHashMap();
  List<RecordReader> readers = Lists.newArrayList();
  List<Map<String, String>> implicitColumns = Lists.newArrayList();
  Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
  for (RowGroupReadEntry e : rowGroupScan.getRowGroupReadEntries()) {
    /*
      Here we could store a map from file names to footers, to prevent re-reading the footer for each row group in a file.
      TODO - to prevent reading the footer again in the Parquet record reader (it is read earlier in the ParquetStorageEngine),
      we should add more information to the RowGroupInfo that will be populated upon the first read to
      provide the reader with all of the file metadata it needs.
      These fields will be added to the constructor below.
    */
    try {
      Stopwatch timer = Stopwatch.createUnstarted();
      if (!footers.containsKey(e.getPath())) {
        timer.start();
        ParquetMetadata footer = ParquetFileReader.readFooter(conf, new Path(e.getPath()));
        long timeToRead = timer.elapsed(TimeUnit.MICROSECONDS);
        logger.trace("ParquetTrace,Read Footer,{},{},{},{},{},{},{}", "", e.getPath(), "", 0, 0, 0, timeToRead);
        footers.put(e.getPath(), footer);
      }
      boolean autoCorrectCorruptDates = rowGroupScan.formatConfig.autoCorrectCorruptDates;
      ParquetReaderUtility.DateCorruptionStatus containsCorruptDates =
          ParquetReaderUtility.detectCorruptDates(footers.get(e.getPath()), rowGroupScan.getColumns(), autoCorrectCorruptDates);
      if (logger.isDebugEnabled()) {
        logger.debug(containsCorruptDates.toString());
      }
      if (!context.getOptions().getOption(ExecConstants.PARQUET_NEW_RECORD_READER).bool_val && !isComplex(footers.get(e.getPath()))) {
        readers.add(new ParquetRecordReader(context, e.getPath(), e.getRowGroupIndex(), e.getNumRecordsToRead(), fs,
            CodecFactory.createDirectCodecFactory(fs.getConf(), new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0),
            footers.get(e.getPath()), rowGroupScan.getColumns(), containsCorruptDates));
      } else {
        ParquetMetadata footer = footers.get(e.getPath());
        readers.add(new DrillParquetReader(context, footer, e, columnExplorer.getTableColumns(), fs, containsCorruptDates));
      }
      Map<String, String> implicitValues = columnExplorer.populateImplicitColumns(e, rowGroupScan.getSelectionRoot());
      implicitColumns.add(implicitValues);
      if (implicitValues.size() > mapWithMaxColumns.size()) {
        mapWithMaxColumns = implicitValues;
      }
    } catch (IOException e1) {
      throw new ExecutionSetupException(e1);
    }
  }

  // all readers should have the same number of implicit columns; add missing ones with value null
  Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
  for (Map<String, String> map : implicitColumns) {
    map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
  }
  return new ScanBatch(rowGroupScan, context, oContext, readers.iterator(), implicitColumns);
}
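The two Guava calls at the end pad every reader's implicit-column map to the same key set, filling missing entries with null. A standalone illustration of that padding, using hypothetical partition values (dir0/dir1 are example column names, not taken from a real query):

import java.util.LinkedHashMap;
import java.util.Map;
import com.google.common.base.Functions;
import com.google.common.collect.Maps;

public class ImplicitColumnPaddingExample {
  public static void main(String[] args) {
    // the map with the largest key set seen across row groups, e.g. two partition levels
    Map<String, String> mapWithMaxColumns = new LinkedHashMap<>();
    mapWithMaxColumns.put("dir0", "2016");
    mapWithMaxColumns.put("dir1", "01");

    // a reader that only saw one partition level
    Map<String, String> shallower = new LinkedHashMap<>();
    shallower.put("dir0", "2017");

    // same two Guava calls as in getBatch(): build a template of all keys mapped to null,
    // then copy over only the keys this reader is missing
    Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    shallower.putAll(Maps.difference(shallower, diff).entriesOnlyOnRight());

    System.out.println(shallower);  // {dir0=2017, dir1=null}
  }
}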
Use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by apache.
The class PojoRecordReader, method setup.
@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
  operatorContext = context;
  try {
    Field[] fields = pojoClass.getDeclaredFields();
    List<PojoWriter> writers = Lists.newArrayList();
    for (int i = 0; i < fields.length; i++) {
      Field f = fields[i];
      if (Modifier.isStatic(f.getModifiers())) {
        continue;
      }
      Class<?> type = f.getType();
      PojoWriter w = null;
      if (type == int.class) {
        w = new IntWriter(f);
      } else if (type == Integer.class) {
        w = new NIntWriter(f);
      } else if (type == Long.class) {
        w = new NBigIntWriter(f);
      } else if (type == Boolean.class) {
        w = new NBooleanWriter(f);
      } else if (type == double.class) {
        w = new DoubleWriter(f);
      } else if (type == Double.class) {
        w = new NDoubleWriter(f);
      } else if (type.isEnum()) {
        w = new EnumWriter(f, output.getManagedBuffer());
      } else if (type == boolean.class) {
        w = new BitWriter(f);
      } else if (type == long.class) {
        w = new LongWriter(f);
      } else if (type == String.class) {
        w = new StringWriter(f, output.getManagedBuffer());
      } else if (type == Timestamp.class) {
        w = new NTimeStampWriter(f);
      } else {
        throw new ExecutionSetupException(String.format("PojoRecord reader doesn't yet support conversions from type [%s].", type));
      }
      writers.add(w);
      w.init(output);
    }
    this.writers = writers.toArray(new PojoWriter[writers.size()]);
  } catch (SchemaChangeException e) {
    throw new ExecutionSetupException("Failure while setting up schema for PojoRecordReader.", e);
  }
  currentIterator = pojoObjects.iterator();
}
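setup() builds one PojoWriter per non-static declared field, keyed purely on the field's Java type; anything outside the supported list fails immediately with ExecutionSetupException. An illustrative POJO (not from Drill; class and field names are made up) showing how fields map to writers, assuming java.sql.Timestamp is the Timestamp type matched above:

import java.sql.Timestamp;  // assumption: the same Timestamp type as in setup() above

// Illustrative only; this class is not part of Drill.
public class QueryProfileRow {
  public String queryId;      // -> StringWriter
  public int attemptCount;    // -> IntWriter
  public long startTime;      // -> LongWriter
  public Long endTime;        // -> NBigIntWriter (nullable)
  public boolean finished;    // -> BitWriter
  public Timestamp updated;   // -> NTimeStampWriter
  public State state;         // enum -> EnumWriter

  public enum State { RUNNING, COMPLETED, FAILED }

  // static fields are skipped by setup()
  public static final int SCHEMA_VERSION = 1;

  // A field such as `public java.math.BigDecimal cost;` has no matching writer,
  // so setup() would throw ExecutionSetupException for this class.
}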
Use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by apache.
The class HiveScanBatchCreator, method getBatch.
@Override
public ScanBatch getBatch(FragmentContext context, HiveSubScan config, List<RecordBatch> children) throws ExecutionSetupException {
  List<RecordReader> readers = Lists.newArrayList();
  HiveTableWithColumnCache table = config.getTable();
  List<InputSplit> splits = config.getInputSplits();
  List<HivePartition> partitions = config.getPartitions();
  boolean hasPartitions = (partitions != null && partitions.size() > 0);
  int i = 0;
  final UserGroupInformation proxyUgi = ImpersonationUtil.createProxyUgi(config.getUserName(), context.getQueryUserName());
  final HiveConf hiveConf = config.getHiveConf();
  final String formatName = table.getSd().getInputFormat();
  Class<? extends HiveAbstractReader> readerClass = HiveDefaultReader.class;
  if (readerMap.containsKey(formatName)) {
    readerClass = readerMap.get(formatName);
  }
  Constructor<? extends HiveAbstractReader> readerConstructor = null;
  try {
    readerConstructor = readerClass.getConstructor(HiveTableWithColumnCache.class, HivePartition.class, InputSplit.class,
        List.class, FragmentContext.class, HiveConf.class, UserGroupInformation.class);
    for (InputSplit split : splits) {
      readers.add(readerConstructor.newInstance(table, (hasPartitions ? partitions.get(i++) : null), split, config.getColumns(), context, hiveConf, proxyUgi));
    }
    if (readers.size() == 0) {
      readers.add(readerConstructor.newInstance(table, null, null, config.getColumns(), context, hiveConf, proxyUgi));
    }
  } catch (Exception e) {
    logger.error("No constructor for {}, thrown {}", readerClass.getName(), e);
  }
  return new ScanBatch(config, context, readers.iterator());
}
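The reader class is chosen by the table's input format name and instantiated reflectively, so every registered HiveAbstractReader subclass has to expose the exact constructor that getBatch() resolves. A sketch of how such a format-to-reader map can be declared; the TextInputFormat mapping and HiveTextReader entry are assumed examples, and the authoritative registrations live in HiveScanBatchCreator itself:

// Sketch of the formatName -> reader-class dispatch used above (java.util.HashMap/Map imports assumed).
private static final Map<String, Class<? extends HiveAbstractReader>> readerMap = new HashMap<>();
static {
  // assumed example entry: plain text tables handled by a dedicated text reader
  readerMap.put("org.apache.hadoop.mapred.TextInputFormat", HiveTextReader.class);
}

// Every class registered here must declare exactly the constructor resolved reflectively
// in getBatch():
//   (HiveTableWithColumnCache, HivePartition, InputSplit, List, FragmentContext,
//    HiveConf, UserGroupInformation)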