Use of org.apache.drill.common.exceptions.ExecutionSetupException in the Apache Drill project.
Class HiveAbstractReader, method setup.
/**
 * Runs {@code init()} as {@code proxyUgi} through the operator context, then registers one value vector
 * per selected table column (into {@code vectors}) and per selected partition column (into
 * {@code pVectors}) with the output mutator.
 *
 * <p>If the calling thread is interrupted while waiting for initialization, the pending call is
 * cancelled and the interrupt flag is restored; the method then still proceeds to field registration.
 * NOTE(review): confirm that continuing after an interrupted initialization is intended.
 *
 * @param context operator context used to execute the initialization callable as {@code proxyUgi}
 * @param output  mutator the column and partition fields are added to
 * @throws ExecutionSetupException if initialization fails, or if adding a field raises a
 *                                 {@code SchemaChangeException}
 */
@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
  // The callable only exists to run init() under the proxy user; it produces no value.
  final ListenableFuture<Void> initFuture = context.runCallableAs(proxyUgi, new Callable<Void>() {
    @Override
    public Void call() throws Exception {
      init();
      return null;
    }
  });

  try {
    initFuture.get();
  } catch (ExecutionException initFailure) {
    throw ExecutionSetupException.fromThrowable(initFailure.getMessage(), initFailure);
  } catch (InterruptedException interrupted) {
    initFuture.cancel(true);
    // Re-assert the interrupt flag so that callers further up the stack can observe the interruption
    // and react to it if they choose to.
    Thread.currentThread().interrupt();
  }

  try {
    final OptionManager optionManager = fragmentContext.getOptions();

    // Regular table columns.
    for (int col = 0; col < selectedColumnNames.size(); col++) {
      final MajorType columnType =
          HiveUtilities.getMajorTypeFromHiveTypeInfo(selectedColumnTypes.get(col), optionManager);
      final MaterializedField columnField = MaterializedField.create(selectedColumnNames.get(col), columnType);
      final Class<? extends ValueVector> vectorClass =
          TypeHelper.getValueVectorClass(columnType.getMinorType(), columnType.getMode());
      vectors.add(output.addField(columnField, vectorClass));
    }

    // Partition columns; here the vector class is looked up from the created field itself.
    for (int part = 0; part < selectedPartitionNames.size(); part++) {
      final MajorType partitionType =
          HiveUtilities.getMajorTypeFromHiveTypeInfo(selectedPartitionTypes.get(part), optionManager);
      final MaterializedField partitionField =
          MaterializedField.create(selectedPartitionNames.get(part), partitionType);
      final Class<? extends ValueVector> vectorClass =
          TypeHelper.getValueVectorClass(partitionField.getType().getMinorType(), partitionField.getDataMode());
      pVectors.add(output.addField(partitionField, vectorClass));
    }
  } catch (SchemaChangeException schemaChange) {
    throw new ExecutionSetupException(schemaChange);
  }
}
Use of org.apache.drill.common.exceptions.ExecutionSetupException in the Apache Drill project.
Class HiveAbstractReader, method init.
/**
 * Prepares the Hive-side state required for reading: builds the job configuration, creates the table
 * and (if present) partition SerDes and object inspectors, chooses the input format, resolves the
 * selected table and partition columns, and, unless {@code empty} is set, obtains the Hadoop record
 * reader for {@code inputSplit} and hands it to {@code internalInit}.
 *
 * @throws ExecutionSetupException if any exception occurs while setting up the SerDes, inspectors or
 *                                 column selections, or while obtaining the record reader from the
 *                                 Hive input format
 */
private void init() throws ExecutionSetupException {
  final JobConf job = new JobConf(hiveConf);
  // Get the configured default val
  defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);
  // Declared outside the try block because it is needed again for internalInit() below.
  Properties tableProperties;
  try {
    tableProperties = HiveUtilities.getTableMetadata(table);
    // Without a partition the table properties double as the "partition" properties.
    final Properties partitionProperties = (partition == null) ? tableProperties : HiveUtilities.getPartitionMetadata(partition, table);
    HiveUtilities.addConfToJob(job, partitionProperties);
    final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
    final StructObjectInspector tableOI = getStructOI(tableSerDe);
    if (partition != null) {
      partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
      partitionOI = getStructOI(partitionSerDe);
      // Convert from the partition's inspector to one derived from the table's inspector.
      finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
      partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
      job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
    } else {
      // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
      partitionSerDe = tableSerDe;
      partitionOI = tableOI;
      partTblObjectInspectorConverter = null;
      finalOI = tableOI;
      job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
    }
    if (logger.isTraceEnabled()) {
      for (StructField field : finalOI.getAllStructFieldRefs()) {
        logger.trace("field in finalOI: {}", field.getClass().getName());
      }
      // One placeholder for the one argument supplied.
      logger.trace("partitionSerDe class is {}", partitionSerDe.getClass().getName());
    }
    // Get list of partition column names
    final List<String> partitionNames = Lists.newArrayList();
    for (FieldSchema field : table.getPartitionKeys()) {
      partitionNames.add(field.getName());
    }
    // We should always get the columns names from ObjectInspector. For some of the tables (ex. avro) metastore
    // may not contain the schema, instead it is derived from other sources such as table properties or external file.
    // SerDe object knows how to get the schema with all the config and table properties passed in initialization.
    // ObjectInspector created from the SerDe object has the schema.
    final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
    final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();
    // Select list of columns for project pushdown into Hive SerDe readers.
    final List<Integer> columnIds = Lists.newArrayList();
    if (isStarQuery()) {
      // Star query: every table column and every partition column is selected.
      selectedColumnNames = tableColumnNames;
      for (int i = 0; i < selectedColumnNames.size(); i++) {
        columnIds.add(i);
      }
      selectedPartitionNames = partitionNames;
    } else {
      selectedColumnNames = Lists.newArrayList();
      for (SchemaPath field : getColumns()) {
        String columnName = field.getRootSegment().getPath();
        if (partitionNames.contains(columnName)) {
          selectedPartitionNames.add(columnName);
        } else {
          // NOTE(review): indexOf yields -1 when the name is not a table column; confirm this case
          // cannot occur or is handled downstream.
          columnIds.add(tableColumnNames.indexOf(columnName));
          selectedColumnNames.add(columnName);
        }
      }
    }
    ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);
    // Record field ref, inspector, type info and converter for each selected table column.
    for (String columnName : selectedColumnNames) {
      StructField fieldRef = finalOI.getStructFieldRef(columnName);
      selectedStructFieldRefs.add(fieldRef);
      ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());
      selectedColumnObjInspectors.add(fieldOI);
      selectedColumnTypes.add(typeInfo);
      selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
    }
    // Guarded so the per-column argument expressions are not evaluated when trace logging is off.
    if (logger.isTraceEnabled()) {
      for (int i = 0; i < selectedColumnNames.size(); ++i) {
        logger.trace("inspector:typeName={}, className={}, TypeInfo: {}, converter:{}", selectedColumnObjInspectors.get(i).getTypeName(), selectedColumnObjInspectors.get(i).getClass().getName(), selectedColumnTypes.get(i).toString(), selectedColumnFieldConverters.get(i).getClass().getName());
      }
    }
    // Resolve type (and, when a partition is present, value) of each selected partition column.
    // The index i is shared between the table's partition keys and the partition's value list.
    for (int i = 0; i < table.getPartitionKeys().size(); i++) {
      FieldSchema field = table.getPartitionKeys().get(i);
      if (selectedPartitionNames.contains(field.getName())) {
        TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
        selectedPartitionTypes.add(pType);
        if (partition != null) {
          selectedPartitionValues.add(HiveUtilities.convertPartitionType(pType, partition.getValues().get(i), defaultPartitionValue));
        }
      }
    }
  } catch (Exception e) {
    throw new ExecutionSetupException("Failure while initializing Hive Reader " + this.getClass().getName(), e);
  }
  if (!empty) {
    try {
      reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat().getRecordReader(inputSplit, job, Reporter.NULL);
      logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(), inputSplit.toString());
    } catch (Exception e) {
      throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
    }
    internalInit(tableProperties, reader);
  }
}
Use of org.apache.drill.common.exceptions.ExecutionSetupException in the Apache Drill project.
Class HiveDrillNativeScanBatchCreator, method getBatch.
/**
 * Creates a {@link ScanBatch} that reads the Parquet files behind the given Hive sub scan: one
 * {@code ParquetRecordReader} per non-empty row group of every input split, together with one map of
 * implicit (partition) column values per reader. If no reader is produced, a single
 * {@code HiveDefaultReader} is added so that the schema can still be output.
 *
 * @param context  fragment context supplying options and the operator context
 * @param config   sub scan describing the table, input splits, partitions and selected columns
 * @param children child batches (not used by this method)
 * @return the scan batch over the created readers and their implicit columns
 * @throws ExecutionSetupException if creating the record readers fails with an {@code IOException} or
 *                                 a {@code RuntimeException}
 */
@Override
public ScanBatch getBatch(FragmentContext context, HiveDrillNativeParquetSubScan config, List<RecordBatch> children) throws ExecutionSetupException {
  final HiveTableWithColumnCache table = config.getTable();
  final List<InputSplit> splits = config.getInputSplits();
  final List<HivePartition> partitions = config.getPartitions();
  final List<SchemaPath> columns = config.getColumns();
  final String partitionDesignator = context.getOptions().getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
  List<Map<String, String>> implicitColumns = Lists.newLinkedList();
  boolean selectAllQuery = AbstractRecordReader.isStarQuery(columns);
  final boolean hasPartitions = (partitions != null && partitions.size() > 0);
  final List<String[]> partitionColumns = Lists.newArrayList();
  final List<Integer> selectedPartitionColumns = Lists.newArrayList();
  List<SchemaPath> newColumns = columns;
  if (!selectAllQuery) {
    // Separate out the partition and non-partition columns. Non-partition columns are passed directly to the
    // ParquetRecordReader. Partition columns are passed to ScanBatch.
    newColumns = Lists.newArrayList();
    // The designator comes from a user-settable option, so it is quoted and matched literally; this
    // agrees with the literal prefix length used below to extract the partition index.
    Pattern pattern = Pattern.compile(Pattern.quote(partitionDesignator) + "[0-9]+");
    for (SchemaPath column : columns) {
      final String columnPath = column.getAsUnescapedPath();
      Matcher m = pattern.matcher(columnPath);
      if (m.matches()) {
        selectedPartitionColumns.add(Integer.parseInt(columnPath.substring(partitionDesignator.length())));
      } else {
        newColumns.add(column);
      }
    }
  }
  final OperatorContext oContext = context.newOperatorContext(config);
  // Advanced once per input split; used to index into the partitions list.
  int currentPartitionIndex = 0;
  final List<RecordReader> readers = Lists.newArrayList();
  final HiveConf conf = config.getHiveConf();
  // TODO: In future we can get this cache from Metadata cached on filesystem.
  final Map<String, ParquetMetadata> footerCache = Maps.newHashMap();
  // Tracks the implicit-column map with the most entries seen so far.
  Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
  try {
    for (InputSplit split : splits) {
      final FileSplit fileSplit = (FileSplit) split;
      final Path finalPath = fileSplit.getPath();
      final JobConf cloneJob = new ProjectionPusher().pushProjectionsAndFilters(new JobConf(conf), finalPath.getParent());
      final FileSystem fs = finalPath.getFileSystem(cloneJob);
      // Read each file's footer at most once per call.
      ParquetMetadata parquetMetadata = footerCache.get(finalPath.toString());
      if (parquetMetadata == null) {
        parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath);
        footerCache.put(finalPath.toString(), parquetMetadata);
      }
      final List<Integer> rowGroupNums = getRowGroupNumbersFromFileSplit(fileSplit, parquetMetadata);
      for (int rowGroupNum : rowGroupNums) {
        //DRILL-5009 : Skip the row group if the row count is zero
        if (parquetMetadata.getBlocks().get(rowGroupNum).getRowCount() == 0) {
          continue;
        }
        // Drill has only ever written a single row group per file, only detect corruption
        // in the first row group
        ParquetReaderUtility.DateCorruptionStatus containsCorruptDates = ParquetReaderUtility.detectCorruptDates(parquetMetadata, config.getColumns(), true);
        if (logger.isDebugEnabled()) {
          logger.debug(containsCorruptDates.toString());
        }
        readers.add(new ParquetRecordReader(context, Path.getPathWithoutSchemeAndAuthority(finalPath).toString(), rowGroupNum, fs, CodecFactory.createDirectCodecFactory(fs.getConf(), new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0), parquetMetadata, newColumns, containsCorruptDates));
        // One implicit-value map per reader, keyed by designator + partition index.
        Map<String, String> implicitValues = Maps.newLinkedHashMap();
        if (hasPartitions) {
          List<String> values = partitions.get(currentPartitionIndex).getValues();
          for (int i = 0; i < values.size(); i++) {
            if (selectAllQuery || selectedPartitionColumns.contains(i)) {
              implicitValues.put(partitionDesignator + i, values.get(i));
            }
          }
        }
        implicitColumns.add(implicitValues);
        if (implicitValues.size() > mapWithMaxColumns.size()) {
          mapWithMaxColumns = implicitValues;
        }
      }
      currentPartitionIndex++;
    }
  } catch (final IOException | RuntimeException e) {
    // Close whatever readers were already created before reporting the failure.
    AutoCloseables.close(e, readers);
    throw new ExecutionSetupException("Failed to create RecordReaders. " + e.getMessage(), e);
  }
  // all readers should have the same number of implicit columns, add missing ones with value null
  mapWithMaxColumns = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
  for (Map<String, String> map : implicitColumns) {
    map.putAll(Maps.difference(map, mapWithMaxColumns).entriesOnlyOnRight());
  }
  // create an empty RecordReader to output the schema
  if (readers.size() == 0) {
    readers.add(new HiveDefaultReader(table, null, null, columns, context, conf, ImpersonationUtil.createProxyUgi(config.getUserName(), context.getQueryUserName())));
  }
  return new ScanBatch(config, context, oContext, readers.iterator(), implicitColumns);
}
Use of org.apache.drill.common.exceptions.ExecutionSetupException in the Apache Drill project.
Class ImplCreator, method getExec.
/**
 * Builds the {@link RootExec} of a fragment from its {@link FragmentRoot}. The resulting RootExec has
 * one or more RecordBatches as children, which may in turn have child RecordBatches.
 *
 * <p>When assertions are enabled, or iterator validation is switched on through the option set or the
 * configuration, the plan is first rewritten with the iterator validator. Any exception raised while
 * building the tree (including the one raised for a missing RootExec) closes the operators created so
 * far, is reported through {@code context.fail}, and results in a {@code null} return.
 *
 * @param context
 *          fragment context; must not be null
 * @param root
 *          fragment root to build the execution tree for; must not be null
 * @return the fragment's RootExec, or {@code null} if creation failed and the failure was reported to
 *         the context
 * @throws ExecutionSetupException
 *           declared by the signature; failures inside the creation step are routed to
 *           {@code context.fail} instead
 */
public static RootExec getExec(FragmentContext context, FragmentRoot root) throws ExecutionSetupException {
  Preconditions.checkNotNull(root);
  Preconditions.checkNotNull(context);

  // Evaluated left to right with short-circuiting, exactly one source needs to enable validation.
  final boolean validateIterators = AssertionUtil.isAssertionsEnabled()
      || context.getOptionSet().getOption(ExecConstants.ENABLE_ITERATOR_VALIDATOR)
      || context.getConfig().getBoolean(ExecConstants.ENABLE_ITERATOR_VALIDATION);
  if (validateIterators) {
    root = IteratorValidatorInjector.rewritePlanWithIteratorValidator(context, root);
  }

  final ImplCreator implCreator = new ImplCreator();
  final Stopwatch timer = Stopwatch.createStarted();
  try {
    final RootExec exec = implCreator.getRootExec(root, context);

    // SimpleRootExec (used in testing) is not a BaseRootExec and is skipped here.
    if (exec instanceof BaseRootExec) {
      final BaseRootExec baseExec = (BaseRootExec) exec;
      baseExec.setOperators(implCreator.getOperators());
    }

    logger.debug("Took {} ms to create RecordBatch tree", timer.elapsed(TimeUnit.MILLISECONDS));

    if (exec != null) {
      return exec;
    }
    throw new ExecutionSetupException("The provided fragment did not have a root node that correctly created a RootExec value.");
  } catch (Exception e) {
    // Release every operator created so far, then report the failure to the fragment context.
    AutoCloseables.close(e, implCreator.getOperators());
    context.fail(e);
    return null;
  }
}
Use of org.apache.drill.common.exceptions.ExecutionSetupException in the Apache Drill project.
Class MongoScanBatchCreator, method getBatch.
/**
 * Creates a {@link ScanBatch} backed by one {@code MongoRecordReader} per chunk scan spec of the given
 * sub scan. When the sub scan reports no columns ({@code null}), {@code GroupScan.ALL_COLUMNS} is used.
 *
 * @param context  fragment context passed to every reader and to the scan batch
 * @param subScan  Mongo sub scan providing the chunk scan specs, columns and storage plugin
 * @param children child batches; must be empty
 * @return the scan batch iterating over the created readers
 * @throws ExecutionSetupException if creating any reader fails; the original exception is the cause
 * @throws IllegalArgumentException if {@code children} is not empty
 */
@Override
public ScanBatch getBatch(FragmentContext context, MongoSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
  Preconditions.checkArgument(children.isEmpty());
  List<RecordReader> readers = Lists.newArrayList();
  for (MongoSubScan.MongoSubScanSpec scanSpec : subScan.getChunkScanSpecList()) {
    try {
      // Resolved inside the try block so that a failure here is wrapped like any other reader failure.
      List<SchemaPath> columns = subScan.getColumns();
      if (columns == null) {
        columns = GroupScan.ALL_COLUMNS;
      }
      readers.add(new MongoRecordReader(scanSpec, columns, context, subScan.getMongoStoragePlugin()));
    } catch (Exception e) {
      // Parameterized messages render the same text as before without manual concatenation.
      logger.error("MongoRecordReader creation failed for subScan: {}.", subScan);
      logger.error(e.getMessage(), e);
      throw new ExecutionSetupException(e);
    }
  }
  logger.info("Number of record readers initialized : {}", readers.size());
  return new ScanBatch(subScan, context, readers.iterator());
}
Aggregations