use of org.apache.drill.exec.store.hive.HiveScan in project drill by apache.
the class HivePartitionDescriptor method getVectorType.
/**
 * Resolves the Drill vector type of a partition column of the Hive table being scanned.
 *
 * <p>The column name is looked up in the table's partition name-to-type map, the resulting Hive
 * type string is parsed as a primitive type, and that type is translated to a Drill minor type.
 * The returned major type always uses {@code OPTIONAL} mode.
 *
 * @param column partition column whose type is requested
 * @param plannerSettings planner settings; its options are passed to the Hive-to-Drill type conversion
 * @return an {@code OPTIONAL} major type carrying the converted minor type
 */
@Override
public TypeProtos.MajorType getVectorType(SchemaPath column, PlannerSettings plannerSettings) {
  final HiveScan scan = (HiveScan) scanRel.getGroupScan();
  final String columnName = column.getAsNamePart().getName();
  final String hiveTypeString = scan.hiveReadEntry.table.getPartitionNameTypeMap().get(columnName);

  // The type string is cast to a primitive type unconditionally, as in the original code.
  final TypeProtos.MinorType minorType = HiveUtilities.getMinorTypeFromHivePrimitiveTypeInfo(
      (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(hiveTypeString),
      plannerSettings.getOptions());

  return TypeProtos.MajorType.newBuilder()
      .setMinorType(minorType)
      .setMode(TypeProtos.DataMode.OPTIONAL)
      .build();
}
use of org.apache.drill.exec.store.hive.HiveScan in project drill by apache.
the class ConvertHiveParquetScanToDrillParquetScan method createNativeScanRel.
/**
 * Builds a {@link DrillScanRel} whose group scan is a {@link HiveDrillNativeParquetScan} created from
 * the {@link HiveScan} of the given scan rel.
 *
 * <p>Row-type fields whose names have an entry in {@code partitionColMapping} are renamed to the
 * mapped name and typed as VARCHAR; all other fields keep their name and type. The projected column
 * list is rewritten with the same mapping.
 *
 * @param partitionColMapping mapping from a column name in the Hive scan to the name to use in the
 *                            native scan; names without an entry are left untouched
 * @param hiveScanRel scan rel whose group scan is a {@link HiveScan}
 * @return a new scan rel over the native scan, reusing the cluster, trait set and table of
 *         {@code hiveScanRel}
 * @throws Exception if building the native scan fails
 */
private DrillScanRel createNativeScanRel(final Map<String, String> partitionColMapping, final DrillScanRel hiveScanRel) throws Exception {
  final RelDataTypeFactory typeFactory = hiveScanRel.getCluster().getTypeFactory();
  final RelDataType varCharType = typeFactory.createSqlType(SqlTypeName.VARCHAR);

  // Row type of the native scan: mapped (partition) columns become VARCHAR under their mapped name.
  final List<String> fieldNames = Lists.newArrayList();
  final List<RelDataType> fieldTypes = Lists.newArrayList();
  for (RelDataTypeField field : hiveScanRel.getRowType().getFieldList()) {
    final String mappedName = partitionColMapping.get(field.getName());
    final boolean isPartitionColumn = mappedName != null;
    fieldNames.add(isPartitionColumn ? mappedName : field.getName());
    fieldTypes.add(isPartitionColumn ? varCharType : field.getType());
  }
  final RelDataType nativeRowType = typeFactory.createStructType(fieldTypes, fieldNames);

  // Projected columns as set on the HiveScan. Their order may differ from the row type's field order.
  // A '*' column is passed through unchanged (unlike the row type above, where it is already expanded);
  // HiveScan and the related sub-scan can handle '*'.
  final List<SchemaPath> projectedColumns = Lists.newArrayList();
  for (SchemaPath column : hiveScanRel.getColumns()) {
    final String mappedName = partitionColMapping.get(column.getAsUnescapedPath());
    projectedColumns.add(mappedName == null ? column : SchemaPath.getSimplePath(mappedName));
  }

  final HiveScan hiveScan = (HiveScan) hiveScanRel.getGroupScan();
  final HiveDrillNativeParquetScan nativeScan = new HiveDrillNativeParquetScan(
      hiveScan.getUserName(),
      hiveScan.hiveReadEntry,
      hiveScan.storagePlugin,
      projectedColumns,
      null);

  return new DrillScanRel(
      hiveScanRel.getCluster(),
      hiveScanRel.getTraitSet(),
      hiveScanRel.getTable(),
      nativeScan,
      nativeRowType,
      projectedColumns);
}
use of org.apache.drill.exec.store.hive.HiveScan in project drill by apache.
the class ConvertHiveParquetScanToDrillParquetScan method onMatch.
/**
 * Rewrites the matched Hive scan rel into a scan rel over the native Parquet scan.
 *
 * <p>When the Hive scan's row type has no fields, the native scan rel replaces it directly;
 * otherwise the native scan rel is wrapped in a project built by {@code createProjectRel}.
 * Any exception raised during the rewrite is logged as a warning and not rethrown.
 *
 * @param call rule call whose first rel is the {@link DrillScanRel} to convert
 */
@Override
public void onMatch(RelOptRuleCall call) {
  try {
    final DrillScanRel originalScanRel = (DrillScanRel) call.rel(0);
    final HiveScan originalScan = (HiveScan) originalScanRel.getGroupScan();
    final String partitionLabel = PrelUtil.getPlannerSettings(call.getPlanner()).getFsPartitionColumnLabel();

    final Table table = originalScan.hiveReadEntry.getTable();
    checkForUnsupportedDataTypes(table);

    final Map<String, String> partitionColMapping = getPartitionColMapping(table, partitionLabel);
    final DrillScanRel nativeScanRel = createNativeScanRel(partitionColMapping, originalScanRel);

    // With no fields in the row type the native scan is used as-is, without a project on top.
    if (originalScanRel.getRowType().getFieldCount() == 0) {
      call.transformTo(nativeScanRel);
      return;
    }

    call.transformTo(createProjectRel(originalScanRel, partitionColMapping, nativeScanRel));
  } catch (final Exception e) {
    logger.warn("Failed to convert HiveScan to HiveDrillNativeParquetScan", e);
  }
}
use of org.apache.drill.exec.store.hive.HiveScan in project drill by apache.
the class ConvertHiveParquetScanToDrillParquetScan method matches.
/**
 * Decides whether this rule applies to the given call. The rule matches when all of the following hold:
 * 1) The group scan of the given DrillScanRel is a {@link HiveScan}.
 * 2) That {@link HiveScan} has not already been rewritten to use Drill's native readers.
 * 3) The input format resolved from the Hive table metadata is {@link MapredParquetInputFormat}, and
 *    every partition (if a partition list is present) resolves to that same input format.
 * 4) Every partition's column list equals the table's column list.
 *
 * NOTE(review): earlier documentation stated that errors hit while checking these conditions are
 * logged as warnings; no such handling is visible in this method, so confirm where (or whether)
 * that happens.
 *
 * @param call rule call whose first rel is a DrillScanRel
 * @return True if the rule can be applied. False otherwise
 */
@Override
public boolean matches(RelOptRuleCall call) {
  final DrillScanRel scanRel = (DrillScanRel) call.rel(0);
  if (!(scanRel.getGroupScan() instanceof HiveScan)) {
    return false;
  }

  final HiveScan hiveScan = (HiveScan) scanRel.getGroupScan();
  if (hiveScan.isNativeReader()) {
    return false;
  }

  final HiveConf hiveConf = hiveScan.getHiveConf();
  final HiveTableWithColumnCache hiveTable = hiveScan.hiveReadEntry.getTable();

  final Class<? extends InputFormat<?, ?>> tableInputFormat = getInputFormatFromSD(
      HiveUtilities.getTableMetadata(hiveTable), hiveScan.hiveReadEntry, hiveTable.getSd(), hiveConf);
  // Null-safe: an unresolved (null) input format is rejected as well.
  if (!MapredParquetInputFormat.class.equals(tableInputFormat)) {
    return false;
  }

  final List<HivePartitionWrapper> partitions = hiveScan.hiveReadEntry.getHivePartitionWrappers();
  if (partitions == null) {
    // No partition list to verify; the table-level check above is sufficient.
    return true;
  }

  final List<FieldSchema> tableColumns = hiveTable.getSd().getCols();
  for (HivePartitionWrapper partitionWrapper : partitions) {
    final StorageDescriptor partitionSD = partitionWrapper.getPartition().getSd();

    // Every partition must use the same input format as the table.
    final Class<? extends InputFormat<?, ?>> partitionInputFormat = getInputFormatFromSD(
        HiveUtilities.getPartitionMetadata(partitionWrapper.getPartition(), hiveTable),
        hiveScan.hiveReadEntry, partitionSD, hiveConf);
    if (!tableInputFormat.equals(partitionInputFormat)) {
      return false;
    }

    // Every partition must also have the same columns as the table: the native reader conversion
    // currently can't handle schema differences between a partition and the table.
    if (!partitionSD.getCols().equals(tableColumns)) {
      logger.debug("Partitions schema is different from table schema. Currently native reader conversion can't "
          + "handle schema difference between partitions and table");
      return false;
    }
  }

  return true;
}
Aggregations