Search in sources :

Example 1 with HiveToRelDataTypeConverter

Use of org.apache.drill.exec.planner.types.HiveToRelDataTypeConverter in the Apache Drill project.

Class DrillHiveTable, method getRowType.

/**
 * Builds the row type of this Hive table.
 *
 * <p>The resulting struct lists the table schema columns first, followed by the
 * partition keys. Every field type is produced by
 * {@link HiveToRelDataTypeConverter#convertToNullableRelDataType}.
 *
 * @param typeFactory factory used both by the converter and to assemble the struct type
 * @return struct type with one field per table schema column and partition key
 */
@Override
public RelDataType getRowType(RelDataTypeFactory typeFactory) {
    HiveToRelDataTypeConverter dataTypeConverter = new HiveToRelDataTypeConverter(typeFactory);

    // Names and types are derived by two independent, side-effect-free pipelines over the
    // same ordered sources, so position i in one list always matches position i in the other.
    // (Previously the names were accumulated from inside Stream.peek, which is intended
    // for debugging rather than for building results.)
    final List<String> fieldNames = Stream.of(hiveTable.getColumnListsCache().getTableSchemaColumns(), hiveTable.getPartitionKeys())
        .flatMap(Collection::stream)
        .map(hiveField -> hiveField.getName())
        .collect(Collectors.toList());

    final List<RelDataType> fieldTypes = Stream.of(hiveTable.getColumnListsCache().getTableSchemaColumns(), hiveTable.getPartitionKeys())
        .flatMap(Collection::stream)
        .map(dataTypeConverter::convertToNullableRelDataType)
        .collect(Collectors.toList());

    return typeFactory.createStructType(fieldTypes, fieldNames);
}
Also used : HiveToRelDataTypeConverter(org.apache.drill.exec.planner.types.HiveToRelDataTypeConverter) ArrayList(java.util.ArrayList) RelDataType(org.apache.calcite.rel.type.RelDataType)

Example 2 with HiveToRelDataTypeConverter

Use of org.apache.drill.exec.planner.types.HiveToRelDataTypeConverter in the Apache Drill project.

Class ConvertHiveMapRDBJsonScanToDrillMapRDBJsonScan, method createNativeScanRel.

/**
 * Produces a {@link DrillScanRel} backed by a {@link JsonTableGroupScan} in place of the
 * given Hive scan.
 *
 * <p>The new relational node reuses the cluster, trait set and table of {@code hiveScanRel};
 * its columns and row-type field names are passed through the overridden-name helpers
 * using the Hive table parameters.
 *
 * @param hiveScanRel scan node whose group scan is a {@link HiveScan}
 * @param settings planner settings supplying the MapR-DB JSON reader options
 * @return scan node that uses the native MapR-DB JSON group scan
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private DrillScanRel createNativeScanRel(DrillScanRel hiveScanRel, PlannerSettings settings) throws IOException {
    RelDataTypeFactory typeFactory = hiveScanRel.getCluster().getTypeFactory();
    HiveScan hiveScan = (HiveScan) hiveScanRel.getGroupScan();
    HiveReadEntry readEntry = hiveScan.getHiveReadEntry();
    Map<String, String> tableParams = readEntry.getHiveTableWrapper().getParameters();

    JsonScanSpec scanSpec = new JsonScanSpec(tableParams.get(MAPRDB_TABLE_NAME), null, null);

    List<SchemaPath> nativeColumns = hiveScanRel.getColumns().stream()
        .map(path -> replaceOverriddenSchemaPath(tableParams, path))
        .collect(Collectors.toList());

    // Build a TupleMetadata from Hive's schema (with optional data modes); the reader
    // uses it for columns whose type wasn't discovered.
    HiveToRelDataTypeConverter dataTypeConverter = new HiveToRelDataTypeConverter(typeFactory);
    TupleMetadata schema = new TupleSchema();
    readEntry.getTable().getColumnListsCache().getTableSchemaColumns().forEach(column -> {
        String nativeName = replaceOverriddenColumnId(tableParams, column.getName());
        RelDataType nullableType = dataTypeConverter.convertToNullableRelDataType(column);
        schema.addColumn(HiveUtilities.getColumnMetadata(nativeName, nullableType));
    });

    MapRDBFormatPluginConfig formatConfig = new MapRDBFormatPluginConfig();
    formatConfig.readTimestampWithZoneOffset =
        settings.getOptions().getBoolean(ExecConstants.HIVE_READ_MAPRDB_JSON_TIMESTAMP_WITH_TIMEZONE_OFFSET);
    formatConfig.allTextMode =
        settings.getOptions().getBoolean(ExecConstants.HIVE_MAPRDB_JSON_ALL_TEXT_MODE);

    // TODO: We should use Hive format plugins here, once it will be implemented. DRILL-6621
    JsonTableGroupScan nativeMapRDBScan = new JsonTableGroupScan(
        hiveScan.getUserName(),
        hiveScan.getStoragePlugin(),
        (MapRDBFormatPlugin) hiveScan.getStoragePlugin().getFormatPlugin(formatConfig),
        scanSpec,
        nativeColumns,
        new MapRDBStatistics(),
        FileSystemMetadataProviderManager.getMetadataProviderForSchema(schema));

    // Row type keeps the Hive field types but uses the (possibly overridden) native names.
    List<String> nativeFieldNames = hiveScanRel.getRowType().getFieldList().stream()
        .map(RelDataTypeField::getName)
        .map(fieldName -> replaceOverriddenColumnId(tableParams, fieldName))
        .collect(Collectors.toList());
    List<RelDataType> nativeFieldTypes = hiveScanRel.getRowType().getFieldList().stream()
        .map(field -> field.getType())
        .collect(Collectors.toList());
    RelDataType nativeRowType = typeFactory.createStructType(nativeFieldTypes, nativeFieldNames);

    return new DrillScanRel(
        hiveScanRel.getCluster(),
        hiveScanRel.getTraitSet(),
        hiveScanRel.getTable(),
        nativeMapRDBScan,
        nativeRowType,
        nativeColumns);
}
Also used : JsonScanSpec(org.apache.drill.exec.store.mapr.db.json.JsonScanSpec) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) HiveUtilities(org.apache.drill.exec.store.hive.HiveUtilities) MapRDBFormatPluginConfig(org.apache.drill.exec.store.mapr.db.MapRDBFormatPluginConfig) HiveToRelDataTypeConverter(org.apache.drill.exec.planner.types.HiveToRelDataTypeConverter) RelOptHelper(org.apache.drill.exec.planner.logical.RelOptHelper) Map(java.util.Map) MapRDBStatistics(org.apache.drill.exec.planner.index.MapRDBStatistics) MapRDBFormatPlugin(org.apache.drill.exec.store.mapr.db.MapRDBFormatPlugin) TupleSchema(org.apache.drill.exec.record.metadata.TupleSchema) RelDataType(org.apache.calcite.rel.type.RelDataType) PrelUtil(org.apache.drill.exec.planner.physical.PrelUtil) StoragePluginOptimizerRule(org.apache.drill.exec.store.StoragePluginOptimizerRule) HiveMapRDBJsonInputFormat(org.apache.hadoop.hive.maprdb.json.input.HiveMapRDBJsonInputFormat) HiveMetadataProvider(org.apache.drill.exec.store.hive.HiveMetadataProvider) SchemaPath(org.apache.drill.common.expression.SchemaPath) IOException(java.io.IOException) FileSystemMetadataProviderManager(org.apache.drill.exec.metastore.store.FileSystemMetadataProviderManager) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) HiveScan(org.apache.drill.exec.store.hive.HiveScan) HiveReadEntry(org.apache.drill.exec.store.hive.HiveReadEntry) List(java.util.List) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) ExecConstants(org.apache.drill.exec.ExecConstants) DocumentConstants(org.ojai.DocumentConstants) MapRDBStatistics(org.apache.drill.exec.planner.index.MapRDBStatistics) 
DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) MapRDBFormatPluginConfig(org.apache.drill.exec.store.mapr.db.MapRDBFormatPluginConfig) RelDataType(org.apache.calcite.rel.type.RelDataType) TupleSchema(org.apache.drill.exec.record.metadata.TupleSchema) HiveReadEntry(org.apache.drill.exec.store.hive.HiveReadEntry) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) HiveScan(org.apache.drill.exec.store.hive.HiveScan) HiveToRelDataTypeConverter(org.apache.drill.exec.planner.types.HiveToRelDataTypeConverter) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) JsonScanSpec(org.apache.drill.exec.store.mapr.db.json.JsonScanSpec)

Aggregations

RelDataType (org.apache.calcite.rel.type.RelDataType)2 HiveToRelDataTypeConverter (org.apache.drill.exec.planner.types.HiveToRelDataTypeConverter)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Map (java.util.Map)1 Collectors (java.util.stream.Collectors)1 RelOptRuleCall (org.apache.calcite.plan.RelOptRuleCall)1 RelDataTypeFactory (org.apache.calcite.rel.type.RelDataTypeFactory)1 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)1 SchemaPath (org.apache.drill.common.expression.SchemaPath)1 ExecConstants (org.apache.drill.exec.ExecConstants)1 FileSystemMetadataProviderManager (org.apache.drill.exec.metastore.store.FileSystemMetadataProviderManager)1 MapRDBStatistics (org.apache.drill.exec.planner.index.MapRDBStatistics)1 DrillScanRel (org.apache.drill.exec.planner.logical.DrillScanRel)1 RelOptHelper (org.apache.drill.exec.planner.logical.RelOptHelper)1 PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings)1 PrelUtil (org.apache.drill.exec.planner.physical.PrelUtil)1 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)1 TupleSchema (org.apache.drill.exec.record.metadata.TupleSchema)1