Example 61 with StorageDescriptor

use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project incubator-gobblin by apache.

the class ConvertibleHiveDatasetTest method getTestTable.

public static Table getTestTable(String dbName, String tableName) {
    Table table = new Table();
    table.setDbName(dbName);
    table.setTableName(tableName);
    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation("/tmp/test");
    table.setSd(sd);
    return table;
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)
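For reference, a minimal usage sketch (hypothetical class and values, not part of the Gobblin tests; it assumes the test class above is on the classpath): it calls the helper and reads back the only StorageDescriptor field it populates.

import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;

public class GetTestTableUsageSketch {
    public static void main(String[] args) {
        // Build a throwaway table for database "testdb" and table "events".
        Table table = ConvertibleHiveDatasetTest.getTestTable("testdb", "events");
        // getTestTable only sets the SD location, so that is all we read back here.
        StorageDescriptor sd = table.getSd();
        System.out.println(table.getDbName() + "." + table.getTableName() + " -> " + sd.getLocation());
    }
}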

Example 62 with StorageDescriptor

use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project drill by axbaretto.

the class ConvertHiveParquetScanToDrillParquetScan method matches.

/**
 * Rule matches when all of the following hold:
 * 1) The GroupScan in the given DrillScanRel is a {@link HiveScan}
 * 2) The {@link HiveScan} has not already been rewritten to use Drill's native readers
 * 3) The InputFormat in the Hive table metadata and in the metadata of every partition is
 *    {@link MapredParquetInputFormat}
 * 4) No error occurred while checking the above conditions; any error is logged as a warning.
 *
 * @param call rule call providing the {@link DrillScanRel} to check
 * @return true if the rule can be applied, false otherwise
 */
@Override
public boolean matches(RelOptRuleCall call) {
    final DrillScanRel scanRel = (DrillScanRel) call.rel(0);
    if (!(scanRel.getGroupScan() instanceof HiveScan) || ((HiveScan) scanRel.getGroupScan()).isNativeReader()) {
        return false;
    }
    final HiveScan hiveScan = (HiveScan) scanRel.getGroupScan();
    final HiveConf hiveConf = hiveScan.getHiveConf();
    final HiveTableWithColumnCache hiveTable = hiveScan.getHiveReadEntry().getTable();
    final Class<? extends InputFormat<?, ?>> tableInputFormat = getInputFormatFromSD(
            HiveUtilities.getTableMetadata(hiveTable), hiveScan.getHiveReadEntry(), hiveTable.getSd(), hiveConf);
    if (tableInputFormat == null || !tableInputFormat.equals(MapredParquetInputFormat.class)) {
        return false;
    }
    final List<HivePartitionWrapper> partitions = hiveScan.getHiveReadEntry().getHivePartitionWrappers();
    if (partitions == null) {
        return true;
    }
    final List<FieldSchema> tableSchema = hiveTable.getSd().getCols();
    // Make sure all partitions have the same input format as the table input format
    for (HivePartitionWrapper partition : partitions) {
        final StorageDescriptor partitionSD = partition.getPartition().getSd();
        Class<? extends InputFormat<?, ?>> inputFormat = getInputFormatFromSD(
                HiveUtilities.getPartitionMetadata(partition.getPartition(), hiveTable),
                hiveScan.getHiveReadEntry(), partitionSD, hiveConf);
        if (inputFormat == null || !inputFormat.equals(tableInputFormat)) {
            return false;
        }
        // The native reader requires each partition's schema to match the table schema: Hive's SerDes
        // can convert between possible types, but Drill doesn't have a similar set of methods yet.
        if (!partitionSD.getCols().equals(tableSchema)) {
            logger.debug("Partitions schema is different from table schema. Currently native reader conversion can't " + "handle schema difference between partitions and table");
            return false;
        }
    }
    return true;
}
Also used : DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) HiveScan(org.apache.drill.exec.store.hive.HiveScan) HivePartitionWrapper(org.apache.drill.exec.store.hive.HiveTableWrapper.HivePartitionWrapper) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) HiveConf(org.apache.hadoop.hive.conf.HiveConf) HiveTableWithColumnCache(org.apache.drill.exec.store.hive.HiveTableWithColumnCache)
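The private helper getInputFormatFromSD is not shown above. As a rough sketch of what such a lookup can look like (an illustrative assumption, not Drill's actual implementation, which also consults table and partition properties), the InputFormat class can be resolved from the class name stored in the StorageDescriptor:

import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.mapred.InputFormat;

public final class InputFormatFromSdSketch {
    @SuppressWarnings("unchecked")
    public static Class<? extends InputFormat<?, ?>> inputFormatFromSd(StorageDescriptor sd) {
        try {
            // sd.getInputFormat() holds the fully qualified class name, e.g.
            // "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat".
            return (Class<? extends InputFormat<?, ?>>) Class.forName(sd.getInputFormat());
        } catch (ReflectiveOperationException | RuntimeException e) {
            // Mirror the rule's behavior: any failure means the rule cannot match.
            return null;
        }
    }
}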

Example 63 with StorageDescriptor

use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project atlas by apache.

the class BaseHiveEvent method getStorageDescEntity.

protected AtlasEntity getStorageDescEntity(AtlasObjectId tableId, Table table) {
    String sdQualifiedName = getQualifiedName(table, table.getSd());
    boolean isKnownTable = tableId.getGuid() == null;
    AtlasEntity ret = context.getEntity(sdQualifiedName);
    if (ret == null) {
        ret = new AtlasEntity(HIVE_TYPE_STORAGEDESC);
        // if the table is already known (its guid in tableId is null), clear the guid so this entity
        // is not added to referredEntities and the Atlas server resolves it by its qualifiedName
        if (isKnownTable) {
            ret.setGuid(null);
        }
        StorageDescriptor sd = table.getSd();
        ret.setAttribute(ATTRIBUTE_TABLE, tableId);
        ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, sdQualifiedName);
        ret.setAttribute(ATTRIBUTE_PARAMETERS, sd.getParameters());
        ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getInstance().getPathWithNameServiceID(sd.getLocation()));
        ret.setAttribute(ATTRIBUTE_INPUT_FORMAT, sd.getInputFormat());
        ret.setAttribute(ATTRIBUTE_OUTPUT_FORMAT, sd.getOutputFormat());
        ret.setAttribute(ATTRIBUTE_COMPRESSED, sd.isCompressed());
        ret.setAttribute(ATTRIBUTE_NUM_BUCKETS, sd.getNumBuckets());
        ret.setAttribute(ATTRIBUTE_STORED_AS_SUB_DIRECTORIES, sd.isStoredAsSubDirectories());
        if (sd.getBucketCols().size() > 0) {
            ret.setAttribute(ATTRIBUTE_BUCKET_COLS, sd.getBucketCols());
        }
        if (sd.getSerdeInfo() != null) {
            AtlasStruct serdeInfo = new AtlasStruct(HIVE_TYPE_SERDE);
            SerDeInfo sdSerDeInfo = sd.getSerdeInfo();
            serdeInfo.setAttribute(ATTRIBUTE_NAME, sdSerDeInfo.getName());
            serdeInfo.setAttribute(ATTRIBUTE_SERIALIZATION_LIB, sdSerDeInfo.getSerializationLib());
            serdeInfo.setAttribute(ATTRIBUTE_PARAMETERS, sdSerDeInfo.getParameters());
            ret.setAttribute(ATTRIBUTE_SERDE_INFO, serdeInfo);
        }
        if (CollectionUtils.isNotEmpty(sd.getSortCols())) {
            List<AtlasStruct> sortCols = new ArrayList<>(sd.getSortCols().size());
            for (Order sdSortCol : sd.getSortCols()) {
                AtlasStruct sortcol = new AtlasStruct(HIVE_TYPE_ORDER);
                sortcol.setAttribute("col", sdSortCol.getCol());
                sortcol.setAttribute("order", sdSortCol.getOrder());
                sortCols.add(sortcol);
            }
            ret.setAttribute(ATTRIBUTE_SORT_COLS, sortCols);
        }
        context.putEntity(sdQualifiedName, ret);
    }
    return ret;
}
Also used : AtlasStruct(org.apache.atlas.model.instance.AtlasStruct) Order(org.apache.hadoop.hive.metastore.api.Order) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList)
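A hedged sketch (illustrative names and values, not Atlas code) of a metastore Table whose StorageDescriptor would exercise the SerDeInfo and sort-column branches handled above:

import java.util.Collections;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;

public final class StorageDescSampleSketch {
    public static Table sampleTable() {
        StorageDescriptor sd = new StorageDescriptor();
        sd.setLocation("hdfs://mycluster/warehouse/db/tbl");
        sd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
        sd.setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
        sd.setNumBuckets(4);
        sd.setBucketCols(Collections.singletonList("user_id"));

        SerDeInfo serde = new SerDeInfo();
        serde.setName("tbl");
        serde.setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
        sd.setSerdeInfo(serde);

        // Order(col, order): 1 means ascending in the Hive metastore model.
        sd.setSortCols(Collections.singletonList(new Order("event_time", 1)));

        Table table = new Table();
        table.setDbName("db");
        table.setTableName("tbl");
        table.setSd(sd);
        return table;
    }
}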

Example 64 with StorageDescriptor

use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project presto by prestodb.

the class ThriftMetastoreUtil method makeStorageDescriptor.

private static StorageDescriptor makeStorageDescriptor(String tableName, List<Column> columns, Storage storage, ColumnConverter columnConverter) {
    if (storage.isSkewed()) {
        throw new IllegalArgumentException("Writing to skewed table/partition is not supported");
    }
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serdeInfo.setParameters(storage.getSerdeParameters());
    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(emptyToNull(storage.getLocation()));
    sd.setCols(columns.stream().map(col -> ThriftMetastoreUtil.toMetastoreApiFieldSchema(col, columnConverter)).collect(toList()));
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    sd.setParameters(storage.getParameters());
    Optional<HiveBucketProperty> bucketProperty = storage.getBucketProperty();
    if (bucketProperty.isPresent()) {
        sd.setNumBuckets(bucketProperty.get().getBucketCount());
        sd.setBucketCols(bucketProperty.get().getBucketedBy());
        if (!bucketProperty.get().getSortedBy().isEmpty()) {
            sd.setSortCols(bucketProperty.get().getSortedBy().stream()
                    .map(column -> new Order(column.getColumnName(), column.getOrder().getHiveOrder()))
                    .collect(toList()));
        }
    }
    return sd;
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) HiveBucketProperty(com.facebook.presto.hive.HiveBucketProperty) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)
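A short hedged sketch (not Presto code) of reading back the bucketing fields that makeStorageDescriptor writes when a HiveBucketProperty is present:

import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

public final class BucketingReadbackSketch {
    public static void printBucketing(StorageDescriptor sd) {
        if (sd.getNumBuckets() > 0) {
            System.out.println("bucketed by " + sd.getBucketCols() + " into " + sd.getNumBuckets() + " buckets");
        }
        if (sd.getSortCols() != null) {
            for (Order order : sd.getSortCols()) {
                // order.getOrder() carries the Hive sort direction (1 = ascending)
                System.out.println("sorted by " + order.getCol() + ", direction " + order.getOrder());
            }
        }
    }
}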

Example 65 with StorageDescriptor

use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project presto by prestodb.

the class ThriftMetastoreUtil method fromMetastoreApiPartition.

public static Partition fromMetastoreApiPartition(org.apache.hadoop.hive.metastore.api.Partition partition, PartitionMutator partitionMutator, ColumnConverter columnConverter) {
    StorageDescriptor storageDescriptor = partition.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition);
    }
    Partition.Builder partitionBuilder = Partition.builder()
            .setDatabaseName(partition.getDbName())
            .setTableName(partition.getTableName())
            .setValues(partition.getValues())
            .setColumns(storageDescriptor.getCols().stream()
                    .map(fieldSchema -> columnConverter.toColumn(fieldSchema))
                    .collect(toList()))
            .setParameters(partition.getParameters())
            .setCreateTime(partition.getCreateTime());
    // mutate apache partition to Presto partition
    partitionMutator.mutate(partitionBuilder, partition);
    fromMetastoreApiStorageDescriptor(storageDescriptor, partitionBuilder.getStorageBuilder(), format("%s.%s", partition.getTableName(), partition.getValues()));
    return partitionBuilder.build();
}
Also used : Partition(com.facebook.presto.hive.metastore.Partition) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) PrestoException(com.facebook.presto.spi.PrestoException)
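A hedged sketch (illustrative values, not Presto code) of a Thrift Partition with its StorageDescriptor set; the sd field is optional in the Thrift model, which is why fromMetastoreApiPartition guards against null before converting:

import java.util.Arrays;
import java.util.Collections;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

public final class MetastorePartitionSketch {
    public static Partition samplePartition() {
        StorageDescriptor sd = new StorageDescriptor();
        sd.setCols(Collections.singletonList(new FieldSchema("value", "string", null)));
        sd.setLocation("hdfs://ns/warehouse/db/tbl/ds=2024-01-01");

        Partition partition = new Partition();
        partition.setDbName("db");
        partition.setTableName("tbl");
        partition.setValues(Arrays.asList("2024-01-01"));
        // Leaving setSd out would send the conversion above down the HIVE_INVALID_METADATA path.
        partition.setSd(sd);
        return partition;
    }
}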

Aggregations

StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 284
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 163
Table (org.apache.hadoop.hive.metastore.api.Table): 159
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 155
ArrayList (java.util.ArrayList): 134
Test (org.junit.Test): 131
Partition (org.apache.hadoop.hive.metastore.api.Partition): 97
HashMap (java.util.HashMap): 61
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 38
List (java.util.List): 35
Order (org.apache.hadoop.hive.metastore.api.Order): 33
Path (org.apache.hadoop.fs.Path): 30
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 30
ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 30
ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 29
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 29
AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats): 27
Database (org.apache.hadoop.hive.metastore.api.Database): 25
SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo): 23
IOException (java.io.IOException): 15