use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project incubator-gobblin by apache.
the class ConvertibleHiveDatasetTest method getTestTable.
public static Table getTestTable(String dbName, String tableName) {
  Table table = new Table();
  table.setDbName(dbName);
  table.setTableName(tableName);
  StorageDescriptor sd = new StorageDescriptor();
  sd.setLocation("/tmp/test");
  table.setSd(sd);
  return table;
}
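For context, a minimal sketch of how this helper might be exercised. The test method below is hypothetical (not part of ConvertibleHiveDatasetTest), and the use of TestNG's Assert is an assumption; the expected values simply mirror the hard-coded arguments and location above.
import org.apache.hadoop.hive.metastore.api.Table;
import org.testng.Assert;
import org.testng.annotations.Test;
// Hypothetical test exercising getTestTable; values mirror the helper above.
@Test
public void sketchGetTestTable() {
  Table table = ConvertibleHiveDatasetTest.getTestTable("testdb", "testtable");
  Assert.assertEquals(table.getDbName(), "testdb");
  Assert.assertEquals(table.getTableName(), "testtable");
  Assert.assertEquals(table.getSd().getLocation(), "/tmp/test");
}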
use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project drill by axbaretto.
the class ConvertHiveParquetScanToDrillParquetScan method matches.
/**
 * Rule is matched when all of the following match:
 * 1) GroupScan in given DrillScanRel is a {@link HiveScan}
 * 2) {@link HiveScan} is not already rewritten using Drill's native readers
 * 3) InputFormat in Hive table metadata and all partitions metadata contains the same value
 *    {@link MapredParquetInputFormat}
 * 4) No error occurred while checking for the above conditions. Any error is logged as a warning.
 *
 * @param call rule call containing the scan to check
 * @return True if the rule can be applied. False otherwise
 */
@Override
public boolean matches(RelOptRuleCall call) {
  final DrillScanRel scanRel = (DrillScanRel) call.rel(0);
  if (!(scanRel.getGroupScan() instanceof HiveScan) || ((HiveScan) scanRel.getGroupScan()).isNativeReader()) {
    return false;
  }
  final HiveScan hiveScan = (HiveScan) scanRel.getGroupScan();
  final HiveConf hiveConf = hiveScan.getHiveConf();
  final HiveTableWithColumnCache hiveTable = hiveScan.getHiveReadEntry().getTable();
  final Class<? extends InputFormat<?, ?>> tableInputFormat =
      getInputFormatFromSD(HiveUtilities.getTableMetadata(hiveTable), hiveScan.getHiveReadEntry(), hiveTable.getSd(), hiveConf);
  if (tableInputFormat == null || !tableInputFormat.equals(MapredParquetInputFormat.class)) {
    return false;
  }
  final List<HivePartitionWrapper> partitions = hiveScan.getHiveReadEntry().getHivePartitionWrappers();
  if (partitions == null) {
    return true;
  }
  final List<FieldSchema> tableSchema = hiveTable.getSd().getCols();
  // Make sure all partitions have the same input format as the table input format
  for (HivePartitionWrapper partition : partitions) {
    final StorageDescriptor partitionSD = partition.getPartition().getSd();
    Class<? extends InputFormat<?, ?>> inputFormat =
        getInputFormatFromSD(HiveUtilities.getPartitionMetadata(partition.getPartition(), hiveTable), hiveScan.getHiveReadEntry(), partitionSD, hiveConf);
    if (inputFormat == null || !inputFormat.equals(tableInputFormat)) {
      return false;
    }
    // The partition schema must match the table schema exactly; Drill doesn't have a similar set of
    // methods (to reconcile differing but compatible column types) yet.
    if (!partitionSD.getCols().equals(tableSchema)) {
      logger.debug("Partitions schema is different from table schema. Currently native reader conversion can't "
          + "handle schema difference between partitions and table");
      return false;
    }
  }
  return true;
}
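The table and partition checks above delegate to getInputFormatFromSD, whose body is not shown here. A minimal, hypothetical sketch of such a lookup follows; the helper name and logic are illustrative only, and the real Drill implementation also consults the supplied HiveConf and table/partition properties.
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.mapred.InputFormat;
// Hypothetical helper, not the actual Drill code: resolve the InputFormat class named in a
// StorageDescriptor, returning null when it cannot be loaded so the rule treats the scan as
// non-convertible.
@SuppressWarnings("unchecked")
private static Class<? extends InputFormat<?, ?>> inputFormatFromSD(StorageDescriptor sd) {
  String className = sd.getInputFormat();
  if (className == null) {
    return null;
  }
  try {
    return (Class<? extends InputFormat<?, ?>>) Class.forName(className);
  } catch (ClassNotFoundException e) {
    return null;
  }
}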
use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project atlas by apache.
the class BaseHiveEvent method getStorageDescEntity.
protected AtlasEntity getStorageDescEntity(AtlasObjectId tableId, Table table) {
    String sdQualifiedName = getQualifiedName(table, table.getSd());
    boolean isKnownTable = tableId.getGuid() == null;
    AtlasEntity ret = context.getEntity(sdQualifiedName);
    if (ret == null) {
        ret = new AtlasEntity(HIVE_TYPE_STORAGEDESC);
        // if the table is already known, clear the guid so that the
        // Atlas server resolves the entity by its qualifiedName
        if (isKnownTable) {
            ret.setGuid(null);
        }
        StorageDescriptor sd = table.getSd();
        ret.setAttribute(ATTRIBUTE_TABLE, tableId);
        ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, sdQualifiedName);
        ret.setAttribute(ATTRIBUTE_PARAMETERS, sd.getParameters());
        ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getInstance().getPathWithNameServiceID(sd.getLocation()));
        ret.setAttribute(ATTRIBUTE_INPUT_FORMAT, sd.getInputFormat());
        ret.setAttribute(ATTRIBUTE_OUTPUT_FORMAT, sd.getOutputFormat());
        ret.setAttribute(ATTRIBUTE_COMPRESSED, sd.isCompressed());
        ret.setAttribute(ATTRIBUTE_NUM_BUCKETS, sd.getNumBuckets());
        ret.setAttribute(ATTRIBUTE_STORED_AS_SUB_DIRECTORIES, sd.isStoredAsSubDirectories());
        if (sd.getBucketCols().size() > 0) {
            ret.setAttribute(ATTRIBUTE_BUCKET_COLS, sd.getBucketCols());
        }
        if (sd.getSerdeInfo() != null) {
            AtlasStruct serdeInfo = new AtlasStruct(HIVE_TYPE_SERDE);
            SerDeInfo sdSerDeInfo = sd.getSerdeInfo();
            serdeInfo.setAttribute(ATTRIBUTE_NAME, sdSerDeInfo.getName());
            serdeInfo.setAttribute(ATTRIBUTE_SERIALIZATION_LIB, sdSerDeInfo.getSerializationLib());
            serdeInfo.setAttribute(ATTRIBUTE_PARAMETERS, sdSerDeInfo.getParameters());
            ret.setAttribute(ATTRIBUTE_SERDE_INFO, serdeInfo);
        }
        if (CollectionUtils.isNotEmpty(sd.getSortCols())) {
            List<AtlasStruct> sortCols = new ArrayList<>(sd.getSortCols().size());
            for (Order sdSortCol : sd.getSortCols()) {
                AtlasStruct sortcol = new AtlasStruct(HIVE_TYPE_ORDER);
                sortcol.setAttribute("col", sdSortCol.getCol());
                sortcol.setAttribute("order", sdSortCol.getOrder());
                sortCols.add(sortcol);
            }
            ret.setAttribute(ATTRIBUTE_SORT_COLS, sortCols);
        }
        context.putEntity(sdQualifiedName, ret);
    }
    return ret;
}
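For reference, a minimal sketch of a thrift StorageDescriptor carrying the fields that getStorageDescEntity reads above. The helper method and all of its values are illustrative only, not taken from the Atlas sources.
import java.util.Collections;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
// Hypothetical sample input; values are illustrative only.
static StorageDescriptor sampleStorageDescriptor() {
    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation("hdfs://nameservice1/warehouse/db.db/tbl");
    sd.setInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat");
    sd.setOutputFormat("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat");
    sd.setCompressed(false);
    sd.setNumBuckets(4);
    sd.setStoredAsSubDirectories(false);
    sd.setBucketCols(Collections.singletonList("id"));
    sd.setSortCols(Collections.singletonList(new Order("id", 1))); // 1 == ascending
    sd.setParameters(Collections.emptyMap());
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName("tbl");
    serdeInfo.setSerializationLib("org.apache.hadoop.hive.ql.io.orc.OrcSerde");
    serdeInfo.setParameters(Collections.emptyMap());
    sd.setSerdeInfo(serdeInfo);
    return sd;
}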
use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project presto by prestodb.
the class ThriftMetastoreUtil method makeStorageDescriptor.
private static StorageDescriptor makeStorageDescriptor(String tableName, List<Column> columns, Storage storage, ColumnConverter columnConverter) {
    if (storage.isSkewed()) {
        throw new IllegalArgumentException("Writing to skewed table/partition is not supported");
    }
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serdeInfo.setParameters(storage.getSerdeParameters());
    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(emptyToNull(storage.getLocation()));
    sd.setCols(columns.stream()
            .map(col -> ThriftMetastoreUtil.toMetastoreApiFieldSchema(col, columnConverter))
            .collect(toList()));
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    sd.setParameters(storage.getParameters());
    Optional<HiveBucketProperty> bucketProperty = storage.getBucketProperty();
    if (bucketProperty.isPresent()) {
        sd.setNumBuckets(bucketProperty.get().getBucketCount());
        sd.setBucketCols(bucketProperty.get().getBucketedBy());
        if (!bucketProperty.get().getSortedBy().isEmpty()) {
            sd.setSortCols(bucketProperty.get().getSortedBy().stream()
                    .map(column -> new Order(column.getColumnName(), column.getOrder().getHiveOrder()))
                    .collect(toList()));
        }
    }
    return sd;
}
use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project presto by prestodb.
the class ThriftMetastoreUtil method fromMetastoreApiPartition.
public static Partition fromMetastoreApiPartition(org.apache.hadoop.hive.metastore.api.Partition partition, PartitionMutator partitionMutator, ColumnConverter columnConverter) {
    StorageDescriptor storageDescriptor = partition.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition);
    }
    Partition.Builder partitionBuilder = Partition.builder()
            .setDatabaseName(partition.getDbName())
            .setTableName(partition.getTableName())
            .setValues(partition.getValues())
            .setColumns(storageDescriptor.getCols().stream()
                    .map(fieldSchema -> columnConverter.toColumn(fieldSchema))
                    .collect(toList()))
            .setParameters(partition.getParameters())
            .setCreateTime(partition.getCreateTime());
    // mutate apache partition to Presto partition
    partitionMutator.mutate(partitionBuilder, partition);
    fromMetastoreApiStorageDescriptor(storageDescriptor, partitionBuilder.getStorageBuilder(), format("%s.%s", partition.getTableName(), partition.getValues()));
    return partitionBuilder.build();
}
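A minimal sketch of a metastore Partition that this converter accepts; a partition arriving without a StorageDescriptor triggers the HIVE_INVALID_METADATA error above. The helper method and all values are illustrative only.
import java.util.Collections;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
// Hypothetical sample input; values are illustrative only.
static Partition sampleMetastorePartition() {
    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation("hdfs://warehouse/db.db/tbl/ds=2020-01-01");
    sd.setCols(Collections.singletonList(new FieldSchema("id", "bigint", null)));
    Partition partition = new Partition();
    partition.setDbName("db");
    partition.setTableName("tbl");
    partition.setValues(Collections.singletonList("2020-01-01"));
    partition.setParameters(Collections.emptyMap());
    partition.setCreateTime((int) (System.currentTimeMillis() / 1000));
    partition.setSd(sd); // fromMetastoreApiPartition throws if this is left null
    return partition;
}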