
Example 1 with FieldInfo

Use of com.netflix.metacat.common.server.connectors.model.FieldInfo in project metacat by Netflix.

The class S3ConnectorInfoConverter, method toFields.

private List<FieldInfo> toFields(final Table table) {
    List<FieldInfo> result = Lists.newArrayList();
    final Location location = table.getLocation();
    if (location != null) {
        final Schema schema = location.getSchema();
        if (schema != null) {
            result = schema.getFields().stream().sorted(Comparator.comparing(Field::getPos)).map(this::toFieldInfo).collect(Collectors.toList());
        }
    }
    return result;
}
Also used : Field(com.netflix.metacat.connector.s3.model.Field) Schema(com.netflix.metacat.connector.s3.model.Schema) FieldInfo(com.netflix.metacat.common.server.connectors.model.FieldInfo) Location(com.netflix.metacat.connector.s3.model.Location)
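
The per-field conversion done by toFieldInfo is not shown here. A minimal sketch of what such a helper could look like, assuming FieldInfo exposes a Lombok-style builder (as TableInfo and AuditInfo do in Example 3) and that the S3 Field model has name, source-type, partition-key, and comment accessors alongside the getPos() used above; all of these accessor and builder names are assumptions:

private FieldInfo toFieldInfo(final Field field) {
    return FieldInfo.builder()                   // assumed Lombok builder
        .name(field.getName())                   // assumed accessor
        .sourceType(field.getType())             // assumed accessor
        .isPartitionKey(field.isPartitionKey())  // assumed accessor
        .comment(field.getComment())             // assumed accessor
        .build();
}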

Example 2 with FieldInfo

Use of com.netflix.metacat.common.server.connectors.model.FieldInfo in project metacat by Netflix.

The class S3ConnectorInfoConverter, method toFields.

/**
 * Creates list of fields from table info.
 * @param tableInfo table info
 * @param schema schema
 * @return list of fields
 */
public List<Field> toFields(final TableInfo tableInfo, final Schema schema) {
    final ImmutableList.Builder<Field> columns = ImmutableList.builder();
    int index = 0;
    for (FieldInfo fieldInfo : tableInfo.getFields()) {
        final Field field = toField(fieldInfo);
        field.setPos(index++);
        field.setSchema(schema);
        columns.add(field);
    }
    return columns.build();
}
Also used : Field(com.netflix.metacat.connector.s3.model.Field) ImmutableList(com.google.common.collect.ImmutableList) FieldInfo(com.netflix.metacat.common.server.connectors.model.FieldInfo)
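
A hedged sketch of how the two toFields overloads fit together: the positions assigned here in iteration order are exactly what Example 1 sorts on when reading back. The attachFields helper and the Schema.setFields setter are illustrative assumptions, not part of the shown converter:

// Hypothetical wiring helper; getFields() is shown in Example 1, so a
// matching bean-style setFields() is assumed here.
static Schema attachFields(final S3ConnectorInfoConverter converter,
                           final TableInfo tableInfo, final Schema schema) {
    final List<Field> fields = converter.toFields(tableInfo, schema);
    schema.setFields(fields);  // assumed setter
    // setPos(index++) assigned 0,1,2,... above, so Example 1's sort by
    // Field::getPos reproduces the original TableInfo field order.
    return schema;
}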

Example 3 with FieldInfo

Use of com.netflix.metacat.common.server.connectors.model.FieldInfo in project metacat by Netflix.

The class HiveConnectorInfoConverter, method toTableInfo.

/**
 * Converts a Hive metastore table to a Metacat TableInfo.
 *
 * @param name  qualified name of the table
 * @param table connector table
 * @return Metacat table info
 */
@Override
public TableInfo toTableInfo(final QualifiedName name, final Table table) {
    // Use a mutable list: the fallback below may add columns, and
    // Collections.emptyList() is immutable.
    final List<FieldSchema> nonPartitionColumns = (table.getSd() != null) ? table.getSd().getCols() : new LinkedList<>();
    // Ignore all exceptions; fall back to whatever columns were gathered.
    try {
        if (nonPartitionColumns.isEmpty()) {
            for (StructField field : HiveTableUtil.getTableStructFields(table)) {
                final FieldSchema fieldSchema = new FieldSchema(field.getFieldName(), field.getFieldObjectInspector().getTypeName(), field.getFieldComment());
                nonPartitionColumns.add(fieldSchema);
            }
        }
    } catch (final Exception e) {
        log.error(e.getMessage(), e);
    }
    final List<FieldSchema> partitionColumns = table.getPartitionKeys();
    final Date creationDate = table.isSetCreateTime() ? epochSecondsToDate(table.getCreateTime()) : null;
    final List<FieldInfo> allFields = Lists.newArrayListWithCapacity(nonPartitionColumns.size() + partitionColumns.size());
    nonPartitionColumns.stream().map(field -> hiveToMetacatField(field, false)).forEachOrdered(allFields::add);
    partitionColumns.stream().map(field -> hiveToMetacatField(field, true)).forEachOrdered(allFields::add);
    final AuditInfo auditInfo = AuditInfo.builder().createdDate(creationDate).build();
    if (null != table.getTableType() && table.getTableType().equals(TableType.VIRTUAL_VIEW.name())) {
        return TableInfo.builder().serde(toStorageInfo(table.getSd(), table.getOwner())).fields(allFields).metadata(table.getParameters()).name(name).auditInfo(auditInfo).view(ViewInfo.builder().viewOriginalText(table.getViewOriginalText()).viewExpandedText(table.getViewExpandedText()).build()).build();
    } else {
        return TableInfo.builder().serde(toStorageInfo(table.getSd(), table.getOwner())).fields(allFields).metadata(table.getParameters()).name(name).auditInfo(auditInfo).build();
    }
}
Also used : Date(java.util.Date) AuditInfo(com.netflix.metacat.common.server.connectors.model.AuditInfo) HashMap(java.util.HashMap) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StringUtils(org.apache.commons.lang3.StringUtils) Partition(org.apache.hadoop.hive.metastore.api.Partition) DirectSqlTable(com.netflix.metacat.connector.hive.sql.DirectSqlTable) Strings(com.google.common.base.Strings) DatabaseInfo(com.netflix.metacat.common.server.connectors.model.DatabaseInfo) FieldInfo(com.netflix.metacat.common.server.connectors.model.FieldInfo) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) ConnectorInfoConverter(com.netflix.metacat.common.server.connectors.ConnectorInfoConverter) PartitionInfo(com.netflix.metacat.common.server.connectors.model.PartitionInfo) Map(java.util.Map) IcebergTableWrapper(com.netflix.metacat.connector.hive.iceberg.IcebergTableWrapper) StorageInfo(com.netflix.metacat.common.server.connectors.model.StorageInfo) LinkedList(java.util.LinkedList) Splitter(com.google.common.base.Splitter) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ViewInfo(com.netflix.metacat.common.server.connectors.model.ViewInfo) QualifiedName(com.netflix.metacat.common.QualifiedName) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Slf4j(lombok.extern.slf4j.Slf4j) List(java.util.List) TableInfo(com.netflix.metacat.common.server.connectors.model.TableInfo) HiveTableUtil(com.netflix.metacat.connector.hive.util.HiveTableUtil) TableType(org.apache.hadoop.hive.metastore.TableType) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Database(org.apache.hadoop.hive.metastore.api.Database) Collections(java.util.Collections) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField)
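
The epochSecondsToDate helper called above is not shown. A minimal sketch of it, assuming it simply wraps java.time.Instant (which is already among the class's imports); Hive stores createTime as epoch seconds:

private static Date epochSecondsToDate(final long seconds) {
    // Convert Hive's epoch-seconds timestamp to java.util.Date.
    return Date.from(Instant.ofEpochSecond(seconds));
}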

Example 4 with FieldInfo

Use of com.netflix.metacat.common.server.connectors.model.FieldInfo in project metacat by Netflix.

The class HiveConnectorInfoConverter, method fromPartitionInfo.

/**
 * Converts a Metacat PartitionInfo to the connector partition.
 *
 * @param tableInfo Metacat table info the partition belongs to
 * @param partition Metacat partition info
 * @return connector partition
 */
@Override
public Partition fromPartitionInfo(final TableInfo tableInfo, final PartitionInfo partition) {
    final QualifiedName name = partition.getName();
    final List<String> values = Lists.newArrayListWithCapacity(16);
    Map<String, String> metadata = partition.getMetadata();
    if (metadata == null) {
        // Can't use Collections.emptyMap(): it is immutable, and the embedded
        // metastore later adds entries to this map.
        metadata = new HashMap<>();
    }
    final List<FieldInfo> fields = tableInfo.getFields();
    List<FieldSchema> fieldSchemas = Collections.emptyList();
    if (notNull(fields)) {
        fieldSchemas = fields.stream().filter(field -> !field.isPartitionKey()).map(this::metacatToHiveField).collect(Collectors.toList());
    }
    final StorageDescriptor sd = fromStorageInfo(partition.getSerde(), fieldSchemas);
    // using the table level serialization lib
    if (notNull(sd.getSerdeInfo()) && notNull(tableInfo.getSerde()) && Strings.isNullOrEmpty(sd.getSerdeInfo().getSerializationLib())) {
        sd.getSerdeInfo().setSerializationLib(tableInfo.getSerde().getSerializationLib());
    }
    final AuditInfo auditInfo = partition.getAudit();
    final int createTime = (notNull(auditInfo) && notNull(auditInfo.getCreatedDate())) ? dateToEpochSeconds(auditInfo.getCreatedDate()) : 0;
    final int lastAccessTime = (notNull(auditInfo) && notNull(auditInfo.getLastModifiedDate())) ? dateToEpochSeconds(auditInfo.getLastModifiedDate()) : 0;
    if (null == name) {
        return new Partition(values, "", "", createTime, lastAccessTime, sd, metadata);
    }
    if (notNull(name.getPartitionName())) {
        for (String partialPartName : SLASH_SPLITTER.split(partition.getName().getPartitionName())) {
            final List<String> nameValues = ImmutableList.copyOf(EQUAL_SPLITTER.split(partialPartName));
            Preconditions.checkState(nameValues.size() == 2, "Unrecognized partition name: " + partition.getName());
            values.add(nameValues.get(1));
        }
    }
    final String databaseName = notNull(name.getDatabaseName()) ? name.getDatabaseName() : "";
    final String tableName = notNull(name.getTableName()) ? name.getTableName() : "";
    return new Partition(values, databaseName, tableName, createTime, lastAccessTime, sd, metadata);
}
Also used : Date(java.util.Date) AuditInfo(com.netflix.metacat.common.server.connectors.model.AuditInfo) HashMap(java.util.HashMap) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StringUtils(org.apache.commons.lang3.StringUtils) Partition(org.apache.hadoop.hive.metastore.api.Partition) DirectSqlTable(com.netflix.metacat.connector.hive.sql.DirectSqlTable) Strings(com.google.common.base.Strings) DatabaseInfo(com.netflix.metacat.common.server.connectors.model.DatabaseInfo) FieldInfo(com.netflix.metacat.common.server.connectors.model.FieldInfo) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) ConnectorInfoConverter(com.netflix.metacat.common.server.connectors.ConnectorInfoConverter) PartitionInfo(com.netflix.metacat.common.server.connectors.model.PartitionInfo) Map(java.util.Map) IcebergTableWrapper(com.netflix.metacat.connector.hive.iceberg.IcebergTableWrapper) StorageInfo(com.netflix.metacat.common.server.connectors.model.StorageInfo) LinkedList(java.util.LinkedList) Splitter(com.google.common.base.Splitter) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ViewInfo(com.netflix.metacat.common.server.connectors.model.ViewInfo) QualifiedName(com.netflix.metacat.common.QualifiedName) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Slf4j(lombok.extern.slf4j.Slf4j) List(java.util.List) TableInfo(com.netflix.metacat.common.server.connectors.model.TableInfo) HiveTableUtil(com.netflix.metacat.connector.hive.util.HiveTableUtil) TableType(org.apache.hadoop.hive.metastore.TableType) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Database(org.apache.hadoop.hive.metastore.api.Database) Collections(java.util.Collections) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField)
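
The partition-name parsing above hinges on two Guava splitters. A self-contained sketch showing how a name like dateint=20240101/hour=00 is reduced to its values; the splitter definitions are assumptions matching the SLASH_SPLITTER and EQUAL_SPLITTER constants referenced in fromPartitionInfo:

import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
import java.util.List;

public final class PartitionNameDemo {
    // Assumed definitions for the constants used in fromPartitionInfo.
    private static final Splitter SLASH_SPLITTER = Splitter.on('/');
    private static final Splitter EQUAL_SPLITTER = Splitter.on('=');

    public static void main(final String[] args) {
        final List<String> values = new ArrayList<>();
        for (final String part : SLASH_SPLITTER.split("dateint=20240101/hour=00")) {
            final List<String> nameValues = ImmutableList.copyOf(EQUAL_SPLITTER.split(part));
            Preconditions.checkState(nameValues.size() == 2, "Unrecognized partition name: " + part);
            values.add(nameValues.get(1));  // keep the value, drop the key
        }
        System.out.println(values);  // prints [20240101, 00]
    }
}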

Example 5 with FieldInfo

Use of com.netflix.metacat.common.server.connectors.model.FieldInfo in project metacat by Netflix.

The class IcebergTableHandler, method update.

/**
 * Updates the Iceberg schema if the provided tableInfo contains updated field comments.
 *
 * @param tableInfo table information
 * @return true if a schema update was committed
 */
public boolean update(final TableInfo tableInfo) {
    boolean result = false;
    final List<FieldInfo> fields = tableInfo.getFields();
    // The previous-metadata-location parameter is only sent during data changes, not schema changes.
    if (fields != null && !fields.isEmpty()
            && Strings.isNullOrEmpty(tableInfo.getMetadata().get(DirectSqlTable.PARAM_PREVIOUS_METADATA_LOCATION))) {
        final QualifiedName tableName = tableInfo.getName();
        final String tableMetadataLocation = HiveTableUtil.getIcebergTableMetadataLocation(tableInfo);
        if (Strings.isNullOrEmpty(tableMetadataLocation)) {
            final String message = String.format("No metadata location specified for table %s", tableName);
            log.error(message);
            throw new MetacatBadRequestException(message);
        }
        final IcebergMetastoreTables icebergMetastoreTables = new IcebergMetastoreTables(new IcebergTableOps(conf, tableMetadataLocation, connectorContext.getConfig(), icebergTableOpsProxy));
        final Table table = icebergMetastoreTables.loadTable(HiveTableUtil.qualifiedNameToTableIdentifier(tableName));
        final UpdateSchema updateSchema = table.updateSchema();
        final Schema schema = table.schema();
        for (FieldInfo field : fields) {
            final Types.NestedField iField = schema.findField(field.getName());
            if (iField != null && !Objects.equals(field.getComment(), iField.doc())) {
                updateSchema.updateColumnDoc(field.getName(), field.getComment());
                result = true;
            }
        }
        if (result) {
            updateSchema.commit();
            final String newTableMetadataLocation = icebergMetastoreTables.getTableOps().currentMetadataLocation();
            if (!tableMetadataLocation.equalsIgnoreCase(newTableMetadataLocation)) {
                tableInfo.getMetadata().put(DirectSqlTable.PARAM_PREVIOUS_METADATA_LOCATION, tableMetadataLocation);
                tableInfo.getMetadata().put(DirectSqlTable.PARAM_METADATA_LOCATION, newTableMetadataLocation);
            }
        }
    }
    return result;
}
Also used : Types(org.apache.iceberg.types.Types) DirectSqlTable(com.netflix.metacat.connector.hive.sql.DirectSqlTable) Table(org.apache.iceberg.Table) UpdateSchema(org.apache.iceberg.UpdateSchema) QualifiedName(com.netflix.metacat.common.QualifiedName) Schema(org.apache.iceberg.Schema) MetacatBadRequestException(com.netflix.metacat.common.exception.MetacatBadRequestException) FieldInfo(com.netflix.metacat.common.server.connectors.model.FieldInfo)
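
The same comment-sync loop can be exercised against any Iceberg table handle. A hedged standalone sketch using org.apache.iceberg.hadoop.HadoopTables in place of metacat's IcebergMetastoreTables wrapper; the table location, column name, and new comment are placeholders:

import java.util.Objects;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.UpdateSchema;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.types.Types;

// Load the table directly from its filesystem location (placeholder path).
final Table table = new HadoopTables(new Configuration()).load("s3://bucket/path/to/table");
final UpdateSchema updateSchema = table.updateSchema();
final Schema schema = table.schema();
final Types.NestedField field = schema.findField("dateint");
if (field != null && !Objects.equals("partition date, yyyyMMdd", field.doc())) {
    // Same call as in update() above; commit() writes a new metadata file.
    updateSchema.updateColumnDoc("dateint", "partition date, yyyyMMdd");
    updateSchema.commit();
}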

Aggregations

FieldInfo (com.netflix.metacat.common.server.connectors.model.FieldInfo): 13 usages
ImmutableList (com.google.common.collect.ImmutableList): 9 usages
QualifiedName (com.netflix.metacat.common.QualifiedName): 5 usages
StorageInfo (com.netflix.metacat.common.server.connectors.model.StorageInfo): 5 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 5 usages
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 5 usages
TableInfo (com.netflix.metacat.common.server.connectors.model.TableInfo): 4 usages
DirectSqlTable (com.netflix.metacat.connector.hive.sql.DirectSqlTable): 4 usages
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 4 usages
Lists (com.google.common.collect.Lists): 3 usages
AuditInfo (com.netflix.metacat.common.server.connectors.model.AuditInfo): 3 usages
ViewInfo (com.netflix.metacat.common.server.connectors.model.ViewInfo): 3 usages
List (java.util.List): 3 usages
Collectors (java.util.stream.Collectors): 3 usages
Slf4j (lombok.extern.slf4j.Slf4j): 3 usages
Table (org.apache.hadoop.hive.metastore.api.Table): 3 usages
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 3 usages
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 2 usages
Preconditions (com.google.common.base.Preconditions): 2 usages
Splitter (com.google.common.base.Splitter): 2 usages