Search in sources :

Example 1 with Function1

Use of org.apache.hudi.common.util.Functions.Function1 in the Apache Hudi project.

The method createLatestSchemaProvider of the class UtilHelpers.

/**
 * Create latest schema provider for Target schema.
 *
 * <p>The incoming (writer) schema is taken from a {@link RowBasedSchemaProvider} built over {@code structType}.
 * If a table already exists at {@code basePath}, the target schema is obtained from
 * {@code TableSchemaResolver#getLatestSchema}, seeded with the writer schema. If no table exists, or if the
 * lookup fails with an {@link IOException}, the writer schema is used as the target schema instead and the
 * failure is logged as a warning together with its cause.
 *
 * @param structType spark data type of incoming batch.
 * @param jssc       instance of {@link JavaSparkContext}; its Hadoop configuration is used to build the meta client.
 * @param fs         instance of {@link FileSystem}, used to check whether the table exists.
 * @param basePath   base path of the table.
 * @return the schema provider where target schema refers to latest schema(either incoming schema or table schema);
 *         its source schema is the source schema of the row based provider.
 */
public static SchemaProvider createLatestSchemaProvider(StructType structType, JavaSparkContext jssc, FileSystem fs, String basePath) {
    SchemaProvider rowSchemaProvider = new RowBasedSchemaProvider(structType);
    Schema writeSchema = rowSchemaProvider.getTargetSchema();
    // Start from the writer schema; it is only replaced when the table schema is resolved successfully.
    Schema latestTableSchema = writeSchema;
    try {
        if (FSUtils.isTableExists(basePath, fs)) {
            HoodieTableMetaClient tableMetaClient = HoodieTableMetaClient.builder()
                .setConf(jssc.sc().hadoopConfiguration())
                .setBasePath(basePath)
                .build();
            TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(tableMetaClient);
            // The converter round-trips an Avro schema through a Spark StructType and back, re-creating it
            // under the row based provider's record name and namespace.
            // NOTE(review): the meaning of the boolean flag passed to getLatestSchema is not visible here - confirm.
            latestTableSchema = tableSchemaResolver.getLatestSchema(writeSchema, true,
                (Function1<Schema, Schema>) v1 -> AvroConversionUtils.convertStructTypeToAvroSchema(
                    AvroConversionUtils.convertAvroSchemaToStructType(v1),
                    RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME,
                    RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE));
        }
    } catch (IOException e) {
        // Deliberately best-effort: keep the writer schema, but log the cause so the failure can be diagnosed.
        LOG.warn("Could not fetch table schema. Falling back to writer schema", e);
    }
    final Schema finalLatestTableSchema = latestTableSchema;
    return new SchemaProvider(new TypedProperties()) {

        @Override
        public Schema getSourceSchema() {
            return rowSchemaProvider.getSourceSchema();
        }

        @Override
        public Schema getTargetSchema() {
            return finalLatestTableSchema;
        }
    };
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Schema(org.apache.avro.Schema) Function1(org.apache.hudi.common.util.Functions.Function1) SchemaProvider(org.apache.hudi.utilities.schema.SchemaProvider) RowBasedSchemaProvider(org.apache.hudi.utilities.schema.RowBasedSchemaProvider) DelegatingSchemaProvider(org.apache.hudi.utilities.schema.DelegatingSchemaProvider) RowBasedSchemaProvider(org.apache.hudi.utilities.schema.RowBasedSchemaProvider) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) TypedProperties(org.apache.hudi.common.config.TypedProperties)

Aggregations

IOException (java.io.IOException)1 Schema (org.apache.avro.Schema)1 TypedProperties (org.apache.hudi.common.config.TypedProperties)1 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)1 TableSchemaResolver (org.apache.hudi.common.table.TableSchemaResolver)1 Function1 (org.apache.hudi.common.util.Functions.Function1)1 HoodieIOException (org.apache.hudi.exception.HoodieIOException)1 DelegatingSchemaProvider (org.apache.hudi.utilities.schema.DelegatingSchemaProvider)1 RowBasedSchemaProvider (org.apache.hudi.utilities.schema.RowBasedSchemaProvider)1 SchemaProvider (org.apache.hudi.utilities.schema.SchemaProvider)1