Use of org.apache.iceberg.hadoop.HadoopTables in project hive by apache.
From the class TestCatalogs, method testCreateDropTableToLocation:
@Test
public void testCreateDropTableToLocation() throws IOException {
  Properties missingSchema = new Properties();
  missingSchema.put("location", temp.newFolder("hadoop_tables").toString());
  AssertHelpers.assertThrows("Should complain about table schema not set", NullPointerException.class,
      "schema not set", () -> Catalogs.createTable(conf, missingSchema));

  conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION);
  Properties missingLocation = new Properties();
  missingLocation.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA));
  AssertHelpers.assertThrows("Should complain about table location not set", NullPointerException.class,
      "location not set", () -> Catalogs.createTable(conf, missingLocation));

  Properties properties = new Properties();
  properties.put("location", temp.getRoot() + "/hadoop_tables");
  properties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA));
  properties.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(SPEC));
  properties.put("dummy", "test");
  Catalogs.createTable(conf, properties);

  HadoopTables tables = new HadoopTables();
  Table table = tables.load(properties.getProperty("location"));
  Assert.assertEquals(properties.getProperty("location"), table.location());
  Assert.assertEquals(SchemaParser.toJson(SCHEMA), SchemaParser.toJson(table.schema()));
  Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec()));
  Assert.assertEquals(Collections.singletonMap("dummy", "test"), table.properties());

  AssertHelpers.assertThrows("Should complain about table location not set", NullPointerException.class,
      "location not set", () -> Catalogs.dropTable(conf, new Properties()));

  Properties dropProperties = new Properties();
  dropProperties.put("location", temp.getRoot() + "/hadoop_tables");
  Catalogs.dropTable(conf, dropProperties);
  AssertHelpers.assertThrows("Should complain about table not found", NoSuchTableException.class,
      "Table does not exist", () -> Catalogs.loadTable(conf, dropProperties));
}
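The test above drives create, load, and drop through the Hive Catalogs facade; the same lifecycle can be exercised directly against HadoopTables. A minimal sketch under assumed names: the class name, schema, and warehouse path below are illustrative and do not come from the test.

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.types.Types;

public class HadoopTablesRoundTrip {
  public static void main(String[] args) {
    HadoopTables tables = new HadoopTables(new Configuration());
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get()));
    String location = "/tmp/warehouse/hadoop_tables"; // hypothetical path

    // create a file-system table at the location, load it back, then drop it
    Table created = tables.create(schema, PartitionSpec.unpartitioned(), location);
    Table loaded = tables.load(location); // reads the metadata under <location>/metadata
    System.out.println(loaded.location() + " " + loaded.schema());
    tables.dropTable(location);
  }
}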
Use of org.apache.iceberg.hadoop.HadoopTables in project hive by apache.
From the class TestHiveIcebergSerDe, method testInitialize:
@Test
public void testInitialize() throws IOException, SerDeException {
  File location = tmp.newFolder();
  Assert.assertTrue(location.delete());
  Configuration conf = new Configuration();
  Properties properties = new Properties();
  properties.setProperty("location", location.toString());
  properties.setProperty(InputFormatConfig.CATALOG_NAME, Catalogs.ICEBERG_HADOOP_TABLE_NAME);
  HadoopTables tables = new HadoopTables(conf);
  tables.create(schema, location.toString());
  HiveIcebergSerDe serDe = new HiveIcebergSerDe();
  serDe.initialize(conf, properties, new Properties());
  Assert.assertEquals(IcebergObjectInspector.create(schema), serDe.getObjectInspector());
}
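The test uses the two-argument create; HadoopTables also has create overloads that take a partition spec and table properties. A brief sketch under assumed names (the class name, schema, spec, property value, and path are illustrative only):

import java.util.Collections;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.types.Types;

public class CreatePartitionedHadoopTable {
  public static void main(String[] args) {
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "category", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).identity("category").build();
    Map<String, String> props = Collections.singletonMap("write.format.default", "parquet");

    HadoopTables tables = new HadoopTables(new Configuration());
    // hypothetical location; the create call writes the first table metadata file there
    Table table = tables.create(schema, spec, props, "/tmp/warehouse/partitioned_table");
    System.out.println(table.spec());
  }
}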
Use of org.apache.iceberg.hadoop.HadoopTables in project drill by apache.
From the class IcebergQueriesTest, method setUpBeforeClass:
@BeforeClass
public static void setUpBeforeClass() throws Exception {
  startCluster(ClusterFixture.builder(dirTestWatcher));
  StoragePluginRegistry pluginRegistry = cluster.drillbit().getContext().getStorage();
  FileSystemConfig pluginConfig = (FileSystemConfig) pluginRegistry.getPlugin(DFS_PLUGIN_NAME).getConfig();
  Map<String, FormatPluginConfig> formats = new HashMap<>(pluginConfig.getFormats());
  formats.put("iceberg", IcebergFormatPluginConfig.builder().build());
  FileSystemConfig newPluginConfig = new FileSystemConfig(
      pluginConfig.getConnection(),
      pluginConfig.getConfig(),
      pluginConfig.getWorkspaces(),
      formats,
      PlainCredentialsProvider.EMPTY_CREDENTIALS_PROVIDER);
  newPluginConfig.setEnabled(pluginConfig.isEnabled());
  pluginRegistry.put(DFS_PLUGIN_NAME, newPluginConfig);

  // defining another plugin with iceberg format to ensure that DRILL-8049 is fixed
  FileSystemConfig anotherFileSystemConfig = pluginConfig.copyWithFormats(formats);
  pluginRegistry.put("dfs2", anotherFileSystemConfig);

  Configuration config = new Configuration();
  config.set(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS);
  HadoopTables tables = new HadoopTables(config);

  Schema structSchema = new Schema(
      Types.NestedField.optional(13, "struct_int_field", Types.IntegerType.get()),
      Types.NestedField.optional(14, "struct_string_field", Types.StringType.get()));
  Types.ListType repeatedStructType = Types.ListType.ofOptional(
      16, Types.StructType.of(
          Types.NestedField.optional(17, "struct_int_field", Types.IntegerType.get()),
          Types.NestedField.optional(18, "struct_string_field", Types.StringType.get())));
  Schema schema = new Schema(
      Types.NestedField.optional(1, "int_field", Types.IntegerType.get()),
      Types.NestedField.optional(2, "long_field", Types.LongType.get()),
      Types.NestedField.optional(3, "float_field", Types.FloatType.get()),
      Types.NestedField.optional(4, "double_field", Types.DoubleType.get()),
      Types.NestedField.optional(5, "string_field", Types.StringType.get()),
      Types.NestedField.optional(6, "boolean_field", Types.BooleanType.get()),
      Types.NestedField.optional(26, "time_field", Types.TimeType.get()),
      Types.NestedField.optional(27, "timestamp_field", Types.TimestampType.withoutZone()),
      Types.NestedField.optional(28, "date_field", Types.DateType.get()),
      Types.NestedField.optional(29, "decimal_field", Types.DecimalType.of(4, 2)),
      Types.NestedField.optional(30, "uuid_field", Types.UUIDType.get()),
      Types.NestedField.optional(31, "fixed_field", Types.FixedType.ofLength(10)),
      Types.NestedField.optional(32, "binary_field", Types.BinaryType.get()),
      Types.NestedField.optional(7, "list_field",
          Types.ListType.ofOptional(10, Types.StringType.get())),
      Types.NestedField.optional(8, "map_field",
          Types.MapType.ofOptional(11, 12, Types.StringType.get(), Types.FloatType.get())),
      Types.NestedField.required(9, "struct_field", structSchema.asStruct()),
      Types.NestedField.required(15, "repeated_struct_field", repeatedStructType),
      Types.NestedField.required(19, "repeated_list_field",
          Types.ListType.ofOptional(20, Types.ListType.ofOptional(21, Types.StringType.get()))),
      Types.NestedField.optional(22, "repeated_map_field",
          Types.ListType.ofOptional(23,
              Types.MapType.ofOptional(24, 25, Types.StringType.get(), Types.FloatType.get()))));
  List<String> listValue = Arrays.asList("a", "b", "c");
  Map<String, Float> mapValue = new HashMap<>();
  mapValue.put("a", 0.1F);
  mapValue.put("b", 0.2F);
  Map<String, Float> secondMapValue = new HashMap<>();
  secondMapValue.put("true", 1F);
  secondMapValue.put("false", 0F);

  Record structValue = GenericRecord.create(structSchema);
  structValue.setField("struct_int_field", 123);
  structValue.setField("struct_string_field", "abc");
  Record secondStructValue = GenericRecord.create(structSchema);
  secondStructValue.setField("struct_int_field", 321);
  secondStructValue.setField("struct_string_field", "def");

  Record record = GenericRecord.create(schema);
  record.setField("int_field", 1);
  record.setField("long_field", 100L);
  record.setField("float_field", 0.5F);
  record.setField("double_field", 1.5D);
  record.setField("string_field", "abc");
  record.setField("boolean_field", true);
  record.setField("time_field", LocalTime.of(2, 42, 42));
  record.setField("timestamp_field", LocalDateTime.of(1994, 4, 18, 11, 0, 0));
  record.setField("date_field", LocalDate.of(1994, 4, 18));
  record.setField("decimal_field", new BigDecimal("12.34"));
  record.setField("uuid_field", new byte[16]);
  record.setField("fixed_field", new byte[10]);
  record.setField("binary_field", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)));
  record.setField("list_field", listValue);
  record.setField("map_field", mapValue);
  record.setField("struct_field", structValue);
  record.setField("repeated_struct_field", Arrays.asList(structValue, structValue));
  record.setField("repeated_list_field", Arrays.asList(listValue, listValue));
  record.setField("repeated_map_field", Arrays.asList(mapValue, mapValue));
  Record nullsRecord = GenericRecord.create(schema);
  nullsRecord.setField("int_field", null);
  nullsRecord.setField("long_field", null);
  nullsRecord.setField("float_field", null);
  nullsRecord.setField("double_field", null);
  nullsRecord.setField("string_field", null);
  nullsRecord.setField("boolean_field", null);
  nullsRecord.setField("time_field", null);
  nullsRecord.setField("timestamp_field", null);
  nullsRecord.setField("date_field", null);
  nullsRecord.setField("decimal_field", null);
  nullsRecord.setField("uuid_field", null);
  nullsRecord.setField("fixed_field", null);
  nullsRecord.setField("binary_field", null);
  nullsRecord.setField("list_field", null);
  nullsRecord.setField("map_field", null);
  nullsRecord.setField("struct_field", GenericRecord.create(structSchema));
  nullsRecord.setField("repeated_struct_field", Collections.emptyList());
  nullsRecord.setField("repeated_list_field", Collections.emptyList());
  nullsRecord.setField("repeated_map_field", Collections.emptyList());
  Record secondRecord = GenericRecord.create(schema);
  secondRecord.setField("int_field", 988);
  secondRecord.setField("long_field", 543L);
  secondRecord.setField("float_field", Float.NaN);
  secondRecord.setField("double_field", Double.MAX_VALUE);
  secondRecord.setField("string_field", "def");
  secondRecord.setField("boolean_field", false);
  secondRecord.setField("time_field", LocalTime.of(3, 41, 53));
  secondRecord.setField("timestamp_field", LocalDateTime.of(1995, 9, 10, 9, 0, 0));
  secondRecord.setField("date_field", LocalDate.of(1995, 9, 10));
  secondRecord.setField("decimal_field", new BigDecimal("99.99"));
  secondRecord.setField("uuid_field", new byte[16]);
  secondRecord.setField("fixed_field", new byte[10]);
  secondRecord.setField("binary_field", ByteBuffer.wrap("world".getBytes(StandardCharsets.UTF_8)));
  secondRecord.setField("list_field", Arrays.asList("y", "n"));
  secondRecord.setField("map_field", secondMapValue);
  secondRecord.setField("struct_field", secondStructValue);
  secondRecord.setField("repeated_struct_field", Arrays.asList(structValue, secondStructValue));
  secondRecord.setField("repeated_list_field", Arrays.asList(listValue, Arrays.asList("y", "n")));
  secondRecord.setField("repeated_map_field", Arrays.asList(mapValue, secondMapValue));
  String location = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypes").toUri().getPath();
  table = tables.create(schema, location);
  writeParquetAndCommitDataFile(table, "allTypes", Arrays.asList(record, nullsRecord));
  writeParquetAndCommitDataFile(table, "allTypes_1", Collections.singleton(secondRecord));

  String avroLocation = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypesAvro").toUri().getPath();
  writeAndCommitDataFile(tables.create(structSchema, avroLocation), "allTypes", FileFormat.AVRO,
      Arrays.asList(structValue, GenericRecord.create(structSchema), secondStructValue));

  String orcLocation = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypesOrc").toUri().getPath();
  writeAndCommitDataFile(tables.create(structSchema, orcLocation), "allTypes", FileFormat.ORC,
      Arrays.asList(structValue, GenericRecord.create(structSchema), secondStructValue));

  String emptyTableLocation = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypesEmpty").toUri().getPath();
  tables.create(structSchema, emptyTableLocation);
}
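The writeParquetAndCommitDataFile and writeAndCommitDataFile helpers are not part of this excerpt. A minimal sketch of what such a helper could look like using Iceberg's generic data API; the class name, method name, file layout, and use of GenericAppenderFactory are assumptions for illustration, not the actual Drill test code.

import java.io.File;
import java.util.List;
import java.util.Locale;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.Files;
import org.apache.iceberg.Table;
import org.apache.iceberg.data.GenericAppenderFactory;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.io.OutputFile;

public class DataFileWriterSketch {
  // hypothetical helper: write records into one data file and commit it to the table
  static void writeAndCommit(Table table, String name, FileFormat format, List<Record> records) throws Exception {
    OutputFile outputFile = Files.localOutput(
        new File(table.location(), name + "." + format.name().toLowerCase(Locale.ROOT)));
    FileAppender<Record> appender = new GenericAppenderFactory(table.schema()).newAppender(outputFile, format);
    try (FileAppender<Record> open = appender) {
      open.addAll(records);
    }
    DataFile dataFile = DataFiles.builder(table.spec())
        .withInputFile(outputFile.toInputFile())
        .withMetrics(appender.metrics()) // metrics are available once the appender is closed
        .withFormat(format)
        .build();
    table.newAppend().appendFile(dataFile).commit();
  }
}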
Use of org.apache.iceberg.hadoop.HadoopTables in project drill by apache.
From the class IcebergGroupScan, method initTableScan:
public static TableScan initTableScan(IcebergFormatPlugin formatPlugin, String path, LogicalExpression condition) {
  TableScan tableScan = new HadoopTables(formatPlugin.getFsConf()).load(path).newScan();

  Map<String, String> properties = formatPlugin.getConfig().getProperties();
  if (properties != null) {
    for (Map.Entry<String, String> entry : properties.entrySet()) {
      tableScan = tableScan.option(entry.getKey(), entry.getValue());
    }
  }

  if (condition != null) {
    Expression expression = condition.accept(DrillExprToIcebergTranslator.INSTANCE, null);
    tableScan = tableScan.filter(expression);
  }

  Snapshot snapshot = formatPlugin.getConfig().getSnapshot();
  if (snapshot != null) {
    tableScan = snapshot.apply(tableScan);
  }

  Boolean caseSensitive = formatPlugin.getConfig().getCaseSensitive();
  if (caseSensitive != null) {
    tableScan = tableScan.caseSensitive(caseSensitive);
  }

  Boolean includeColumnStats = formatPlugin.getConfig().getIncludeColumnStats();
  if (includeColumnStats != null && includeColumnStats) {
    tableScan = tableScan.includeColumnStats();
  }

  Boolean ignoreResiduals = formatPlugin.getConfig().getIgnoreResiduals();
  if (ignoreResiduals != null && ignoreResiduals) {
    tableScan = tableScan.ignoreResiduals();
  }

  return tableScan;
}
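A scan built this way is typically consumed by planning it into tasks. A short illustrative sketch of that consumption; the class and method names here are assumptions, not Drill code.

import java.io.IOException;
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.io.CloseableIterable;

public class ScanPlanningSketch {
  // hypothetical consumer of a TableScan such as the one returned by initTableScan
  static void planAndInspect(TableScan tableScan) throws IOException {
    try (CloseableIterable<CombinedScanTask> tasks = tableScan.planTasks()) {
      for (CombinedScanTask combinedTask : tasks) {
        for (FileScanTask fileTask : combinedTask.files()) {
          // fileTask.file().path() names the data file to read; fileTask.residual() is the
          // per-file residual filter (always-true when ignoreResiduals() was applied)
          System.out.println(fileTask.file().path() + " residual=" + fileTask.residual());
        }
      }
    }
  }
}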