Use of org.apache.iceberg.hadoop.HadoopTables in project hive by apache.
From the class TestCatalogs, method testCreateDropTableToLocation:
@Test
public void testCreateDropTableToLocation() throws IOException {
  Properties missingSchema = new Properties();
  missingSchema.put("location", temp.newFolder("hadoop_tables").toString());
  AssertHelpers.assertThrows("Should complain about table schema not set", NullPointerException.class,
      "schema not set", () -> Catalogs.createTable(conf, missingSchema));

  conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION);
  Properties missingLocation = new Properties();
  missingLocation.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA));
  AssertHelpers.assertThrows("Should complain about table location not set", NullPointerException.class,
      "location not set", () -> Catalogs.createTable(conf, missingLocation));

  Properties properties = new Properties();
  properties.put("location", temp.getRoot() + "/hadoop_tables");
  properties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA));
  properties.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(SPEC));
  properties.put("dummy", "test");
  Catalogs.createTable(conf, properties);

  HadoopTables tables = new HadoopTables();
  Table table = tables.load(properties.getProperty("location"));
  Assert.assertEquals(properties.getProperty("location"), table.location());
  Assert.assertEquals(SchemaParser.toJson(SCHEMA), SchemaParser.toJson(table.schema()));
  Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec()));
  Assert.assertEquals(Collections.singletonMap("dummy", "test"), table.properties());

  AssertHelpers.assertThrows("Should complain about table location not set", NullPointerException.class,
      "location not set", () -> Catalogs.dropTable(conf, new Properties()));

  Properties dropProperties = new Properties();
  dropProperties.put("location", temp.getRoot() + "/hadoop_tables");
  Catalogs.dropTable(conf, dropProperties);
  AssertHelpers.assertThrows("Should complain about table not found", NoSuchTableException.class,
      "Table does not exist", () -> Catalogs.loadTable(conf, dropProperties));
}
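The test above drives create, load, and drop through the Hive Catalogs facade; the same lifecycle can be exercised directly against HadoopTables. A minimal sketch under assumed names: the class name, schema, and warehouse path below are illustrative and do not come from the test.

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.types.Types;

public class HadoopTablesRoundTrip {
  public static void main(String[] args) {
    HadoopTables tables = new HadoopTables(new Configuration());
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get()));
    String location = "/tmp/warehouse/hadoop_tables"; // hypothetical path

    // create a file-system table at the location, load it back, then drop it
    Table created = tables.create(schema, PartitionSpec.unpartitioned(), location);
    Table loaded = tables.load(location); // reads the metadata under <location>/metadata
    System.out.println(loaded.location() + " " + loaded.schema());
    tables.dropTable(location);
  }
}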
Use of org.apache.iceberg.hadoop.HadoopTables in project hive by apache.
From the class TestHiveIcebergSerDe, method testInitialize:
@Test
public void testInitialize() throws IOException, SerDeException {
  File location = tmp.newFolder();
  Assert.assertTrue(location.delete());
  Configuration conf = new Configuration();
  Properties properties = new Properties();
  properties.setProperty("location", location.toString());
  properties.setProperty(InputFormatConfig.CATALOG_NAME, Catalogs.ICEBERG_HADOOP_TABLE_NAME);
  HadoopTables tables = new HadoopTables(conf);
  tables.create(schema, location.toString());
  HiveIcebergSerDe serDe = new HiveIcebergSerDe();
  serDe.initialize(conf, properties, new Properties());
  Assert.assertEquals(IcebergObjectInspector.create(schema), serDe.getObjectInspector());
}
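The test uses the two-argument create; HadoopTables also has create overloads that take a partition spec and table properties. A brief sketch under assumed names (the class name, schema, spec, property value, and path are illustrative only):

import java.util.Collections;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.types.Types;

public class CreatePartitionedHadoopTable {
  public static void main(String[] args) {
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "category", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).identity("category").build();
    Map<String, String> props = Collections.singletonMap("write.format.default", "parquet");

    HadoopTables tables = new HadoopTables(new Configuration());
    // hypothetical location; the create call writes the first table metadata file there
    Table table = tables.create(schema, spec, props, "/tmp/warehouse/partitioned_table");
    System.out.println(table.spec());
  }
}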
Use of org.apache.iceberg.hadoop.HadoopTables in project drill by apache.
From the class IcebergQueriesTest, method setUpBeforeClass:
@BeforeClass
public static void setUpBeforeClass() throws Exception {
  startCluster(ClusterFixture.builder(dirTestWatcher));
  StoragePluginRegistry pluginRegistry = cluster.drillbit().getContext().getStorage();
  FileSystemConfig pluginConfig = (FileSystemConfig) pluginRegistry.getPlugin(DFS_PLUGIN_NAME).getConfig();
  Map<String, FormatPluginConfig> formats = new HashMap<>(pluginConfig.getFormats());
  formats.put("iceberg", IcebergFormatPluginConfig.builder().build());
  FileSystemConfig newPluginConfig = new FileSystemConfig(
      pluginConfig.getConnection(),
      pluginConfig.getConfig(),
      pluginConfig.getWorkspaces(),
      formats,
      PlainCredentialsProvider.EMPTY_CREDENTIALS_PROVIDER);
  newPluginConfig.setEnabled(pluginConfig.isEnabled());
  pluginRegistry.put(DFS_PLUGIN_NAME, newPluginConfig);

  // defining another plugin with iceberg format to ensure that DRILL-8049 is fixed
  FileSystemConfig anotherFileSystemConfig = pluginConfig.copyWithFormats(formats);
  pluginRegistry.put("dfs2", anotherFileSystemConfig);

  Configuration config = new Configuration();
  config.set(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS);
  HadoopTables tables = new HadoopTables(config);

  Schema structSchema = new Schema(
      Types.NestedField.optional(13, "struct_int_field", Types.IntegerType.get()),
      Types.NestedField.optional(14, "struct_string_field", Types.StringType.get()));
  Types.ListType repeatedStructType = Types.ListType.ofOptional(
      16, Types.StructType.of(
          Types.NestedField.optional(17, "struct_int_field", Types.IntegerType.get()),
          Types.NestedField.optional(18, "struct_string_field", Types.StringType.get())));
  Schema schema = new Schema(
      Types.NestedField.optional(1, "int_field", Types.IntegerType.get()),
      Types.NestedField.optional(2, "long_field", Types.LongType.get()),
      Types.NestedField.optional(3, "float_field", Types.FloatType.get()),
      Types.NestedField.optional(4, "double_field", Types.DoubleType.get()),
      Types.NestedField.optional(5, "string_field", Types.StringType.get()),
      Types.NestedField.optional(6, "boolean_field", Types.BooleanType.get()),
      Types.NestedField.optional(26, "time_field", Types.TimeType.get()),
      Types.NestedField.optional(27, "timestamp_field", Types.TimestampType.withoutZone()),
      Types.NestedField.optional(28, "date_field", Types.DateType.get()),
      Types.NestedField.optional(29, "decimal_field", Types.DecimalType.of(4, 2)),
      Types.NestedField.optional(30, "uuid_field", Types.UUIDType.get()),
      Types.NestedField.optional(31, "fixed_field", Types.FixedType.ofLength(10)),
      Types.NestedField.optional(32, "binary_field", Types.BinaryType.get()),
      Types.NestedField.optional(7, "list_field",
          Types.ListType.ofOptional(10, Types.StringType.get())),
      Types.NestedField.optional(8, "map_field",
          Types.MapType.ofOptional(11, 12, Types.StringType.get(), Types.FloatType.get())),
      Types.NestedField.required(9, "struct_field", structSchema.asStruct()),
      Types.NestedField.required(15, "repeated_struct_field", repeatedStructType),
      Types.NestedField.required(19, "repeated_list_field",
          Types.ListType.ofOptional(20, Types.ListType.ofOptional(21, Types.StringType.get()))),
      Types.NestedField.optional(22, "repeated_map_field",
          Types.ListType.ofOptional(23,
              Types.MapType.ofOptional(24, 25, Types.StringType.get(), Types.FloatType.get()))));
  List<String> listValue = Arrays.asList("a", "b", "c");
  Map<String, Float> mapValue = new HashMap<>();
  mapValue.put("a", 0.1F);
  mapValue.put("b", 0.2F);
  Map<String, Float> secondMapValue = new HashMap<>();
  secondMapValue.put("true", 1F);
  secondMapValue.put("false", 0F);

  Record structValue = GenericRecord.create(structSchema);
  structValue.setField("struct_int_field", 123);
  structValue.setField("struct_string_field", "abc");
  Record secondStructValue = GenericRecord.create(structSchema);
  secondStructValue.setField("struct_int_field", 321);
  secondStructValue.setField("struct_string_field", "def");

  Record record = GenericRecord.create(schema);
  record.setField("int_field", 1);
  record.setField("long_field", 100L);
  record.setField("float_field", 0.5F);
  record.setField("double_field", 1.5D);
  record.setField("string_field", "abc");
  record.setField("boolean_field", true);
  record.setField("time_field", LocalTime.of(2, 42, 42));
  record.setField("timestamp_field", LocalDateTime.of(1994, 4, 18, 11, 0, 0));
  record.setField("date_field", LocalDate.of(1994, 4, 18));
  record.setField("decimal_field", new BigDecimal("12.34"));
  record.setField("uuid_field", new byte[16]);
  record.setField("fixed_field", new byte[10]);
  record.setField("binary_field", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)));
  record.setField("list_field", listValue);
  record.setField("map_field", mapValue);
  record.setField("struct_field", structValue);
  record.setField("repeated_struct_field", Arrays.asList(structValue, structValue));
  record.setField("repeated_list_field", Arrays.asList(listValue, listValue));
  record.setField("repeated_map_field", Arrays.asList(mapValue, mapValue));
  Record nullsRecord = GenericRecord.create(schema);
  nullsRecord.setField("int_field", null);
  nullsRecord.setField("long_field", null);
  nullsRecord.setField("float_field", null);
  nullsRecord.setField("double_field", null);
  nullsRecord.setField("string_field", null);
  nullsRecord.setField("boolean_field", null);
  nullsRecord.setField("time_field", null);
  nullsRecord.setField("timestamp_field", null);
  nullsRecord.setField("date_field", null);
  nullsRecord.setField("decimal_field", null);
  nullsRecord.setField("uuid_field", null);
  nullsRecord.setField("fixed_field", null);
  nullsRecord.setField("binary_field", null);
  nullsRecord.setField("list_field", null);
  nullsRecord.setField("map_field", null);
  nullsRecord.setField("struct_field", GenericRecord.create(structSchema));
  nullsRecord.setField("repeated_struct_field", Collections.emptyList());
  nullsRecord.setField("repeated_list_field", Collections.emptyList());
  nullsRecord.setField("repeated_map_field", Collections.emptyList());
  Record secondRecord = GenericRecord.create(schema);
  secondRecord.setField("int_field", 988);
  secondRecord.setField("long_field", 543L);
  secondRecord.setField("float_field", Float.NaN);
  secondRecord.setField("double_field", Double.MAX_VALUE);
  secondRecord.setField("string_field", "def");
  secondRecord.setField("boolean_field", false);
  secondRecord.setField("time_field", LocalTime.of(3, 41, 53));
  secondRecord.setField("timestamp_field", LocalDateTime.of(1995, 9, 10, 9, 0, 0));
  secondRecord.setField("date_field", LocalDate.of(1995, 9, 10));
  secondRecord.setField("decimal_field", new BigDecimal("99.99"));
  secondRecord.setField("uuid_field", new byte[16]);
  secondRecord.setField("fixed_field", new byte[10]);
  secondRecord.setField("binary_field", ByteBuffer.wrap("world".getBytes(StandardCharsets.UTF_8)));
  secondRecord.setField("list_field", Arrays.asList("y", "n"));
  secondRecord.setField("map_field", secondMapValue);
  secondRecord.setField("struct_field", secondStructValue);
  secondRecord.setField("repeated_struct_field", Arrays.asList(structValue, secondStructValue));
  secondRecord.setField("repeated_list_field", Arrays.asList(listValue, Arrays.asList("y", "n")));
  secondRecord.setField("repeated_map_field", Arrays.asList(mapValue, secondMapValue));
  String location = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypes").toUri().getPath();
  table = tables.create(schema, location);
  writeParquetAndCommitDataFile(table, "allTypes", Arrays.asList(record, nullsRecord));
  writeParquetAndCommitDataFile(table, "allTypes_1", Collections.singleton(secondRecord));

  String avroLocation = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypesAvro").toUri().getPath();
  writeAndCommitDataFile(tables.create(structSchema, avroLocation), "allTypes", FileFormat.AVRO,
      Arrays.asList(structValue, GenericRecord.create(structSchema), secondStructValue));

  String orcLocation = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypesOrc").toUri().getPath();
  writeAndCommitDataFile(tables.create(structSchema, orcLocation), "allTypes", FileFormat.ORC,
      Arrays.asList(structValue, GenericRecord.create(structSchema), secondStructValue));

  String emptyTableLocation = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypesEmpty").toUri().getPath();
  tables.create(structSchema, emptyTableLocation);
}
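The writeParquetAndCommitDataFile and writeAndCommitDataFile helpers are not part of this excerpt. A minimal sketch of what such a helper could look like using Iceberg's generic data API; the class name, method name, file layout, and use of GenericAppenderFactory are assumptions for illustration, not the actual Drill test code.

import java.io.File;
import java.util.List;
import java.util.Locale;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.Files;
import org.apache.iceberg.Table;
import org.apache.iceberg.data.GenericAppenderFactory;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.io.OutputFile;

public class DataFileWriterSketch {
  // hypothetical helper: write records into one data file and commit it to the table
  static void writeAndCommit(Table table, String name, FileFormat format, List<Record> records) throws Exception {
    OutputFile outputFile = Files.localOutput(
        new File(table.location(), name + "." + format.name().toLowerCase(Locale.ROOT)));
    FileAppender<Record> appender = new GenericAppenderFactory(table.schema()).newAppender(outputFile, format);
    try (FileAppender<Record> open = appender) {
      open.addAll(records);
    }
    DataFile dataFile = DataFiles.builder(table.spec())
        .withInputFile(outputFile.toInputFile())
        .withMetrics(appender.metrics()) // metrics are available once the appender is closed
        .withFormat(format)
        .build();
    table.newAppend().appendFile(dataFile).commit();
  }
}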
Use of org.apache.iceberg.hadoop.HadoopTables in project drill by apache.
From the class IcebergGroupScan, method initTableScan:
public static TableScan initTableScan(IcebergFormatPlugin formatPlugin, String path, LogicalExpression condition) {
  TableScan tableScan = new HadoopTables(formatPlugin.getFsConf()).load(path).newScan();

  Map<String, String> properties = formatPlugin.getConfig().getProperties();
  if (properties != null) {
    for (Map.Entry<String, String> entry : properties.entrySet()) {
      tableScan = tableScan.option(entry.getKey(), entry.getValue());
    }
  }

  if (condition != null) {
    Expression expression = condition.accept(DrillExprToIcebergTranslator.INSTANCE, null);
    tableScan = tableScan.filter(expression);
  }

  Snapshot snapshot = formatPlugin.getConfig().getSnapshot();
  if (snapshot != null) {
    tableScan = snapshot.apply(tableScan);
  }

  Boolean caseSensitive = formatPlugin.getConfig().getCaseSensitive();
  if (caseSensitive != null) {
    tableScan = tableScan.caseSensitive(caseSensitive);
  }

  Boolean includeColumnStats = formatPlugin.getConfig().getIncludeColumnStats();
  if (includeColumnStats != null && includeColumnStats) {
    tableScan = tableScan.includeColumnStats();
  }

  Boolean ignoreResiduals = formatPlugin.getConfig().getIgnoreResiduals();
  if (ignoreResiduals != null && ignoreResiduals) {
    tableScan = tableScan.ignoreResiduals();
  }

  return tableScan;
}
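A scan built this way is typically consumed by planning it into tasks. A short illustrative sketch of that consumption; the class and method names here are assumptions, not Drill code.

import java.io.IOException;
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.io.CloseableIterable;

public class ScanPlanningSketch {
  // hypothetical consumer of a TableScan such as the one returned by initTableScan
  static void planAndInspect(TableScan tableScan) throws IOException {
    try (CloseableIterable<CombinedScanTask> tasks = tableScan.planTasks()) {
      for (CombinedScanTask combinedTask : tasks) {
        for (FileScanTask fileTask : combinedTask.files()) {
          // fileTask.file().path() names the data file to read; fileTask.residual() is the
          // per-file residual filter (always-true when ignoreResiduals() was applied)
          System.out.println(fileTask.file().path() + " residual=" + fileTask.residual());
        }
      }
    }
  }
}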