Use of org.apache.iceberg.Table in project Drill by Apache.
From class IcebergMetastore, method updateTableProperties.
/**
 * Checks table properties from the config against the table's current properties.
 * Adds config properties that are absent from the table, updates properties
 * whose values differ (using the config value), and removes table properties
 * that are absent from the config.
 * If the property sets are equal, does nothing.
 *
 * @param table Iceberg table instance
 * @param tableProperties table properties from the config
 */
private void updateTableProperties(Table table, Map<String, String> tableProperties) {
  Map<String, String> currentProperties = table.properties();
  MapDifference<String, String> difference = Maps.difference(tableProperties, currentProperties);
  if (difference.areEqual()) {
    return;
  }

  UpdateProperties updateProperties = table.updateProperties();

  // collect properties whose values differ, taking the config (left) value
  Map<String, String> propertiesToUpdate = difference.entriesDiffering().entrySet().stream()
      .collect(Collectors.toMap(
          Map.Entry::getKey,
          entry -> entry.getValue().leftValue(),
          (o, n) -> n));
  // add properties present only in the config
  propertiesToUpdate.putAll(difference.entriesOnlyOnLeft());

  logger.debug("Updating Iceberg table [{}] properties: {}", table.location(), propertiesToUpdate);
  propertiesToUpdate.forEach(updateProperties::set);

  // remove properties present only in the current table metadata
  logger.debug("Removing Iceberg table [{}] properties: {}", table.location(), difference.entriesOnlyOnRight());
  difference.entriesOnlyOnRight().keySet().forEach(updateProperties::remove);

  updateProperties.commit();
}
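The add/update/remove split above falls directly out of Guava's MapDifference views. A minimal, self-contained sketch of those semantics; the property names and values here are made up for illustration:

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.MapDifference;
import com.google.common.collect.Maps;
import java.util.Map;

public class PropertyDiffDemo {
  public static void main(String[] args) {
    // the left map plays the role of the config properties,
    // the right map plays the role of table.properties()
    Map<String, String> config = ImmutableMap.of("format", "parquet", "retries", "5");
    Map<String, String> current = ImmutableMap.of("format", "orc", "owner", "drill");

    MapDifference<String, String> difference = Maps.difference(config, current);
    System.out.println(difference.entriesDiffering());   // {format=(parquet, orc)} -> update with leftValue()
    System.out.println(difference.entriesOnlyOnLeft());  // {retries=5} -> add
    System.out.println(difference.entriesOnlyOnRight()); // {owner=drill} -> remove
  }
}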
Use of org.apache.iceberg.Table in project Drill by Apache.
From class TestParquetFileWriter, method testTypeMismatch.
@Test
public void testTypeMismatch() throws Exception {
  Schema schema = new Schema(Types.NestedField.optional(1, "int_field", Types.IntegerType.get()));
  Record record = GenericRecord.create(schema);
  record.setField("int_field", 1);
  // the second call overwrites the int value with a String, which the
  // Parquet writer cannot encode for an integer column
  record.setField("int_field", "abc");
  String location = defaultFolder.newFolder("testTypeMismatch").toURI().getPath();
  Table table = tables.create(schema, location);

  thrown.expect(IcebergMetastoreException.class);

  new ParquetFileWriter(table)
      .records(Collections.singletonList(record))
      .location(location)
      .name("typeMismatch")
      .write();
}
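GenericRecord.setField does no type validation, so the mismatch only surfaces when the writer encodes the column, and Drill wraps that failure in IcebergMetastoreException. A hypothetical fail-fast check (validateRecord is not part of Drill or Iceberg; it is sketched here from Iceberg's public type API):

// Hypothetical helper: verify primitive field values against the Java
// classes Iceberg associates with their types, so a mismatch fails
// before reaching the writer. Nested types are skipped for brevity.
private static void validateRecord(Schema schema, Record record) {
  for (Types.NestedField field : schema.columns()) {
    Object value = record.getField(field.name());
    if (value != null && field.type().isPrimitiveType()) {
      Class<?> expected = field.type().typeId().javaClass();
      if (!expected.isInstance(value)) {
        throw new IllegalArgumentException(String.format(
            "Field [%s] expects %s but got %s",
            field.name(), expected.getSimpleName(), value.getClass().getSimpleName()));
      }
    }
  }
}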
Use of org.apache.iceberg.Table in project Drill by Apache.
From class TestParquetFileWriter, method testAllTypes.
@Test
public void testAllTypes() throws Exception {
  Schema schema = new Schema(
      Types.NestedField.optional(1, "int_field", Types.IntegerType.get()),
      Types.NestedField.optional(2, "long_field", Types.LongType.get()),
      Types.NestedField.optional(3, "float_field", Types.FloatType.get()),
      Types.NestedField.optional(4, "double_field", Types.DoubleType.get()),
      Types.NestedField.optional(5, "string_field", Types.StringType.get()),
      Types.NestedField.optional(6, "boolean_field", Types.BooleanType.get()),
      Types.NestedField.optional(7, "list_field", Types.ListType.ofOptional(9, Types.StringType.get())),
      Types.NestedField.optional(8, "map_field", Types.MapType.ofOptional(10, 11, Types.StringType.get(), Types.FloatType.get())));

  List<String> listValue = Arrays.asList("a", "b", "c");
  Map<String, Float> mapValue = new HashMap<>();
  mapValue.put("a", 0.1F);
  mapValue.put("b", 0.2F);

  Record record = GenericRecord.create(schema);
  record.setField("int_field", 1);
  record.setField("long_field", 100L);
  record.setField("float_field", 0.5F);
  record.setField("double_field", 1.5D);
  record.setField("string_field", "abc");
  record.setField("boolean_field", true);
  record.setField("list_field", listValue);
  record.setField("map_field", mapValue);

  String location = defaultFolder.newFolder("testAllTypes").toURI().getPath();
  String fileName = "allTypes";
  Table table = tables.create(schema, location);

  org.apache.drill.metastore.iceberg.write.File result = new ParquetFileWriter(table)
      .records(Collections.singletonList(record))
      .location(location)
      .name(fileName)
      .write();

  String writePath = new Path(location, FileFormat.PARQUET.addExtension(fileName)).toUri().getPath();
  // the second addExtension call is a no-op: writePath already ends with .parquet
  assertEquals(new Path(FileFormat.PARQUET.addExtension(writePath)), new Path(result.location()));
  assertEquals(Long.valueOf(1), result.metrics().recordCount());

  List<Record> rows = readData(result.input(), schema);
  assertEquals(1, rows.size());

  Record row = rows.get(0);
  assertEquals(1, row.getField("int_field"));
  assertEquals(100L, row.getField("long_field"));
  assertEquals(0.5F, row.getField("float_field"));
  assertEquals(1.5D, row.getField("double_field"));
  assertEquals("abc", row.getField("string_field"));
  assertEquals(true, row.getField("boolean_field"));
  assertEquals(listValue, row.getField("list_field"));
  assertEquals(mapValue, row.getField("map_field"));
}
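The readData helper used by the assertions is not shown in this snippet. One plausible implementation based on Iceberg's generic Parquet readers; the InputFile parameter type and the helper's exact shape are assumptions:

// Assumed shape of readData(...): iterate the written file back into
// generic Records, projected onto the expected schema.
// Uses org.apache.iceberg.parquet.Parquet,
// org.apache.iceberg.data.parquet.GenericParquetReaders,
// org.apache.iceberg.io.InputFile and org.apache.iceberg.io.CloseableIterable.
private List<Record> readData(InputFile inputFile, Schema schema) throws IOException {
  try (CloseableIterable<Record> reader = Parquet.read(inputFile)
      .project(schema)
      .createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema))
      .build()) {
    return Lists.newArrayList(reader);
  }
}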
Use of org.apache.iceberg.Table in project Drill by Apache.
From class TestParquetFileWriter, method testSeveralRecords.
@Test
public void testSeveralRecords() throws Exception {
  int fieldIndex = 1;
  Schema schema = new Schema(Types.NestedField.optional(fieldIndex, "int_field", Types.IntegerType.get()));

  List<Integer> values = Arrays.asList(1, 2, 3, 3, null, null, null);
  List<Record> records = values.stream()
      .map(value -> {
        Record record = GenericRecord.create(schema);
        record.setField("int_field", value);
        return record;
      })
      .collect(Collectors.toList());

  String location = defaultFolder.newFolder("testSeveralRecords").toURI().getPath();
  Table table = tables.create(schema, location);

  org.apache.drill.metastore.iceberg.write.File result = new ParquetFileWriter(table)
      .records(records)
      .location(location)
      .name("severalRecords")
      .write();

  assertEquals(Long.valueOf(7), result.metrics().recordCount());
  assertEquals(Long.valueOf(7), result.metrics().valueCounts().get(fieldIndex));
  assertEquals(Long.valueOf(3), result.metrics().nullValueCounts().get(fieldIndex));

  List<Record> rows = readData(result.input(), schema);
  assertEquals(7, rows.size());

  List<Integer> actual = rows.stream()
      .map(row -> (Integer) row.getField("int_field"))
      .collect(Collectors.toList());
  assertEquals(values, actual);
}
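Note that valueCounts() and nullValueCounts() are keyed by Iceberg field ID (fieldIndex here is the ID passed to NestedField.optional, not a column position). A short sketch of inspecting those metrics, assuming result.metrics() returns org.apache.iceberg.Metrics as the assertions suggest:

// Print per-field value/null counts; the map keys are Iceberg field IDs.
Metrics metrics = result.metrics();
for (Types.NestedField field : schema.columns()) {
  System.out.printf("%s (id=%d): %d values, %d nulls%n",
      field.name(), field.fieldId(),
      metrics.valueCounts().get(field.fieldId()),
      metrics.nullValueCounts().get(field.fieldId()));
}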
Use of org.apache.iceberg.Table in project Presto by prestodb.
From class IcebergHadoopMetadata, method getColumnHandles.
@Override
public Map<String, ColumnHandle> getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) {
  IcebergTableHandle table = (IcebergTableHandle) tableHandle;
  Table icebergTable = getHadoopIcebergTable(resourceFactory, session, table.getSchemaTableName());
  return getColumns(icebergTable.schema(), typeManager).stream()
      .collect(toImmutableMap(IcebergColumnHandle::getName, identity()));
}
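Here toImmutableMap is Guava's collector and identity() is java.util.function.Function.identity(), so the method builds an immutable name-to-handle map that rejects duplicate column names. An equivalent loop-based sketch, assuming getColumns returns a List<IcebergColumnHandle>:

// Sketch: the same construction without the collector. Like toImmutableMap,
// it fails on duplicate column names instead of silently overwriting.
Map<String, ColumnHandle> handles = new LinkedHashMap<>();
for (IcebergColumnHandle column : getColumns(icebergTable.schema(), typeManager)) {
  if (handles.putIfAbsent(column.getName(), column) != null) {
    throw new IllegalArgumentException("Duplicate column name: " + column.getName());
  }
}
return ImmutableMap.copyOf(handles);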