Use of org.apache.iceberg.data.Record in project Hive by Apache.
The class TestHiveIcebergPartitions, method testBucketTransform.
@Test
public void testBucketTransform() throws IOException {
  Schema schema = new Schema(
      optional(1, "id", Types.LongType.get()),
      optional(2, "part_field", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("part_field", 2).build();
  List<Record> records = TestHelper.RecordsBuilder.newInstance(schema)
      .add(1L, "Part1")
      .add(2L, "Part2")
      .add(3L, "Art3")
      .build();
  Table table = testTables.createTable(shell, "part_test", schema, spec, fileFormat, records);
  HiveIcebergTestUtils.validateData(table, records, 0);
  HiveIcebergTestUtils.validateDataWithSQL(shell, "part_test", records, "id");
}
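The spec above routes each part_field value through Iceberg's 2-way bucket transform, a Murmur3-based hash reduced modulo the bucket count. Below is a minimal sketch (not from the Hive repo) of probing those assignments directly with the Transforms API contemporary with this test; newer Iceberg versions replace apply() with bucket(numBuckets).bind(type), and the method name here is illustrative.

import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Types;

// Sketch only: probe which of the two buckets each test value lands in.
static void printBucketAssignments() {
  Transform<CharSequence, Integer> bucket = Transforms.bucket(Types.StringType.get(), 2);
  for (String value : new String[] { "Part1", "Part2", "Art3" }) {
    // Murmur3 hash per the Iceberg spec, reduced modulo the bucket count (0 or 1 here)
    System.out.println(value + " -> bucket " + bucket.apply(value));
  }
}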
Use of org.apache.iceberg.data.Record in project Hive by Apache.
The class TestHiveIcebergSelects, method testMultiColumnPruning.
/**
 * Column pruning could become problematic when a single Map Task contains multiple TableScan operators where
 * different columns are pruned. This only occurs on MR, as Tez initializes a separate Map task for every
 * TableScan operator.
 */
@Test
public void testMultiColumnPruning() throws IOException {
  shell.setHiveSessionValue("hive.cbo.enable", true);

  Schema schema1 = new Schema(optional(1, "fk", Types.StringType.get()));
  List<Record> records1 = TestHelper.RecordsBuilder.newInstance(schema1).add("fk1").build();
  testTables.createTable(shell, "table1", schema1, fileFormat, records1);

  Schema schema2 = new Schema(
      optional(1, "fk", Types.StringType.get()),
      optional(2, "val", Types.StringType.get()));
  List<Record> records2 = TestHelper.RecordsBuilder.newInstance(schema2).add("fk1", "val").build();
  testTables.createTable(shell, "table2", schema2, fileFormat, records2);

  // MR is needed for the reproduction
  shell.setHiveSessionValue("hive.execution.engine", "mr");
  String query = "SELECT t2.val FROM table1 t1 JOIN table2 t2 ON t1.fk = t2.fk";
  List<Object[]> result = shell.executeStatement(query);
  Assert.assertEquals(1, result.size());
  Assert.assertArrayEquals(new Object[] { "val" }, result.get(0));
}
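A hedged follow-up sketch, not part of the original test: the per-TableScan pruning could be checked by dumping the plan through the same shell. That TestHiveShell returns EXPLAIN output row by row is an assumption here.

// Sketch: inspect the MR plan to confirm each TableScan keeps its own pruned column list.
List<Object[]> plan = shell.executeStatement(
    "EXPLAIN SELECT t2.val FROM table1 t1 JOIN table2 t2 ON t1.fk = t2.fk");
for (Object[] planRow : plan) {
  System.out.println(planRow[0]); // one line of the plan per row; look for each scan's projected columns
}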
Use of org.apache.iceberg.data.Record in project Hive by Apache.
The class TestHiveIcebergSelects, method testSelectDistinctFromTable.
@Test
public void testSelectDistinctFromTable() throws IOException {
  for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
    Type type = SUPPORTED_TYPES.get(i);
    if ((type == Types.TimestampType.withZone() || type == Types.TimeType.get()) &&
        isVectorized && fileFormat == FileFormat.ORC) {
      // ORC/TIMESTAMP_INSTANT and time are not supported vectorized types for Hive
      continue;
    }
    // TODO: remove this filter when issue #1881 is resolved
    if (type == Types.UUIDType.get() && fileFormat == FileFormat.PARQUET) {
      continue;
    }
    String tableName = type.typeId().toString().toLowerCase() + "_table_" + i;
    String columnName = type.typeId().toString().toLowerCase() + "_column";
    Schema schema = new Schema(required(1, columnName, type));
    List<Record> records = TestHelper.generateRandomRecords(schema, 4, 0L);
    int size = records.stream().map(r -> r.getField(columnName)).collect(Collectors.toSet()).size();
    testTables.createTable(shell, tableName, schema, fileFormat, records);
    List<Object[]> queryResult = shell.executeStatement(
        "select count(distinct(" + columnName + ")) from default." + tableName);
    int distinctIds = ((Long) queryResult.get(0)[0]).intValue();
    Assert.assertEquals(tableName, size, distinctIds);
  }
}
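Because generateRandomRecords is called with the fixed seed 0L, the generated data is deterministic and the expected distinct count is stable across runs. A toy, self-contained sketch of the same distinct-count computation (values and method name are illustrative):

import java.util.Arrays;
import java.util.List;

static long expectedDistinctCount() {
  // Toy stand-in for the generated column values; duplicates collapse under distinct().
  List<String> values = Arrays.asList("a", "b", "a", "c");
  return values.stream().distinct().count(); // 3: "a", "b", "c"
}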
Use of org.apache.iceberg.data.Record in project Hive by Apache.
The class TestHiveIcebergSelects, method testJoinTablesSupportedTypes.
@Test
public void testJoinTablesSupportedTypes() throws IOException {
  for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
    Type type = SUPPORTED_TYPES.get(i);
    if ((type == Types.TimestampType.withZone() || type == Types.TimeType.get()) &&
        isVectorized && fileFormat == FileFormat.ORC) {
      // ORC/TIMESTAMP_INSTANT and time are not supported vectorized types for Hive
      continue;
    }
    // TODO: remove this filter when issue #1881 is resolved
    if (type == Types.UUIDType.get() && fileFormat == FileFormat.PARQUET) {
      continue;
    }
    String tableName = type.typeId().toString().toLowerCase() + "_table_" + i;
    String columnName = type.typeId().toString().toLowerCase() + "_column";
    Schema schema = new Schema(required(1, columnName, type));
    List<Record> records = TestHelper.generateRandomRecords(schema, 1, 0L);
    testTables.createTable(shell, tableName, schema, fileFormat, records);
    List<Object[]> queryResult = shell.executeStatement(
        "select s." + columnName + ", h." + columnName + " from default." + tableName +
            " s join default." + tableName + " h on h." + columnName + "=s." + columnName);
    Assert.assertEquals("Non matching record count for table " + tableName + " with type " + type,
        1, queryResult.size());
  }
}
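For one concrete element of SUPPORTED_TYPES, the self-join statement built above renders as follows. LONG at loop index 0 is assumed purely for illustration; the names follow the tableName/columnName pattern in the test.

// Rendered form of the join statement above, assuming type LONG at loop index 0:
String rendered = "select s.long_column, h.long_column"
    + " from default.long_table_0 s"
    + " join default.long_table_0 h"
    + " on h.long_column=s.long_column";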
Use of org.apache.iceberg.data.Record in project Hive by Apache.
The class HiveIcebergTestUtils, method validateDataWithSQL.
/**
 * Validates whether the table contains the expected records. The records are retrieved by a Hive query and
 * compared as strings. The results should be sorted by a unique key so we do not end up with flaky tests.
 * @param shell Shell to execute the query
 * @param tableName The table to query
 * @param expected The expected list of Records
 * @param sortBy The column name by which we will sort
 */
public static void validateDataWithSQL(TestHiveShell shell, String tableName, List<Record> expected, String sortBy) {
  List<Object[]> rows = shell.executeStatement("SELECT * FROM " + tableName + " ORDER BY " + sortBy);
  Assert.assertEquals(expected.size(), rows.size());
  for (int i = 0; i < expected.size(); ++i) {
    Object[] row = rows.get(i);
    Record record = expected.get(i);
    Assert.assertEquals(record.size(), row.length);
    for (int j = 0; j < record.size(); ++j) {
      Object field = record.get(j);
      if (field instanceof LocalDateTime) {
        Assert.assertEquals(((LocalDateTime) field).toInstant(ZoneOffset.UTC).toEpochMilli(),
            TimestampUtils.stringToTimestamp((String) row[j]).toEpochMilli());
      } else if (field instanceof OffsetDateTime) {
        Assert.assertEquals(((OffsetDateTime) field).toInstant().toEpochMilli(),
            TimestampTZUtil.parse((String) row[j]).toEpochMilli());
      } else {
        Assert.assertEquals(field.toString(), row[j].toString());
      }
    }
  }
}
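The LocalDateTime branch above compares epoch milliseconds, interpreting the written value as UTC. A self-contained sketch of that conversion using only java.time; the timestamp literal and method name are illustrative.

import java.time.LocalDateTime;
import java.time.ZoneOffset;

static void localDateTimeComparisonSketch() {
  LocalDateTime field = LocalDateTime.of(2020, 1, 1, 12, 0);
  // Expected side: interpret the written value as UTC and take epoch millis.
  long expectedMillis = field.toInstant(ZoneOffset.UTC).toEpochMilli(); // 1577880000000L
  // Actual side: Hive returns the cell as a string such as "2020-01-01 12:00:00",
  // which the test parses back with TimestampUtils.stringToTimestamp before comparing.
  assert expectedMillis == 1577880000000L;
}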