Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.
The class TestCreateActions, method testSparkTableReservedProperties.
@Test
public void testSparkTableReservedProperties() throws Exception {
  String destTableName = "iceberg_reserved_properties";
  String source = sourceName("test_reserved_properties_table");
  String dest = destName(destTableName);
  createSourceTable(CREATE_PARQUET, source);
  assertSnapshotFileCount(SparkActions.get().snapshotTable(source).as(dest), source, dest);
  SparkTable table = loadTable(dest);
  // set a sort order: ascending on id, then descending on data
  table.table().replaceSortOrder().asc("id").desc("data").commit();
  String[] keys = { "provider", "format", "current-snapshot-id", "location", "sort-order" };
  for (String key : keys) {
    Assert.assertTrue("Created table missing reserved property " + key, table.properties().containsKey(key));
  }
  Assert.assertEquals("Unexpected provider", "iceberg", table.properties().get("provider"));
  Assert.assertEquals("Unexpected format", "iceberg/parquet", table.properties().get("format"));
  Assert.assertNotEquals("No current-snapshot-id found", "none", table.properties().get("current-snapshot-id"));
  Assert.assertTrue("Location isn't correct", table.properties().get("location").endsWith(destTableName));
  Assert.assertEquals("Sort-order isn't correct", "id ASC NULLS FIRST, data DESC NULLS LAST", table.properties().get("sort-order"));
}
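For reference, a minimal standalone sketch of the sort-order API exercised above, using a Hadoop-backed table outside the test harness; the warehouse path is hypothetical:

import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;

public class SortOrderSketch {
  public static void main(String[] args) {
    // Load a table by location; the path below is hypothetical.
    Table table = new HadoopTables().load("/tmp/warehouse/db/tbl");
    // Replace the sort order the same way the test does: ascending on
    // "id", then descending on "data". Iceberg's defaults are
    // ASC NULLS FIRST and DESC NULLS LAST, which is why the test expects
    // "id ASC NULLS FIRST, data DESC NULLS LAST" in the reserved
    // "sort-order" property.
    table.replaceSortOrder().asc("id").desc("data").commit();
    System.out.println(table.sortOrder());
  }
}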
Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.
The class TestCreateActions, method schemaEvolutionTestWithSparkSQL.
@Test
public void schemaEvolutionTestWithSparkSQL() throws Exception {
  Assume.assumeTrue("Cannot migrate to a hadoop based catalog", !type.equals("hadoop"));
  Assume.assumeTrue("Can only migrate from Spark Session Catalog", catalog.name().equals("spark_catalog"));
  String tblName = sourceName("schema_evolution_test_sql");
  // generate data, add a column with ALTER TABLE, and insert rows with the new column
  spark.range(0, 5)
      .selectExpr("CAST(id AS INT) col0", "CAST(id AS FLOAT) col1", "CAST(id AS STRING) col2")
      .write()
      .mode(SaveMode.Append)
      .saveAsTable(tblName);
  sql("ALTER TABLE %s ADD COLUMN col3 INT", tblName);
  spark.range(6, 10)
      .selectExpr("CAST(id AS INT) col0", "CAST(id AS FLOAT) col1", "CAST(id AS STRING) col2", "CAST(id AS INT) col3")
      .createOrReplaceTempView("tempdata");
  sql("INSERT INTO TABLE %s SELECT * FROM tempdata", tblName);
  List<Object[]> expectedBeforeAddColumn = sql("SELECT * FROM %s ORDER BY col0", tblName);
  List<Object[]> expectedAfterAddColumn = sql("SELECT col0, null, col1, col2, col3 FROM %s ORDER BY col0", tblName);
  // migrate the table in place
  SparkActions.get().migrateTable(tblName).execute();
  // check that the migrated Iceberg table returns the same rows as before migration
  List<Object[]> afterMigrateBeforeAddResults = sql("SELECT * FROM %s ORDER BY col0", tblName);
  assertEquals("Output must match", expectedBeforeAddColumn, afterMigrateBeforeAddResults);
  // update the schema through the Iceberg API and check output correctness
  SparkTable sparkTable = loadTable(tblName);
  sparkTable.table()
      .updateSchema()
      .addColumn("newCol", Types.IntegerType.get())
      .moveAfter("newCol", "col0")
      .commit();
  List<Object[]> afterMigrateAfterAddResults = sql("SELECT * FROM %s ORDER BY col0", tblName);
  assertEquals("Output must match", expectedAfterAddColumn, afterMigrateAfterAddResults);
}
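The schema evolution step above goes through the underlying Iceberg Table rather than Spark SQL. A minimal sketch of that call chain, assuming a Table already loaded from a catalog:

import org.apache.iceberg.Table;
import org.apache.iceberg.types.Types;

// A minimal sketch, assuming `table` was loaded from a catalog.
static void addColumnAfterCol0(Table table) {
  table.updateSchema()
      .addColumn("newCol", Types.IntegerType.get()) // new columns are optional, so existing rows read as null
      .moveAfter("newCol", "col0")                  // reposition to match the expected SELECT column order
      .commit();                                    // single atomic metadata commit
}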
Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.
The class TestCreateActions, method validateTables.
private void validateTables(String source, String dest) throws NoSuchTableException, ParseException {
  List<Row> expected = spark.table(source).collectAsList();
  SparkTable destTable = loadTable(dest);
  Assert.assertEquals("Provider should be iceberg", "iceberg", destTable.properties().get(TableCatalog.PROP_PROVIDER));
  List<Row> actual = spark.table(dest).collectAsList();
  Assert.assertTrue(
      String.format("Rows in migrated table did not match\nExpected: %s\nFound: %s", expected, actual),
      expected.containsAll(actual) && actual.containsAll(expected));
}
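Note that the two-way containsAll check above is order-insensitive but not multiplicity-exact: lists like [a, a, b] and [a, b, b] would pass it. A stricter comparison would count occurrences; a sketch, not part of the Iceberg test suite:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Compares two lists as multisets: every element must appear the same
// number of times in both, regardless of order.
static <T> boolean sameRows(List<T> expected, List<T> actual) {
  Map<T, Long> counts = new HashMap<>();
  expected.forEach(row -> counts.merge(row, 1L, Long::sum));
  actual.forEach(row -> counts.merge(row, -1L, Long::sum));
  return counts.values().stream().allMatch(count -> count == 0L);
}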
Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.
The class TestCreateActions, method testProperties.
@Test
public void testProperties() throws Exception {
  String source = sourceName("test_properties_table");
  String dest = destName("iceberg_properties");
  Map<String, String> props = Maps.newHashMap();
  props.put("city", "New Orleans");
  props.put("note", "Jazz");
  createSourceTable(CREATE_PARQUET, source);
  for (Map.Entry<String, String> keyValue : props.entrySet()) {
    spark.sql(String.format("ALTER TABLE %s SET TBLPROPERTIES (\"%s\" = \"%s\")", source, keyValue.getKey(), keyValue.getValue()));
  }
  assertSnapshotFileCount(SparkActions.get().snapshotTable(source).as(dest).tableProperty("dogs", "sundance"), source, dest);
  SparkTable table = loadTable(dest);
  // the snapshot should carry over the source properties plus the one set on the action
  Map<String, String> expectedProps = Maps.newHashMap();
  expectedProps.putAll(props);
  expectedProps.put("dogs", "sundance");
  for (Map.Entry<String, String> entry : expectedProps.entrySet()) {
    Assert.assertTrue("Created table missing property " + entry.getKey(), table.properties().containsKey(entry.getKey()));
    Assert.assertEquals("Property value is not the expected value", entry.getValue(), table.properties().get(entry.getKey()));
  }
}
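A minimal sketch of the snapshot action used above, outside the test harness; it assumes a running SparkSession with both catalogs configured, and the table names are hypothetical:

import org.apache.iceberg.actions.SnapshotTable;
import org.apache.iceberg.spark.actions.SparkActions;

static void snapshotWithExtraProps() {
  SnapshotTable.Result result = SparkActions.get()
      .snapshotTable("spark_catalog.default.source_tbl")  // existing Spark/Hive table (hypothetical)
      .as("iceberg_catalog.default.source_tbl_snapshot")  // new Iceberg snapshot table (hypothetical)
      .tableProperty("dogs", "sundance")                  // extra property layered onto the snapshot
      .execute();
  System.out.println(result.importedDataFilesCount() + " data files imported");
}

The snapshot references the source table's data files without copying them, so the source table is left untouched; only the extra table property and the new Iceberg metadata are written.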
Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.
The class TestCreateActions, method testAddColumnOnMigratedTableAtEnd.
@Test
public void testAddColumnOnMigratedTableAtEnd() throws Exception {
  Assume.assumeTrue("Cannot migrate to a hadoop based catalog", !type.equals("hadoop"));
  Assume.assumeTrue("Can only migrate from Spark Session Catalog", catalog.name().equals("spark_catalog"));
  String source = sourceName("test_add_column_migrated_table");
  String dest = source;
  createSourceTable(CREATE_PARQUET, source);
  List<Object[]> expected1 = sql("select *, null from %s order by id", source);
  List<Object[]> expected2 = sql("select *, null, null from %s order by id", source);
  // migrate the table in place
  SparkActions.get().migrateTable(source).execute();
  SparkTable sparkTable = loadTable(dest);
  Table table = sparkTable.table();
  // test column addition on the migrated table via the Iceberg API
  Schema beforeSchema = table.schema();
  String newCol1 = "newCol1";
  sparkTable.table().updateSchema().addColumn(newCol1, Types.IntegerType.get()).commit();
  Schema afterSchema = table.schema();
  Assert.assertNull(beforeSchema.findField(newCol1));
  Assert.assertNotNull(afterSchema.findField(newCol1));
  // reads should succeed without any exceptions
  List<Object[]> results1 = sql("select * from %s order by id", dest);
  Assert.assertTrue(results1.size() > 0);
  assertEquals("Output must match", expected1, results1);
  // add a second column through Spark SQL
  String newCol2 = "newCol2";
  sql("ALTER TABLE %s ADD COLUMN %s INT", dest, newCol2);
  StructType schema = spark.table(dest).schema();
  Assert.assertTrue(Arrays.asList(schema.fieldNames()).contains(newCol2));
  // reads should succeed without any exceptions
  List<Object[]> results2 = sql("select * from %s order by id", dest);
  Assert.assertTrue(results2.size() > 0);
  assertEquals("Output must match", expected2, results2);
}
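A minimal sketch of the migrate action used above; the table name is hypothetical. Unlike snapshotTable, migrateTable replaces the source table with an Iceberg table under the same identifier, which is why source and dest are the same string in this test:

import org.apache.iceberg.actions.MigrateTable;
import org.apache.iceberg.spark.actions.SparkActions;

static void migrateInPlace() {
  MigrateTable.Result result = SparkActions.get()
      .migrateTable("spark_catalog.default.source_tbl")  // hypothetical session-catalog table
      .execute();
  System.out.println(result.migratedDataFilesCount() + " data files migrated");
}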