Example 6 with SparkTable

Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.

The class TestCreateActions, method testSparkTableReservedProperties.

@Test
public void testSparkTableReservedProperties() throws Exception {
    String destTableName = "iceberg_reserved_properties";
    String source = sourceName("test_reserved_properties_table");
    String dest = destName(destTableName);
    createSourceTable(CREATE_PARQUET, source);
    assertSnapshotFileCount(SparkActions.get().snapshotTable(source).as(dest), source, dest);
    SparkTable table = loadTable(dest);
    // set a sort order so the reserved "sort-order" property is populated
    table.table().replaceSortOrder().asc("id").desc("data").commit();
    String[] keys = { "provider", "format", "current-snapshot-id", "location", "sort-order" };
    for (String entry : keys) {
        Assert.assertTrue("Created table missing reserved property " + entry, table.properties().containsKey(entry));
    }
    Assert.assertEquals("Unexpected provider", "iceberg", table.properties().get("provider"));
    Assert.assertEquals("Unexpected format", "iceberg/parquet", table.properties().get("format"));
    Assert.assertNotEquals("No current-snapshot-id found", "none", table.properties().get("current-snapshot-id"));
    Assert.assertTrue("Location isn't correct", table.properties().get("location").endsWith(destTableName));
    Assert.assertEquals("Sort-order isn't correct", "id ASC NULLS FIRST, data DESC NULLS LAST", table.properties().get("sort-order"));
}
Also used : SparkTable(org.apache.iceberg.spark.source.SparkTable) Test(org.junit.Test)
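
A minimal standalone sketch of the same flow outside the JUnit harness. It assumes an active SparkSession already configured with an Iceberg catalog; the table name db.iceberg_reserved_properties is hypothetical. The reserved properties are computed by SparkTable.properties() rather than stored in table metadata, so they should also surface through plain SQL:

import org.apache.spark.sql.SparkSession;

public class ReservedPropsSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.active();
        // Reserved properties (provider, format, current-snapshot-id,
        // location, sort-order) appear alongside user-set properties:
        spark.sql("SHOW TBLPROPERTIES db.iceberg_reserved_properties").show(false);
    }
}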

Example 7 with SparkTable

Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.

The class TestCreateActions, method schemaEvolutionTestWithSparkSQL.

@Test
public void schemaEvolutionTestWithSparkSQL() throws Exception {
    Assume.assumeTrue("Cannot migrate to a hadoop based catalog", !type.equals("hadoop"));
    Assume.assumeTrue("Can only migrate from Spark Session Catalog", catalog.name().equals("spark_catalog"));
    String tblName = sourceName("schema_evolution_test_sql");
    // Data generation and column addition
    spark.range(0, 5).selectExpr("CAST(id as INT) col0", "CAST(id AS FLOAT) col1", "CAST(id AS STRING) col2").write().mode(SaveMode.Append).saveAsTable(tblName);
    sql("ALTER TABLE %s ADD COLUMN col3 INT", tblName);
    spark.range(6, 10).selectExpr("CAST(id AS INT) col0", "CAST(id AS FLOAT) col1", "CAST(id AS STRING) col2", "CAST(id AS INT) col3").createOrReplaceTempView("tempdata");
    sql("INSERT INTO TABLE %s SELECT * FROM tempdata", tblName);
    List<Object[]> expectedBeforeAddColumn = sql("SELECT * FROM %s ORDER BY col0", tblName);
    List<Object[]> expectedAfterAddColumn = sql("SELECT col0, null, col1, col2, col3 FROM %s ORDER BY col0", tblName);
    // Migrate table
    SparkActions.get().migrateTable(tblName).execute();
    // check that the post-migration (Iceberg) output matches the pre-migration output
    List<Object[]> afterMigrateBeforeAddResults = sql("SELECT * FROM %s ORDER BY col0", tblName);
    assertEquals("Output must match", expectedBeforeAddColumn, afterMigrateBeforeAddResults);
    // Update schema and check output correctness
    SparkTable sparkTable = loadTable(tblName);
    sparkTable.table().updateSchema().addColumn("newCol", Types.IntegerType.get()).moveAfter("newCol", "col0").commit();
    List<Object[]> afterMigrateAfterAddResults = sql("SELECT * FROM %s ORDER BY col0", tblName);
    assertEquals("Output must match", expectedAfterAddColumn, afterMigrateAfterAddResults);
}
Also used : SparkTable(org.apache.iceberg.spark.source.SparkTable) Test(org.junit.Test)
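
The core of the test is the in-place migrate action. A minimal sketch under the same assumptions the test states (a Spark session catalog backed by org.apache.iceberg.spark.SparkSessionCatalog; the table name db.legacy_table is hypothetical):

import org.apache.iceberg.actions.MigrateTable;
import org.apache.iceberg.spark.actions.SparkActions;

public class MigrateSketch {
    public static void main(String[] args) {
        // Replaces the Hive/Parquet table with an Iceberg table of the same name
        MigrateTable.Result result = SparkActions.get()
                .migrateTable("db.legacy_table")
                .execute();
        System.out.println("Migrated data files: " + result.migratedDataFilesCount());
    }
}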

Example 8 with SparkTable

Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.

The class TestCreateActions, method validateTables.

private void validateTables(String source, String dest) throws NoSuchTableException, ParseException {
    List<Row> expected = spark.table(source).collectAsList();
    SparkTable destTable = loadTable(dest);
    Assert.assertEquals("Provider should be iceberg", "iceberg", destTable.properties().get(TableCatalog.PROP_PROVIDER));
    List<Row> actual = spark.table(dest).collectAsList();
    Assert.assertTrue(String.format("Rows in migrated table did not match\nExpected :%s rows \nFound    :%s", expected, actual), expected.containsAll(actual) && actual.containsAll(expected));
}
Also used : Row(org.apache.spark.sql.Row) SparkTable(org.apache.iceberg.spark.source.SparkTable)
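
loadTable(dest) is a helper defined elsewhere in TestCreateActions; it resolves the destination through the configured Spark catalog, roughly as in the sketch below. The catalogManager() accessor is a Spark internal that Iceberg's own Spark3Util also uses; the catalog and table names here are hypothetical:

import org.apache.iceberg.spark.source.SparkTable;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.connector.catalog.Identifier;
import org.apache.spark.sql.connector.catalog.TableCatalog;

public class LoadTableSketch {
    public static void main(String[] args) throws Exception {
        SparkSession spark = SparkSession.active();
        TableCatalog catalog =
                (TableCatalog) spark.sessionState().catalogManager().catalog("spark_catalog");
        SparkTable table =
                (SparkTable) catalog.loadTable(Identifier.of(new String[] { "db" }, "dest_table"));
        System.out.println(table.properties().get(TableCatalog.PROP_PROVIDER)); // "iceberg"
    }
}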

Example 9 with SparkTable

Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.

The class TestCreateActions, method testProperties.

@Test
public void testProperties() throws Exception {
    String source = sourceName("test_properties_table");
    String dest = destName("iceberg_properties");
    Map<String, String> props = Maps.newHashMap();
    props.put("city", "New Orleans");
    props.put("note", "Jazz");
    createSourceTable(CREATE_PARQUET, source);
    for (Map.Entry<String, String> keyValue : props.entrySet()) {
        spark.sql(String.format("ALTER TABLE %s SET TBLPROPERTIES (\"%s\" = \"%s\")", source, keyValue.getKey(), keyValue.getValue()));
    }
    assertSnapshotFileCount(SparkActions.get().snapshotTable(source).as(dest).tableProperty("dogs", "sundance"), source, dest);
    SparkTable table = loadTable(dest);
    Map<String, String> expectedProps = Maps.newHashMap();
    expectedProps.putAll(props);
    expectedProps.put("dogs", "sundance");
    for (Map.Entry<String, String> entry : expectedProps.entrySet()) {
        Assert.assertTrue("Created table missing property " + entry.getKey(), table.properties().containsKey(entry.getKey()));
        Assert.assertEquals("Property value is not the expected value", entry.getValue(), table.properties().get(entry.getKey()));
    }
}
Also used : Map(java.util.Map) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) SparkTable(org.apache.iceberg.spark.source.SparkTable) Test(org.junit.Test)
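
Besides the per-key tableProperty(key, value) used above, the SnapshotTable action also accepts a bulk map via tableProperties. A minimal sketch (table names are hypothetical):

import java.util.Map;

import org.apache.iceberg.actions.SnapshotTable;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.spark.actions.SparkActions;

public class SnapshotPropsSketch {
    public static void main(String[] args) {
        Map<String, String> extraProps = ImmutableMap.of("city", "New Orleans", "note", "Jazz");
        // The snapshot leaves the source table intact and copies its properties;
        // extraProps are applied on top of them.
        SnapshotTable.Result result = SparkActions.get()
                .snapshotTable("db.source_table")
                .as("db.iceberg_snapshot")
                .tableProperties(extraProps)
                .execute();
        System.out.println("Imported data files: " + result.importedDataFilesCount());
    }
}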

Example 10 with SparkTable

Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.

The class TestCreateActions, method testAddColumnOnMigratedTableAtEnd.

@Test
public void testAddColumnOnMigratedTableAtEnd() throws Exception {
    Assume.assumeTrue("Cannot migrate to a hadoop based catalog", !type.equals("hadoop"));
    Assume.assumeTrue("Can only migrate from Spark Session Catalog", catalog.name().equals("spark_catalog"));
    String source = sourceName("test_add_column_migrated_table");
    String dest = source;
    createSourceTable(CREATE_PARQUET, source);
    List<Object[]> expected1 = sql("select *, null from %s order by id", source);
    List<Object[]> expected2 = sql("select *, null, null from %s order by id", source);
    // migrate table
    SparkActions.get().migrateTable(source).execute();
    SparkTable sparkTable = loadTable(dest);
    Table table = sparkTable.table();
    // test column addition on migrated table
    Schema beforeSchema = table.schema();
    String newCol1 = "newCol1";
    sparkTable.table().updateSchema().addColumn(newCol1, Types.IntegerType.get()).commit();
    Schema afterSchema = table.schema();
    Assert.assertNull(beforeSchema.findField(newCol1));
    Assert.assertNotNull(afterSchema.findField(newCol1));
    // reads should succeed without any exceptions
    List<Object[]> results1 = sql("select * from %s order by id", dest);
    Assert.assertTrue(results1.size() > 0);
    assertEquals("Output must match", results1, expected1);
    String newCol2 = "newCol2";
    sql("ALTER TABLE %s ADD COLUMN %s INT", dest, newCol2);
    StructType schema = spark.table(dest).schema();
    Assert.assertTrue(Arrays.asList(schema.fieldNames()).contains(newCol2));
    // reads should succeed without any exceptions
    List<Object[]> results2 = sql("select * from %s order by id", dest);
    Assert.assertTrue(results2.size() > 0);
    assertEquals("Output must match", results2, expected2);
}
Also used : CatalogTable(org.apache.spark.sql.catalyst.catalog.CatalogTable) SnapshotTable(org.apache.iceberg.actions.SnapshotTable) MigrateTable(org.apache.iceberg.actions.MigrateTable) Table(org.apache.iceberg.Table) SparkTable(org.apache.iceberg.spark.source.SparkTable) StructType(org.apache.spark.sql.types.StructType) Schema(org.apache.iceberg.Schema) Test(org.junit.Test)
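
The schema update itself is plain Iceberg API, independent of the test harness. A minimal sketch using Spark3Util to load the underlying Table (the table name is hypothetical):

import org.apache.iceberg.Table;
import org.apache.iceberg.spark.Spark3Util;
import org.apache.iceberg.types.Types;
import org.apache.spark.sql.SparkSession;

public class AddColumnSketch {
    public static void main(String[] args) throws Exception {
        SparkSession spark = SparkSession.active();
        Table table = Spark3Util.loadIcebergTable(spark, "db.migrated_table");
        // addColumn creates an optional column; existing data files are not
        // rewritten, and old rows read the new column as null.
        table.updateSchema()
                .addColumn("newCol1", Types.IntegerType.get())
                .commit();
    }
}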

Aggregations

SparkTable (org.apache.iceberg.spark.source.SparkTable): 23 uses
Test (org.junit.Test): 12 uses
Identifier (org.apache.spark.sql.connector.catalog.Identifier): 8 uses
File (java.io.File): 7 uses
SparkCatalog (org.apache.iceberg.spark.SparkCatalog): 7 uses
Map (java.util.Map): 6 uses
Table (org.apache.iceberg.Table): 6 uses
StreamSupport (java.util.stream.StreamSupport): 5 uses
DeleteOrphanFiles (org.apache.iceberg.actions.DeleteOrphanFiles): 5 uses
Maps (org.apache.iceberg.relocated.com.google.common.collect.Maps): 5 uses
SparkSchemaUtil (org.apache.iceberg.spark.SparkSchemaUtil): 5 uses
SparkSessionCatalog (org.apache.iceberg.spark.SparkSessionCatalog): 5 uses
Transform (org.apache.spark.sql.connector.expressions.Transform): 5 uses
After (org.junit.After): 5 uses
Assert (org.junit.Assert): 5 uses
Schema (org.apache.iceberg.Schema): 4 uses
MigrateTable (org.apache.iceberg.actions.MigrateTable): 3 uses
SnapshotTable (org.apache.iceberg.actions.SnapshotTable): 3 uses
NoSuchTableException (org.apache.spark.sql.catalyst.analysis.NoSuchTableException): 3 uses
CatalogTable (org.apache.spark.sql.catalyst.catalog.CatalogTable): 3 uses