Example 1 with MigrateTable

Use of org.apache.iceberg.actions.MigrateTable in the apache/iceberg project.

From class TestCreateActions, method testAddColumnOnMigratedTableAtMiddle:

@Test
public void testAddColumnOnMigratedTableAtMiddle() throws Exception {
    Assume.assumeTrue("Cannot migrate to a hadoop based catalog", !type.equals("hadoop"));
    Assume.assumeTrue("Can only migrate from Spark Session Catalog", catalog.name().equals("spark_catalog"));
    String source = sourceName("test_add_column_migrated_table_middle");
    String dest = source;
    createSourceTable(CREATE_PARQUET, source);
    // migrate table
    SparkActions.get().migrateTable(source).execute();
    SparkTable sparkTable = loadTable(dest);
    Table table = sparkTable.table();
    List<Object[]> expected = sql("select id, null, data from %s order by id", source);
    // test column addition on migrated table
    Schema beforeSchema = table.schema();
    String newCol1 = "newCol";
    sparkTable.table().updateSchema().addColumn(newCol1, Types.IntegerType.get()).moveAfter(newCol1, "id").commit();
    Schema afterSchema = table.schema();
    Assert.assertNull(beforeSchema.findField(newCol1));
    Assert.assertNotNull(afterSchema.findField(newCol1));
    // reads should succeed
    List<Object[]> results = sql("select * from %s order by id", dest);
    Assert.assertTrue(results.size() > 0);
    assertEquals("Output must match", results, expected);
}
Also used : CatalogTable(org.apache.spark.sql.catalyst.catalog.CatalogTable) SnapshotTable(org.apache.iceberg.actions.SnapshotTable) MigrateTable(org.apache.iceberg.actions.MigrateTable) Table(org.apache.iceberg.Table) SparkTable(org.apache.iceberg.spark.source.SparkTable) Schema(org.apache.iceberg.Schema) Test(org.junit.Test)
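
The core pattern in these tests is the migrate action itself: SparkActions is the entry point, migrateTable() names the source table, and execute() performs the conversion. A minimal standalone sketch, assuming an active SparkSession configured with an Iceberg-enabled session catalog; the table name spark_catalog.db.events is hypothetical:

import org.apache.iceberg.actions.MigrateTable;
import org.apache.iceberg.spark.actions.SparkActions;

public class MigrateTableSketch {
    public static void main(String[] args) {
        // Hypothetical table name; SparkActions.get() picks up the active
        // SparkSession, which must be configured with an Iceberg catalog.
        MigrateTable.Result result = SparkActions.get()
            .migrateTable("spark_catalog.db.events")
            .execute();
        // Migration reuses the source table's data files; nothing is rewritten.
        System.out.println("Migrated data files: " + result.migratedDataFilesCount());
    }
}

Migrate replaces the source table's catalog entry in place, which is why each test above sets dest equal to source.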

Example 2 with MigrateTable

Use of org.apache.iceberg.actions.MigrateTable in the apache/iceberg project.

From class TestCreateActions, method testAddColumnOnMigratedTableAtEnd:

@Test
public void testAddColumnOnMigratedTableAtEnd() throws Exception {
    Assume.assumeTrue("Cannot migrate to a hadoop based catalog", !type.equals("hadoop"));
    Assume.assumeTrue("Can only migrate from Spark Session Catalog", catalog.name().equals("spark_catalog"));
    String source = sourceName("test_add_column_migrated_table");
    String dest = source;
    createSourceTable(CREATE_PARQUET, source);
    List<Object[]> expected1 = sql("select *, null from %s order by id", source);
    List<Object[]> expected2 = sql("select *, null, null from %s order by id", source);
    // migrate table
    SparkActions.get().migrateTable(source).execute();
    SparkTable sparkTable = loadTable(dest);
    Table table = sparkTable.table();
    // test column addition on migrated table
    Schema beforeSchema = table.schema();
    String newCol1 = "newCol1";
    sparkTable.table().updateSchema().addColumn(newCol1, Types.IntegerType.get()).commit();
    Schema afterSchema = table.schema();
    Assert.assertNull(beforeSchema.findField(newCol1));
    Assert.assertNotNull(afterSchema.findField(newCol1));
    // reads should succeed without any exceptions
    List<Object[]> results1 = sql("select * from %s order by id", dest);
    Assert.assertTrue(results1.size() > 0);
    assertEquals("Output must match", results1, expected1);
    String newCol2 = "newCol2";
    sql("ALTER TABLE %s ADD COLUMN %s INT", dest, newCol2);
    StructType schema = spark.table(dest).schema();
    Assert.assertTrue(Arrays.asList(schema.fieldNames()).contains(newCol2));
    // reads should succeed without any exceptions
    List<Object[]> results2 = sql("select * from %s order by id", dest);
    Assert.assertTrue(results2.size() > 0);
    assertEquals("Output must match", results2, expected2);
}
Also used : CatalogTable(org.apache.spark.sql.catalyst.catalog.CatalogTable) SnapshotTable(org.apache.iceberg.actions.SnapshotTable) MigrateTable(org.apache.iceberg.actions.MigrateTable) Table(org.apache.iceberg.Table) SparkTable(org.apache.iceberg.spark.source.SparkTable) StructType(org.apache.spark.sql.types.StructType) Schema(org.apache.iceberg.Schema) Test(org.junit.Test)
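
This test exercises both schema-evolution paths against the same migrated table: Iceberg's native UpdateSchema API (reached via Table.updateSchema()) and Spark SQL DDL routed through the catalog. A condensed sketch of the two paths; addTwoColumns is a hypothetical helper, and table and dest are assumed to refer to the same migrated table:

import org.apache.iceberg.Table;
import org.apache.iceberg.types.Types;
import org.apache.spark.sql.SparkSession;

public class SchemaEvolutionSketch {
    // Hypothetical helper; 'table' and 'dest' name the same migrated table.
    static void addTwoColumns(Table table, SparkSession spark, String dest) {
        // Path 1: Iceberg API. Commits a new schema version to table metadata;
        // existing data files are untouched and read back with nulls for the new column.
        table.updateSchema()
            .addColumn("newCol1", Types.IntegerType.get())
            .commit();
        // Path 2: Spark SQL DDL. The equivalent metadata-only change through the catalog.
        spark.sql(String.format("ALTER TABLE %s ADD COLUMN newCol2 INT", dest));
    }
}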

Example 3 with MigrateTable

Use of org.apache.iceberg.actions.MigrateTable in the apache/iceberg project.

From class TestCreateActions, method removeColumnsAtEnd:

@Test
public void removeColumnsAtEnd() throws Exception {
    Assume.assumeTrue("Cannot migrate to a hadoop based catalog", !type.equals("hadoop"));
    Assume.assumeTrue("Can only migrate from Spark Session Catalog", catalog.name().equals("spark_catalog"));
    String source = sourceName("test_remove_column_migrated_table");
    String dest = source;
    String colName1 = "newCol1";
    String colName2 = "newCol2";
    spark.range(10).selectExpr("cast(id as INT)", "CAST(id as INT) " + colName1, "CAST(id as INT) " + colName2).write().mode(SaveMode.Overwrite).saveAsTable(dest);
    List<Object[]> expected1 = sql("select id, %s from %s order by id", colName1, source);
    List<Object[]> expected2 = sql("select id from %s order by id", source);
    // migrate table
    SparkActions.get().migrateTable(source).execute();
    SparkTable sparkTable = loadTable(dest);
    Table table = sparkTable.table();
    // test column removal on migrated table
    Schema beforeSchema = table.schema();
    sparkTable.table().updateSchema().deleteColumn(colName1).commit();
    Schema afterSchema = table.schema();
    Assert.assertNotNull(beforeSchema.findField(colName1));
    Assert.assertNull(afterSchema.findField(colName1));
    // reads should succeed without any exceptions
    List<Object[]> results1 = sql("select * from %s order by id", dest);
    Assert.assertTrue(results1.size() > 0);
    assertEquals("Output must match", expected1, results1);
    sql("ALTER TABLE %s DROP COLUMN %s", dest, colName2);
    StructType schema = spark.table(dest).schema();
    Assert.assertFalse(Arrays.asList(schema.fieldNames()).contains(colName2));
    // reads should succeed without any exceptions
    List<Object[]> results2 = sql("select * from %s order by id", dest);
    Assert.assertTrue(results2.size() > 0);
    assertEquals("Output must match", expected2, results2);
}
Also used : CatalogTable(org.apache.spark.sql.catalyst.catalog.CatalogTable) SnapshotTable(org.apache.iceberg.actions.SnapshotTable) MigrateTable(org.apache.iceberg.actions.MigrateTable) Table(org.apache.iceberg.Table) SparkTable(org.apache.iceberg.spark.source.SparkTable) StructType(org.apache.spark.sql.types.StructType) Schema(org.apache.iceberg.Schema) HadoopInputFile(org.apache.parquet.hadoop.util.HadoopInputFile) File(java.io.File) Test(org.junit.Test)
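
SnapshotTable appears in these imports alongside MigrateTable; the difference is that migrate converts the source table in place (hence dest == source in every test above), while snapshot leaves the source untouched and creates a new Iceberg table over the same data files. A minimal sketch with hypothetical table names:

import org.apache.iceberg.actions.SnapshotTable;
import org.apache.iceberg.spark.actions.SparkActions;

public class SnapshotTableSketch {
    public static void main(String[] args) {
        // Hypothetical names; the snapshot is an independent Iceberg table
        // that references the source table's existing data files.
        SnapshotTable.Result result = SparkActions.get()
            .snapshotTable("spark_catalog.db.events")
            .as("db.events_iceberg")
            .execute();
        System.out.println("Imported data files: " + result.importedDataFilesCount());
    }
}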

Aggregations

Schema (org.apache.iceberg.Schema): 3
Table (org.apache.iceberg.Table): 3
MigrateTable (org.apache.iceberg.actions.MigrateTable): 3
SnapshotTable (org.apache.iceberg.actions.SnapshotTable): 3
SparkTable (org.apache.iceberg.spark.source.SparkTable): 3
CatalogTable (org.apache.spark.sql.catalyst.catalog.CatalogTable): 3
Test (org.junit.Test): 3
StructType (org.apache.spark.sql.types.StructType): 2
File (java.io.File): 1
HadoopInputFile (org.apache.parquet.hadoop.util.HadoopInputFile): 1