Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.
From the class TestCreateActions, method removeColumnsAtEnd:
@Test
public void removeColumnsAtEnd() throws Exception {
    Assume.assumeTrue("Cannot migrate to a hadoop based catalog", !type.equals("hadoop"));
    Assume.assumeTrue("Can only migrate from Spark Session Catalog", catalog.name().equals("spark_catalog"));
    String source = sourceName("test_remove_column_migrated_table");
    String dest = source;
    String colName1 = "newCol1";
    String colName2 = "newCol2";
    File location = temp.newFolder();
    spark.range(10)
        .selectExpr("cast(id as INT)", "CAST(id as INT) " + colName1, "CAST(id as INT) " + colName2)
        .write()
        .mode(SaveMode.Overwrite)
        .saveAsTable(dest);
    List<Object[]> expected1 = sql("select id, %s from %s order by id", colName1, source);
    List<Object[]> expected2 = sql("select id from %s order by id", source);
    // migrate table
    SparkActions.get().migrateTable(source).execute();
    SparkTable sparkTable = loadTable(dest);
    Table table = sparkTable.table();
    // test column removal on migrated table
    Schema beforeSchema = table.schema();
    sparkTable.table().updateSchema().deleteColumn(colName1).commit();
    Schema afterSchema = table.schema();
    Assert.assertNotNull(beforeSchema.findField(colName1));
    Assert.assertNull(afterSchema.findField(colName1));
    // reads should succeed without any exceptions
    List<Object[]> results1 = sql("select * from %s order by id", dest);
    Assert.assertTrue(results1.size() > 0);
    assertEquals("Output must match", expected1, results1);
    sql("ALTER TABLE %s DROP COLUMN %s", dest, colName2);
    StructType schema = spark.table(dest).schema();
    Assert.assertFalse(Arrays.asList(schema.fieldNames()).contains(colName2));
    // reads should succeed without any exceptions
    List<Object[]> results2 = sql("select * from %s order by id", dest);
    Assert.assertTrue(results2.size() > 0);
    assertEquals("Output must match", expected2, results2);
}
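The schema change at the heart of this test is Iceberg's UpdateSchema API, reached through SparkTable.table(). A minimal standalone sketch of that drop-column pattern follows; the class and method names are illustrative, and table is assumed to be an already-loaded org.apache.iceberg.Table (e.g. sparkTable.table() above):

import org.apache.iceberg.Table;

final class DropColumnSketch {
    // Stage and atomically commit removal of a single column. The change is
    // metadata-only: existing data files are untouched, and readers simply
    // stop projecting the dropped field.
    static void dropColumn(Table table, String colName) {
        table.updateSchema()          // begin a pending schema update
            .deleteColumn(colName)    // stage the column removal
            .commit();                // publish the new schema atomically
    }
}

The test verifies the same invariant through both APIs: table.schema().findField(colName1) becomes null after the commit, and SQL reads of the migrated table continue to succeed.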
Use of org.apache.iceberg.spark.source.SparkTable in project iceberg by apache.
From the class TestCreateActions, method schemaEvolutionTestWithSparkAPI:
@Test
public void schemaEvolutionTestWithSparkAPI() throws Exception {
    Assume.assumeTrue("Cannot migrate to a hadoop based catalog", !type.equals("hadoop"));
    Assume.assumeTrue("Can only migrate from Spark Session Catalog", catalog.name().equals("spark_catalog"));
    File location = temp.newFolder();
    String tblName = sourceName("schema_evolution_test");
    // Data generation and partition addition
    spark.range(0, 5)
        .selectExpr("CAST(id as INT) as col0", "CAST(id AS FLOAT) col2", "CAST(id AS LONG) col3")
        .write()
        .mode(SaveMode.Append)
        .parquet(location.toURI().toString());
    Dataset<Row> rowDataset = spark.range(6, 10)
        .selectExpr("CAST(id as INT) as col0", "CAST(id AS STRING) col1", "CAST(id AS FLOAT) col2", "CAST(id AS LONG) col3");
    rowDataset.write().mode(SaveMode.Append).parquet(location.toURI().toString());
    spark.read()
        .schema(rowDataset.schema())
        .parquet(location.toURI().toString())
        .write()
        .saveAsTable(tblName);
    List<Object[]> expectedBeforeAddColumn = sql("SELECT * FROM %s ORDER BY col0", tblName);
    List<Object[]> expectedAfterAddColumn = sql("SELECT col0, null, col1, col2, col3 FROM %s ORDER BY col0", tblName);
    // Migrate table
    SparkActions.get().migrateTable(tblName).execute();
    // check that the migrated Iceberg table returns the same rows as the non-Iceberg source
    List<Object[]> afterMigrateBeforeAddResults = sql("SELECT * FROM %s ORDER BY col0", tblName);
    assertEquals("Output must match", expectedBeforeAddColumn, afterMigrateBeforeAddResults);
    // Update schema and check output correctness
    SparkTable sparkTable = loadTable(tblName);
    sparkTable.table()
        .updateSchema()
        .addColumn("newCol", Types.IntegerType.get())
        .moveAfter("newCol", "col0")
        .commit();
    List<Object[]> afterMigrateAfterAddResults = sql("SELECT * FROM %s ORDER BY col0", tblName);
    assertEquals("Output must match", expectedAfterAddColumn, afterMigrateAfterAddResults);
}
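The complementary evolution pattern is adding a column and repositioning it, which the test exercises with addColumn plus moveAfter. A hedged standalone sketch, with illustrative class and method names and table again assumed to be a loaded Iceberg Table:

import org.apache.iceberg.Table;
import org.apache.iceberg.types.Types;

final class AddColumnSketch {
    // Add an int column and place it immediately after anchorCol. Columns
    // added this way are optional, so rows written before the change read
    // the new column as null.
    static void addColumnAfter(Table table, String newCol, String anchorCol) {
        table.updateSchema()
            .addColumn(newCol, Types.IntegerType.get()) // new columns are optional by default
            .moveAfter(newCol, anchorCol)               // reorder within the same pending update
            .commit();
    }
}

That null-for-old-rows behavior is exactly what expectedAfterAddColumn encodes with its literal null projection.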
Use of org.apache.iceberg.spark.source.SparkTable in project OpenLineage by OpenLineage.
From the class IcebergHandlerTest, method testGetVersionString:
@Test
public void testGetVersionString() throws NoSuchTableException {
    SparkCatalog sparkCatalog = mock(SparkCatalog.class);
    SparkTable sparkTable = mock(SparkTable.class, RETURNS_DEEP_STUBS);
    Identifier identifier = Identifier.of(new String[] { "database", "schema" }, "table");
    when(sparkCatalog.loadTable(identifier)).thenReturn(sparkTable);
    when(sparkTable.table().currentSnapshot().snapshotId()).thenReturn(1500100900L);
    Optional<String> version = icebergHandler.getDatasetVersion(sparkCatalog, identifier, Collections.emptyMap());
    assertTrue(version.isPresent());
    assertEquals("1500100900", version.get());
}
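Mockito's RETURNS_DEEP_STUBS answer is what lets the single when(...) above stub the whole table().currentSnapshot().snapshotId() chain without constructing each intermediate mock. Against a real table, the value being simulated is just the current snapshot id; a hedged sketch of reading it directly (the helper class and method are illustrative, not OpenLineage's implementation):

import java.util.Optional;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.spark.source.SparkTable;

final class SnapshotVersionSketch {
    // Derive a version string from the table's current snapshot, or return
    // empty if the table has no snapshots yet (e.g. freshly created).
    static Optional<String> snapshotVersion(SparkTable sparkTable) {
        Snapshot current = sparkTable.table().currentSnapshot();
        return Optional.ofNullable(current).map(s -> String.valueOf(s.snapshotId()));
    }
}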