Use of org.apache.spark.sql.Row in project iceberg by apache.
Class TestCherrypickSnapshotProcedure, method testCherrypickSnapshotRefreshesRelationCache.
@Test
public void testCherrypickSnapshotRefreshesRelationCache() {
  sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING iceberg", tableName);
  sql("ALTER TABLE %s SET TBLPROPERTIES ('%s' 'true')", tableName, WRITE_AUDIT_PUBLISH_ENABLED);

  Dataset<Row> query = spark.sql("SELECT * FROM " + tableName + " WHERE id = 1");
  query.createOrReplaceTempView("tmp");

  spark.sql("CACHE TABLE tmp");

  assertEquals("View should not produce rows", ImmutableList.of(), sql("SELECT * FROM tmp"));

  spark.conf().set("spark.wap.id", "1");

  sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName);

  assertEquals(
      "Should not see rows from staged snapshot",
      ImmutableList.of(),
      sql("SELECT * FROM %s", tableName));

  Table table = validationCatalog.loadTable(tableIdent);
  Snapshot wapSnapshot = Iterables.getOnlyElement(table.snapshots());

  sql("CALL %s.system.cherrypick_snapshot('%s', %dL)", catalogName, tableIdent, wapSnapshot.snapshotId());

  assertEquals(
      "Cherrypick snapshot should be visible",
      ImmutableList.of(row(1L, "a")),
      sql("SELECT * FROM tmp"));

  sql("UNCACHE TABLE tmp");
}
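For orientation, the write-audit-publish (WAP) flow this test exercises can be sketched end to end. This is a minimal sketch, assuming a catalog named spark_catalog and a table db.t; the wap id value and the lookup of the staged snapshot id through the snapshots metadata table are illustrative, not taken from the test.

import org.apache.spark.sql.SparkSession;

public class WapPublishSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().getOrCreate();
    String table = "spark_catalog.db.t"; // hypothetical table

    // Stage writes instead of publishing them ('write.wap.enabled' is the
    // table property behind WRITE_AUDIT_PUBLISH_ENABLED).
    spark.sql(String.format(
        "ALTER TABLE %s SET TBLPROPERTIES ('write.wap.enabled' 'true')", table));

    // Tag the session; the INSERT below commits a staged, unpublished snapshot.
    spark.conf().set("spark.wap.id", "audit-1");
    spark.sql(String.format("INSERT INTO %s VALUES (1, 'a')", table));

    // Find the staged snapshot through the snapshots metadata table
    // (illustrative; the test instead reads it from the Table API).
    long stagedId = spark.sql(String.format(
            "SELECT snapshot_id FROM %s.snapshots ORDER BY committed_at DESC LIMIT 1", table))
        .first().getLong(0);

    // Publish the audited snapshot by cherry-picking it onto the current state.
    spark.sql(String.format(
        "CALL spark_catalog.system.cherrypick_snapshot('db.t', %d)", stagedId));
  }
}

The cached tmp view is what the test is really about: once cherrypick_snapshot publishes the staged snapshot, the cached query returns the new row without any explicit refresh, showing that the procedure invalidates Spark's relation cache.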
Use of org.apache.spark.sql.Row in project iceberg by apache.
Class TestConflictValidation, method testOverwritePartitionSnapshotIsolation2.
@Test
public void testOverwritePartitionSnapshotIsolation2() throws Exception {
  Table table = validationCatalog.loadTable(tableIdent);
  final long snapshotId = table.currentSnapshot().snapshotId();

  // This should delete a data file
  sql("DELETE FROM %s WHERE id='1'", tableName);

  // Validating from previous snapshot finds conflicts
  List<SimpleRecord> records = Lists.newArrayList(new SimpleRecord(1, "a"));
  spark.createDataFrame(records, SimpleRecord.class).coalesce(1).writeTo(tableName).append();
  Dataset<Row> conflictingDf = spark.createDataFrame(records, SimpleRecord.class);

  AssertHelpers.assertThrowsCause(
      "Conflicting deleted data files should throw exception",
      ValidationException.class,
      "Found conflicting deleted files that can apply to records matching [id=1]",
      () -> {
        try {
          conflictingDf
              .writeTo(tableName)
              .option(SparkWriteOptions.VALIDATE_FROM_SNAPSHOT_ID, String.valueOf(snapshotId))
              .option(SparkWriteOptions.ISOLATION_LEVEL, IsolationLevel.SNAPSHOT.toString())
              .overwritePartitions();
        } catch (NoSuchTableException e) {
          throw new RuntimeException(e);
        }
      });

  // Validating from latest snapshot should succeed
  table.refresh();
  long newSnapshotId = table.currentSnapshot().snapshotId();
  conflictingDf
      .writeTo(tableName)
      .option(SparkWriteOptions.VALIDATE_FROM_SNAPSHOT_ID, String.valueOf(newSnapshotId))
      .option(SparkWriteOptions.ISOLATION_LEVEL, IsolationLevel.SNAPSHOT.toString())
      .overwritePartitions();
}
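All three conflict-validation tests in this class share one write pattern; a hedged sketch of it as a standalone helper follows. The helper name overwriteValidated is mine, but the options and classes are the ones the tests use.

import org.apache.iceberg.IsolationLevel;
import org.apache.iceberg.spark.SparkWriteOptions;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;

public class ValidatedOverwriteSketch {
  // Overwrites the partitions touched by df, failing with a
  // ValidationException if a conflicting file was committed to the table
  // after the snapshot identified by fromSnapshotId.
  static void overwriteValidated(
      Dataset<Row> df, String table, long fromSnapshotId, IsolationLevel level)
      throws NoSuchTableException {
    df.writeTo(table)
        .option(SparkWriteOptions.VALIDATE_FROM_SNAPSHOT_ID, String.valueOf(fromSnapshotId))
        .option(SparkWriteOptions.ISOLATION_LEVEL, level.toString())
        .overwritePartitions();
  }
}

Broadly, the isolation level controls what counts as a conflict, as the assertion messages in these tests suggest: SERIALIZABLE (next test) rejects any new files that can contain records in the affected partitions, while SNAPSHOT only rejects deletes that could apply to the overwritten records. In both cases, re-validating from the refreshed latest snapshot succeeds because nothing conflicting has been committed since that point.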
Use of org.apache.spark.sql.Row in project iceberg by apache.
Class TestConflictValidation, method testOverwritePartitionSerializableIsolation.
@Test
public void testOverwritePartitionSerializableIsolation() throws Exception {
  Table table = validationCatalog.loadTable(tableIdent);
  final long snapshotId = table.currentSnapshot().snapshotId();

  List<SimpleRecord> records = Lists.newArrayList(new SimpleRecord(1, "a"));
  spark.createDataFrame(records, SimpleRecord.class).writeTo(tableName).append();

  // Validating from previous snapshot finds conflicts
  Dataset<Row> conflictingDf = spark.createDataFrame(records, SimpleRecord.class);
  AssertHelpers.assertThrowsCause(
      "Conflicting deleted data files should throw exception",
      ValidationException.class,
      "Found conflicting files that can contain records matching partitions [id=1]",
      () -> {
        try {
          conflictingDf
              .writeTo(tableName)
              .option(SparkWriteOptions.VALIDATE_FROM_SNAPSHOT_ID, String.valueOf(snapshotId))
              .option(SparkWriteOptions.ISOLATION_LEVEL, IsolationLevel.SERIALIZABLE.toString())
              .overwritePartitions();
        } catch (NoSuchTableException e) {
          throw new RuntimeException(e);
        }
      });

  // Validating from latest snapshot should succeed
  table.refresh();
  long newSnapshotId = table.currentSnapshot().snapshotId();
  conflictingDf
      .writeTo(tableName)
      .option(SparkWriteOptions.VALIDATE_FROM_SNAPSHOT_ID, String.valueOf(newSnapshotId))
      .option(SparkWriteOptions.ISOLATION_LEVEL, IsolationLevel.SERIALIZABLE.toString())
      .overwritePartitions();
}
Use of org.apache.spark.sql.Row in project iceberg by apache.
Class TestConflictValidation, method testOverwritePartitionSnapshotIsolation.
@Test
public void testOverwritePartitionSnapshotIsolation() throws Exception {
  List<SimpleRecord> records = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(1, "b"));
  spark.createDataFrame(records, SimpleRecord.class).coalesce(1).writeTo(tableName).append();

  Table table = validationCatalog.loadTable(tableIdent);
  final long snapshotId = table.currentSnapshot().snapshotId();

  // This should generate a delete file
  sql("DELETE FROM %s WHERE data='a'", tableName);

  // Validating from previous snapshot finds conflicts
  Dataset<Row> conflictingDf = spark.createDataFrame(records, SimpleRecord.class);
  AssertHelpers.assertThrowsCause(
      "Conflicting deleted data files should throw exception",
      ValidationException.class,
      "Found new conflicting delete files that can apply to records matching [id=1]",
      () -> {
        try {
          conflictingDf
              .writeTo(tableName)
              .option(SparkWriteOptions.VALIDATE_FROM_SNAPSHOT_ID, String.valueOf(snapshotId))
              .option(SparkWriteOptions.ISOLATION_LEVEL, IsolationLevel.SNAPSHOT.toString())
              .overwritePartitions();
        } catch (NoSuchTableException e) {
          throw new RuntimeException(e);
        }
      });

  // Validating from latest snapshot should succeed
  table.refresh();
  long newSnapshotId = table.currentSnapshot().snapshotId();
  conflictingDf
      .writeTo(tableName)
      .option(SparkWriteOptions.VALIDATE_FROM_SNAPSHOT_ID, String.valueOf(newSnapshotId))
      .option(SparkWriteOptions.ISOLATION_LEVEL, IsolationLevel.SNAPSHOT.toString())
      .overwritePartitions();
}
Use of org.apache.spark.sql.Row in project iceberg by apache.
Class TestUpdate, method testUpdateRefreshesRelationCache.
@Test
public void testUpdateRefreshesRelationCache() {
  createAndInitTable("id INT, dep STRING");
  sql("ALTER TABLE %s ADD PARTITION FIELD dep", tableName);

  append(tableName, "{ \"id\": 1, \"dep\": \"hr\" }\n" + "{ \"id\": 3, \"dep\": \"hr\" }");
  append(tableName, "{ \"id\": 1, \"dep\": \"hardware\" }\n" + "{ \"id\": 2, \"dep\": \"hardware\" }");

  Dataset<Row> query = spark.sql("SELECT * FROM " + tableName + " WHERE id = 1");
  query.createOrReplaceTempView("tmp");

  spark.sql("CACHE TABLE tmp");

  assertEquals(
      "View should have correct data",
      ImmutableList.of(row(1, "hardware"), row(1, "hr")),
      sql("SELECT * FROM tmp ORDER BY id, dep"));

  sql("UPDATE %s SET id = -1 WHERE id = 1", tableName);

  Table table = validationCatalog.loadTable(tableIdent);
  Assert.assertEquals("Should have 3 snapshots", 3, Iterables.size(table.snapshots()));

  Snapshot currentSnapshot = table.currentSnapshot();
  if (mode(table) == COPY_ON_WRITE) {
    validateCopyOnWrite(currentSnapshot, "2", "2", "2");
  } else {
    validateMergeOnRead(currentSnapshot, "2", "2", "2");
  }

  assertEquals(
      "Should have expected rows",
      ImmutableList.of(row(-1, "hardware"), row(-1, "hr"), row(2, "hardware"), row(3, "hr")),
      sql("SELECT * FROM %s ORDER BY id, dep", tableName));

  assertEquals(
      "Should refresh the relation cache",
      ImmutableList.of(),
      sql("SELECT * FROM tmp ORDER BY id, dep"));

  spark.sql("UNCACHE TABLE tmp");
}
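The mode(table), validateCopyOnWrite, and validateMergeOnRead helpers are defined elsewhere in the test base class and are not shown in this snippet. A plausible reconstruction of mode(table), assuming it simply reads Iceberg's write.update.mode table property:

import org.apache.iceberg.RowLevelOperationMode;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;

public class UpdateModeSketch {
  // Resolves the configured UPDATE mode for a table, defaulting to
  // copy-on-write when the property is unset.
  static RowLevelOperationMode mode(Table table) {
    String modeName = table.properties().getOrDefault(
        TableProperties.UPDATE_MODE, TableProperties.UPDATE_MODE_DEFAULT);
    return RowLevelOperationMode.fromName(modeName);
  }
}

Whichever branch runs, the validate helper checks the UPDATE commit's snapshot summary; the "2", "2", "2" arguments are presumably the expected summary counts (changed partitions and file counts) for the two rewritten partitions.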