Use of org.projectnessie.model.Reference in project presto by prestodb.
The class TestNessieMultiBranching, method testNamespaceVisibility.
@Test
public void testNamespaceVisibility() throws NessieConflictException, NessieNotFoundException {
    Reference one = createBranch("branchOne");
    Reference two = createBranch("branchTwo");
    Session sessionOne = sessionOnRef(one.getName());
    Session sessionTwo = sessionOnRef(two.getName());
    assertQuerySucceeds(sessionOne, "CREATE SCHEMA namespace_one");
    assertQuerySucceeds(sessionOne, "SHOW SCHEMAS FROM iceberg LIKE 'namespace_one'");
    assertQuerySucceeds(sessionTwo, "CREATE SCHEMA namespace_two");
    assertQuerySucceeds(sessionTwo, "SHOW SCHEMAS FROM iceberg LIKE 'namespace_two'");
    // TODO: enable this after bump to Iceberg 0.14.0
    // namespace_two shouldn't be visible on branchOne
    // assertQueryFails(sessionOne, "SHOW SCHEMAS FROM iceberg LIKE 'namespace_two'", ".*Schema 'iceberg.namespace_two' does not exist");
    // namespace_one shouldn't be visible on branchTwo
    // assertQueryFails(sessionTwo, "SHOW SCHEMAS FROM iceberg LIKE 'namespace_one'", ".*Schema 'iceberg.namespace_one' does not exist");
}
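The sessionOnRef helper is not part of the snippet. A minimal sketch of what such a helper might look like, assuming the Iceberg connector exposes the Nessie reference as a catalog session property (the property name nessie_reference_name is an assumption, not confirmed by the source):

private Session sessionOnRef(String reference) {
    // Hypothetical helper: pins the Iceberg catalog of a test session to a Nessie reference.
    // The session-property name below is an assumption for illustration.
    return Session.builder(getSession())
            .setCatalogSessionProperty("iceberg", "nessie_reference_name", reference)
            .build();
}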
Use of org.projectnessie.model.Reference in project urban-eureka by errir503.
The class TestNessieMultiBranching, method testTableDataVisibility.
@Test
public void testTableDataVisibility() throws NessieConflictException, NessieNotFoundException {
    assertQuerySucceeds("CREATE SCHEMA namespace_one");
    assertQuerySucceeds("CREATE TABLE namespace_one.tbl (a int)");
    assertQuerySucceeds("INSERT INTO namespace_one.tbl (a) VALUES (1)");
    assertQuerySucceeds("INSERT INTO namespace_one.tbl (a) VALUES (2)");
    Reference one = createBranch("branchOneWithTable");
    Reference two = createBranch("branchTwoWithTable");
    Session sessionOne = sessionOnRef(one.getName());
    Session sessionTwo = sessionOnRef(two.getName());
    assertQuerySucceeds(sessionOne, "INSERT INTO namespace_one.tbl (a) VALUES (3)");
    assertQuerySucceeds(sessionTwo, "INSERT INTO namespace_one.tbl (a) VALUES (5)");
    assertQuerySucceeds(sessionTwo, "INSERT INTO namespace_one.tbl (a) VALUES (6)");
    // the main branch should still have 2 entries
    assertThat(computeScalar("SELECT count(*) FROM namespace_one.tbl")).isEqualTo(2L);
    MaterializedResult rows = computeActual("SELECT * FROM namespace_one.tbl");
    assertThat(rows.getMaterializedRows()).hasSize(2);
    assertEqualsIgnoreOrder(
        rows.getMaterializedRows(),
        resultBuilder(getSession(), rows.getTypes()).row(1).row(2).build().getMaterializedRows());
    // there should be 3 entries on branchOneWithTable
    assertThat(computeScalar(sessionOne, "SELECT count(*) FROM namespace_one.tbl")).isEqualTo(3L);
    rows = computeActual(sessionOne, "SELECT * FROM namespace_one.tbl");
    assertThat(rows.getMaterializedRows()).hasSize(3);
    assertEqualsIgnoreOrder(
        rows.getMaterializedRows(),
        resultBuilder(sessionOne, rows.getTypes()).row(1).row(2).row(3).build().getMaterializedRows());
    // and 4 entries on branchTwoWithTable
    assertThat(computeScalar(sessionTwo, "SELECT count(*) FROM namespace_one.tbl")).isEqualTo(4L);
    rows = computeActual(sessionTwo, "SELECT * FROM namespace_one.tbl");
    assertThat(rows.getMaterializedRows()).hasSize(4);
    assertEqualsIgnoreOrder(
        rows.getMaterializedRows(),
        resultBuilder(sessionTwo, rows.getTypes()).row(1).row(2).row(5).row(6).build().getMaterializedRows());
    // retrieve the second-newest commit hash and query the table at that hash
    List<LogResponse.LogEntry> logEntries = nessieApiV1.getCommitLog().refName(two.getName()).get().getLogEntries();
    assertThat(logEntries).isNotEmpty();
    String hash = logEntries.get(1).getCommitMeta().getHash();
    Session sessionTwoAtHash = sessionOnRef(two.getName(), hash);
    // TODO: enable this after bump to Iceberg 0.14.0
    // at this hash there were only 3 rows
    // assertThat(computeScalar(sessionTwoAtHash, "SELECT count(*) FROM namespace_one.tbl")).isEqualTo(3L);
    // rows = computeActual(sessionTwoAtHash, "SELECT * FROM namespace_one.tbl");
    // assertThat(rows.getMaterializedRows()).hasSize(3);
    // assertEqualsIgnoreOrder(rows.getMaterializedRows(), resultBuilder(sessionTwoAtHash, rows.getTypes()).row(1).row(2).row(5).build().getMaterializedRows());
}
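The two-argument sessionOnRef overload used for time travel is likewise not shown. A hedged sketch, assuming the connector accepts the reference name and commit hash as separate catalog session properties (both property names are assumptions for illustration):

private Session sessionOnRef(String reference, String hash) {
    // Hypothetical helper: pins the session to a specific commit hash on a reference.
    // Both session-property names are assumptions, not confirmed by the source.
    return Session.builder(getSession())
            .setCatalogSessionProperty("iceberg", "nessie_reference_name", reference)
            .setCatalogSessionProperty("iceberg", "nessie_reference_hash", hash)
            .build();
}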
Use of org.projectnessie.model.Reference in project nessie by projectnessie.
The class AbstractCompatibilityTests, method commit.
@Test
void commit() throws Exception {
    Branch defaultBranch = api.getDefaultBranch();
    Branch branch = Branch.of("commitToBranch", defaultBranch.getHash());
    Reference created =
        api.createReference().sourceRefName(defaultBranch.getName()).reference(branch).create();
    assertThat(created).isEqualTo(branch);
    ContentKey key = ContentKey.of("my", "tables", "table_name");
    IcebergTable content = IcebergTable.of("metadata-location", 42L, 43, 44, 45, "content-id");
    String commitMessage = "hello world";
    Put operation = Put.of(key, content);
    Branch branchNew =
        api.commitMultipleOperations()
            .commitMeta(CommitMeta.fromMessage(commitMessage))
            .operation(operation)
            .branch(branch)
            .commit();
    assertThat(branchNew).isNotEqualTo(branch).extracting(Branch::getName).isEqualTo(branch.getName());
    LogResponse commitLog = api.getCommitLog().refName(branch.getName()).get();
    assertThat(commitLog.getLogEntries())
        .hasSize(1)
        .map(LogEntry::getCommitMeta)
        .map(CommitMeta::getMessage)
        .containsExactly(commitMessage);
}
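After the commit, the table can be read back at the new branch head to confirm it round-trips. A minimal sketch using the NessieApiV1 read path (getContent with refName/hashOnRef); this verification step is illustrative and not part of the original test:

// Read the committed content back at the new branch head.
Map<ContentKey, Content> contents =
    api.getContent().key(key).refName(branchNew.getName()).hashOnRef(branchNew.getHash()).get();
assertThat(contents.get(key)).isInstanceOf(IcebergTable.class);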
Use of org.projectnessie.model.Reference in project nessie by projectnessie.
The class GCImpl, method identifyExpiredContents.
/**
 * Identify the expired contents using a two-step traversal algorithm.
 *
 * <h2>Algorithm for identifying the live contents and returning the bloom filter per content id</h2>
 *
 * <p>Walk through each reference (both live and dead) distributively (one Spark task per
 * reference).
 *
 * <p>While traversing from the head commit of a reference (using a DETACHED reference to fetch
 * commits from a dead reference), for each live commit (a commit that is not expired based on the
 * cutoff time), add the contents of its put operations to the bloom filter.
 *
 * <p>Collect the live content keys for this reference just before the cutoff time (at the first
 * expired commit head). These are used to identify the commit head of each live content key at
 * the cutoff time, to support time travel.
 *
 * <p>While traversing the expired commits (commits that are expired based on the cutoff time), if
 * a commit is the head commit for its content key, add its contents to the bloom filter;
 * otherwise move on to the next expired commit.
 *
 * <p>Stop traversing the expired commits once one live commit has been processed for every live
 * content key. This is an optimization that avoids traversing all the commits.
 *
 * <p>Collect the bloom filter per content id from each task and merge them.
 *
 * <h2>Algorithm for identifying the expired contents and returning the list of globally expired
 * contents per content id per reference</h2>
 *
 * <p>Walk through each reference (both live and dead) distributively (one Spark task per
 * reference).
 *
 * <p>For each commit in the reference (using a DETACHED reference to fetch commits from a dead
 * reference), check it against the bloom filter to decide whether the contents of its put
 * operations are globally expired. If they are, add the contents to the expired output for this
 * content id for this reference.
 *
 * <p>Overall, the contents at or after the cutoff time, and the contents mapped to the commit
 * head of a live key at the cutoff timestamp, are retained.
 *
 * @param session spark session for distributed computation
 * @return {@link IdentifiedResult} object holding the expired contents per content id
 */
public IdentifiedResult identifyExpiredContents(SparkSession session) {
    try (NessieApiV1 api = GCUtil.getApi(gcParams.getNessieClientConfigs())) {
        DistributedIdentifyContents distributedIdentifyContents =
            new DistributedIdentifyContents(session, gcParams);
        List<Reference> liveReferences = api.getAllReferences().get().getReferences();
        Map<String, Instant> droppedReferenceTimeMap = collectDeadReferences(api);
        // As this list of references is passed from the Spark driver to the executors, use the
        // available Immutables JSON serialization instead of adding Java serialization to the
        // classes.
        List<String> allRefs =
            liveReferences.stream().map(GCUtil::serializeReference).collect(Collectors.toList());
        if (droppedReferenceTimeMap.size() > 0) {
            allRefs.addAll(droppedReferenceTimeMap.keySet());
        }
        long bloomFilterSize =
            gcParams.getBloomFilterExpectedEntries() == null
                ? getTotalCommitsInDefaultReference(api)
                : gcParams.getBloomFilterExpectedEntries();
        // Identify the live contents and build the bloom filter per content id.
        Map<String, ContentBloomFilter> liveContentsBloomFilterMap =
            distributedIdentifyContents.getLiveContentsBloomFilters(
                allRefs, bloomFilterSize, droppedReferenceTimeMap);
        // Identify the expired contents.
        return distributedIdentifyContents.getIdentifiedResults(liveContentsBloomFilterMap, allRefs);
    }
}
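A hedged usage sketch: the identify phase could be driven from a local Spark session roughly as below. The GCImpl constructor taking the GC parameters is an assumption here, not confirmed by the source:

// Hypothetical driver code; the GCImpl constructor signature is an assumption.
SparkSession spark =
    SparkSession.builder().appName("nessie-gc-identify").master("local[*]").getOrCreate();
try {
    IdentifiedResult expired = new GCImpl(gcParams).identifyExpiredContents(spark);
    // The result holds the identified expired contents, grouped per content id.
    System.out.println(expired);
} finally {
    spark.stop();
}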