Search in sources :

Example 16 with Reference

use of org.projectnessie.model.Reference in project presto by prestodb.

the class TestNessieMultiBranching method testNamespaceVisibility.

/**
 * Creates one schema on each of two freshly created branches and verifies that a
 * {@code SHOW SCHEMAS} query for that schema succeeds in the session bound to the same branch.
 */
@Test
public void testNamespaceVisibility() throws NessieConflictException, NessieNotFoundException {
    Reference firstBranch = createBranch("branchOne");
    Reference secondBranch = createBranch("branchTwo");
    Session firstSession = sessionOnRef(firstBranch.getName());
    Session secondSession = sessionOnRef(secondBranch.getName());

    String showNamespaceOne = "SHOW SCHEMAS FROM iceberg LIKE 'namespace_one'";
    String showNamespaceTwo = "SHOW SCHEMAS FROM iceberg LIKE 'namespace_two'";

    // Schema created and listed through the session on the first branch.
    assertQuerySucceeds(firstSession, "CREATE SCHEMA namespace_one");
    assertQuerySucceeds(firstSession, showNamespaceOne);

    // Schema created and listed through the session on the second branch.
    assertQuerySucceeds(secondSession, "CREATE SCHEMA namespace_two");
    assertQuerySucceeds(secondSession, showNamespaceTwo);

    // TODO: enable this after bump to Iceberg 0.14.0
    // namespace_two shouldn't be visible on branchOne
    // assertQueryFails(firstSession, showNamespaceTwo, ".*Schema 'iceberg.namespace_two' does not exist");
    // namespace_one shouldn't be visible on branchTwo
    // assertQueryFails(secondSession, showNamespaceOne, ".*Schema 'iceberg.namespace_one' does not exist");
}
Also used : Reference(org.projectnessie.model.Reference) Session(com.facebook.presto.Session) Test(org.testng.annotations.Test)

Example 17 with Reference

use of org.projectnessie.model.Reference in project urban-eureka by errir503.

the class TestNessieMultiBranching method testNamespaceVisibility.

/**
 * Exercises schema creation on two separate branches: each branch-bound session creates its own
 * schema and then runs a {@code SHOW SCHEMAS ... LIKE} query for it, both of which must succeed.
 */
@Test
public void testNamespaceVisibility() throws NessieConflictException, NessieNotFoundException {
    final Reference refOne = createBranch("branchOne");
    final Reference refTwo = createBranch("branchTwo");

    final Session onBranchOne = sessionOnRef(refOne.getName());
    final Session onBranchTwo = sessionOnRef(refTwo.getName());

    // branchOne: create the schema, then query for it by name.
    assertQuerySucceeds(
            onBranchOne,
            "CREATE SCHEMA namespace_one");
    assertQuerySucceeds(
            onBranchOne,
            "SHOW SCHEMAS FROM iceberg LIKE 'namespace_one'");

    // branchTwo: same steps with its own schema.
    assertQuerySucceeds(
            onBranchTwo,
            "CREATE SCHEMA namespace_two");
    assertQuerySucceeds(
            onBranchTwo,
            "SHOW SCHEMAS FROM iceberg LIKE 'namespace_two'");

    // TODO: enable this after bump to Iceberg 0.14.0
    // namespace_two shouldn't be visible on branchOne
    // assertQueryFails(onBranchOne, "SHOW SCHEMAS FROM iceberg LIKE 'namespace_two'", ".*Schema 'iceberg.namespace_two' does not exist");
    // namespace_one shouldn't be visible on branchTwo
    // assertQueryFails(onBranchTwo, "SHOW SCHEMAS FROM iceberg LIKE 'namespace_one'", ".*Schema 'iceberg.namespace_one' does not exist");
}
Also used : Reference(org.projectnessie.model.Reference) Session(com.facebook.presto.Session) Test(org.testng.annotations.Test)

Example 18 with Reference

use of org.projectnessie.model.Reference in project urban-eureka by errir503.

the class TestNessieMultiBranching method testTableDataVisibility.

/**
 * Verifies that rows inserted on one branch are not visible on other branches.
 *
 * <p>A table with two rows is created through the default session, two branches are created
 * afterwards, and additional rows are inserted through a session bound to each branch. The
 * default session must still see 2 rows, the first branch 3 rows and the second branch 4 rows.
 *
 * <p>The checks against a specific commit hash are currently disabled (see the TODO below).
 */
@Test
public void testTableDataVisibility() throws NessieConflictException, NessieNotFoundException {
    assertQuerySucceeds("CREATE SCHEMA namespace_one");
    assertQuerySucceeds("CREATE TABLE namespace_one.tbl (a int)");
    assertQuerySucceeds("INSERT INTO namespace_one.tbl (a) VALUES (1)");
    assertQuerySucceeds("INSERT INTO namespace_one.tbl (a) VALUES (2)");
    Reference one = createBranch("branchOneWithTable");
    Reference two = createBranch("branchTwoWithTable");
    Session sessionOne = sessionOnRef(one.getName());
    Session sessionTwo = sessionOnRef(two.getName());
    assertQuerySucceeds(sessionOne, "INSERT INTO namespace_one.tbl (a) VALUES (3)");
    assertQuerySucceeds(sessionTwo, "INSERT INTO namespace_one.tbl (a) VALUES (5)");
    assertQuerySucceeds(sessionTwo, "INSERT INTO namespace_one.tbl (a) VALUES (6)");
    // main branch should still have 2 entries
    assertThat(computeScalar("SELECT count(*) FROM namespace_one.tbl")).isEqualTo(2L);
    MaterializedResult rows = computeActual("SELECT * FROM namespace_one.tbl");
    assertThat(rows.getMaterializedRows()).hasSize(2);
    assertEqualsIgnoreOrder(rows.getMaterializedRows(), resultBuilder(getSession(), rows.getTypes()).row(1).row(2).build().getMaterializedRows());
    // there should be 3 entries on this branch
    assertThat(computeScalar(sessionOne, "SELECT count(*) FROM namespace_one.tbl")).isEqualTo(3L);
    rows = computeActual(sessionOne, "SELECT * FROM namespace_one.tbl");
    assertThat(rows.getMaterializedRows()).hasSize(3);
    assertEqualsIgnoreOrder(rows.getMaterializedRows(), resultBuilder(sessionOne, rows.getTypes()).row(1).row(2).row(3).build().getMaterializedRows());
    // and 4 entries on this branch
    assertThat(computeScalar(sessionTwo, "SELECT count(*) FROM namespace_one.tbl")).isEqualTo(4L);
    rows = computeActual(sessionTwo, "SELECT * FROM namespace_one.tbl");
    assertThat(rows.getMaterializedRows()).hasSize(4);
    assertEqualsIgnoreOrder(rows.getMaterializedRows(), resultBuilder(sessionTwo, rows.getTypes()).row(1).row(2).row(5).row(6).build().getMaterializedRows());
    // retrieve the second to the last commit hash and query the table with that hash
    List<LogResponse.LogEntry> logEntries = nessieApiV1.getCommitLog().refName(two.getName()).get().getLogEntries();
    // Index 1 is read below, so at least two entries are required; asserting only "not empty"
    // would let a single-entry log fail with IndexOutOfBoundsException instead of a test failure.
    assertThat(logEntries.size()).isGreaterThanOrEqualTo(2);
    String hash = logEntries.get(1).getCommitMeta().getHash();
    // Only referenced by the disabled assertions below; kept so they can be re-enabled as-is.
    Session sessionTwoAtHash = sessionOnRef(two.getName(), hash);
    // TODO: enable this after bump to Iceberg 0.14.0
    // at this hash there were only 3 rows
    // assertThat(computeScalar(sessionTwoAtHash, "SELECT count(*) FROM namespace_one.tbl")).isEqualTo(3L);
    // rows = computeActual(sessionTwoAtHash, "SELECT * FROM namespace_one.tbl");
    // assertThat(rows.getMaterializedRows()).hasSize(3);
    // assertEqualsIgnoreOrder(rows.getMaterializedRows(), resultBuilder(sessionTwoAtHash, rows.getTypes()).row(1).row(2).row(5).build().getMaterializedRows());
}
Also used : Reference(org.projectnessie.model.Reference) MaterializedResult(com.facebook.presto.testing.MaterializedResult) Session(com.facebook.presto.Session) Test(org.testng.annotations.Test)

Example 19 with Reference

use of org.projectnessie.model.Reference in project nessie by projectnessie.

the class AbstractCompatibilityTests method commit.

/**
 * Creates a branch from the default branch, commits a single {@code Put} of an Iceberg table to
 * it, and checks both the branch returned by the commit and the resulting commit log.
 */
@Test
void commit() throws Exception {
    Branch mainBranch = api.getDefaultBranch();
    Branch target = Branch.of("commitToBranch", mainBranch.getHash());

    Reference createdRef =
            api.createReference()
                    .sourceRefName(mainBranch.getName())
                    .reference(target)
                    .create();
    assertThat(createdRef).isEqualTo(target);

    ContentKey tableKey = ContentKey.of("my", "tables", "table_name");
    IcebergTable table = IcebergTable.of("metadata-location", 42L, 43, 44, 45, "content-id");
    String message = "hello world";
    Put put = Put.of(tableKey, table);

    Branch committed =
            api.commitMultipleOperations()
                    .commitMeta(CommitMeta.fromMessage(message))
                    .operation(put)
                    .branch(target)
                    .commit();

    // The returned branch must differ from the one committed against while keeping its name.
    assertThat(committed)
            .isNotEqualTo(target)
            .extracting(Branch::getName)
            .isEqualTo(target.getName());

    // Exactly one log entry is expected, carrying the message used for the commit.
    LogResponse log = api.getCommitLog().refName(target.getName()).get();
    assertThat(log.getLogEntries())
            .hasSize(1)
            .map(LogEntry::getCommitMeta)
            .map(CommitMeta::getMessage)
            .containsExactly(message);
}
Also used : ContentKey(org.projectnessie.model.ContentKey) LogResponse(org.projectnessie.model.LogResponse) Branch(org.projectnessie.model.Branch) Reference(org.projectnessie.model.Reference) IcebergTable(org.projectnessie.model.IcebergTable) Put(org.projectnessie.model.Operation.Put) LogEntry(org.projectnessie.model.LogResponse.LogEntry) Test(org.junit.jupiter.api.Test)

Example 20 with Reference

use of org.projectnessie.model.Reference in project nessie by projectnessie.

the class GCImpl method identifyExpiredContents.

/**
 * Identify the expired contents using a two-step traversal algorithm.
 *
 * <h2>Algorithm for identifying the live contents and returning the bloom filter per content-id
 * </h2>
 *
 * <p>Walk through each reference (both live and dead) distributively (one Spark task for each
 * reference).
 *
 * <p>While traversing from the head commit in a reference (use a DETACHED reference to fetch
 * commits from a dead reference), for each live commit (a commit that is not expired based on the
 * cutoff time) add the contents of its put operations to the bloom filter.
 *
 * <p>Collect the live content keys for this reference just before the cutoff time (at the first
 * expired commit head). These keys are used to identify the commit head for each live content key
 * at the cutoff time, in order to support time travel.
 *
 * <p>While traversing the expired commits (commits that are expired based on the cutoff time), if
 * a commit holds the head content for its key, add it to the bloom filter; otherwise move on to
 * the next expired commit.
 *
 * <p>Stop traversing the expired commits once each live content key has had one live commit
 * processed for it. This is an optimization to avoid traversing all the commits.
 *
 * <p>Collect the bloom filter per content id from each task and merge them.
 *
 * <h2>Algorithm for identifying the expired contents and returning the list of globally expired
 * contents per content id per reference</h2>
 *
 * <p>Walk through each reference (both live and dead) distributively (one Spark task for each
 * reference).
 *
 * <p>For each commit in the reference (use a DETACHED reference to fetch commits from a dead
 * reference), check it against the bloom filter to decide whether the contents of its put
 * operations are globally expired or not. If globally expired, add the contents to the expired
 * output for this content id for this reference.
 *
 * <p>Overall, the contents at or after the cutoff time, and the contents that are mapped to the
 * commit head of live keys at the cutoff timestamp, will be retained.
 *
 * @param session spark session for distributed computation
 * @return {@link IdentifiedResult} object having expired contents per content id.
 */
public IdentifiedResult identifyExpiredContents(SparkSession session) {
    try (NessieApiV1 api = GCUtil.getApi(gcParams.getNessieClientConfigs())) {
        DistributedIdentifyContents distributedIdentifyContents = new DistributedIdentifyContents(session, gcParams);
        List<Reference> liveReferences = api.getAllReferences().get().getReferences();
        Map<String, Instant> droppedReferenceTimeMap = collectDeadReferences(api);
        // As this list of references is passed from Spark driver to executor,
        // using available Immutables JSON serialization instead of adding java serialization to the
        // classes.
        // Collect into an explicit ArrayList: the list is appended to below, and
        // Collectors.toList() makes no guarantee that the returned list is mutable.
        List<String> allRefs = liveReferences.stream().map(GCUtil::serializeReference).collect(Collectors.toCollection(java.util.ArrayList::new));
        if (!droppedReferenceTimeMap.isEmpty()) {
            allRefs.addAll(droppedReferenceTimeMap.keySet());
        }
        // Fall back to the commit count of the default reference when no expected size is configured.
        long bloomFilterSize = gcParams.getBloomFilterExpectedEntries() == null ? getTotalCommitsInDefaultReference(api) : gcParams.getBloomFilterExpectedEntries();
        // Identify the live contents and return the bloom filter per content-id
        Map<String, ContentBloomFilter> liveContentsBloomFilterMap = distributedIdentifyContents.getLiveContentsBloomFilters(allRefs, bloomFilterSize, droppedReferenceTimeMap);
        // Identify the expired contents
        return distributedIdentifyContents.getIdentifiedResults(liveContentsBloomFilterMap, allRefs);
    }
}
Also used : Reference(org.projectnessie.model.Reference) Instant(java.time.Instant) NessieApiV1(org.projectnessie.client.api.NessieApiV1)

Aggregations

Reference (org.projectnessie.model.Reference): 38, Branch (org.projectnessie.model.Branch): 19, Test (org.junit.jupiter.api.Test): 17, ContentKey (org.projectnessie.model.ContentKey): 11, ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 10, NessieNotFoundException (org.projectnessie.error.NessieNotFoundException): 10, CommitMeta (org.projectnessie.model.CommitMeta): 10, IcebergTable (org.projectnessie.model.IcebergTable): 10, List (java.util.List): 9, Collectors (java.util.stream.Collectors): 9, LogResponse (org.projectnessie.model.LogResponse): 9, Tag (org.projectnessie.model.Tag): 9, Put (org.projectnessie.model.Operation.Put): 8, Map (java.util.Map): 7, Stream (java.util.stream.Stream): 7, Assertions.assertThat (org.assertj.core.api.Assertions.assertThat): 7, BaseNessieClientServerException (org.projectnessie.error.BaseNessieClientServerException): 7, Content (org.projectnessie.model.Content): 7, NessieApiV1 (org.projectnessie.client.api.NessieApiV1): 6, Operation (org.projectnessie.model.Operation): 6