
Example 31 with ReplicationTarget

use of org.apache.accumulo.core.replication.ReplicationTarget in project accumulo by apache.

From the class ReplicationProcessorTest, the method filesWhichMakeNoProgressArentReplicatedAgain:

@Test
public void filesWhichMakeNoProgressArentReplicatedAgain() throws Exception {
    ReplicaSystem replica = EasyMock.createMock(ReplicaSystem.class);
    ReplicaSystemHelper helper = EasyMock.createMock(ReplicaSystemHelper.class);
    ReplicationProcessor proc = EasyMock.createMockBuilder(ReplicationProcessor.class).addMockedMethods("getReplicaSystem", "doesFileExist", "getStatus", "getHelper").createMock();
    ReplicationTarget target = new ReplicationTarget("peer", "1", Table.ID.of("1"));
    Status status = Status.newBuilder().setBegin(0).setEnd(0).setInfiniteEnd(true).setClosed(true).build();
    Path path = new Path("/accumulo");
    String queueKey = DistributedWorkQueueWorkAssignerHelper.getQueueKey(path.toString(), target);
    EasyMock.expect(proc.getReplicaSystem(target)).andReturn(replica);
    EasyMock.expect(proc.getStatus(path.toString(), target)).andReturn(status);
    EasyMock.expect(proc.doesFileExist(path, target)).andReturn(true);
    EasyMock.expect(proc.getHelper()).andReturn(helper);
    EasyMock.expect(replica.replicate(path, status, target, helper)).andReturn(status);
    EasyMock.replay(replica, proc);
    proc.process(queueKey, path.toString().getBytes(UTF_8));
    EasyMock.verify(replica, proc);
}
Also used : Status(org.apache.accumulo.server.replication.proto.Replication.Status) Path(org.apache.hadoop.fs.Path) ReplicationTarget(org.apache.accumulo.core.replication.ReplicationTarget) ReplicaSystemHelper(org.apache.accumulo.server.replication.ReplicaSystemHelper) ReplicaSystem(org.apache.accumulo.server.replication.ReplicaSystem) Test(org.junit.Test)
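
For contrast, here is a minimal hedged sketch of the happy-path case, built only from the classes shown above: the mocked ReplicaSystem returns a Status whose begin offset has advanced, meaning the file made progress. The offsets and names are illustrative, not taken from the Accumulo test.

ReplicaSystem replica = EasyMock.createMock(ReplicaSystem.class);
ReplicaSystemHelper helper = EasyMock.createMock(ReplicaSystemHelper.class);
ReplicationTarget target = new ReplicationTarget("peer", "1", Table.ID.of("1"));
Path path = new Path("/accumulo");
// Nothing has been replicated yet for this WAL
Status before = Status.newBuilder().setBegin(0).setEnd(0).setInfiniteEnd(true).setClosed(true).build();
// After replication the begin offset has advanced, i.e. progress was made
Status progressed = Status.newBuilder().setBegin(1000).setEnd(0).setInfiniteEnd(true).setClosed(true).build();
EasyMock.expect(replica.replicate(path, before, target, helper)).andReturn(progressed);
EasyMock.replay(replica, helper);
Status result = replica.replicate(path, before, target, helper);
// Progress was made, so a processor would persist the new status instead of
// treating the file as stalled
Assert.assertEquals(1000, result.getBegin());
EasyMock.verify(replica, helper);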

Example 32 with ReplicationTarget

use of org.apache.accumulo.core.replication.ReplicationTarget in project accumulo by apache.

From the class ReplicationIT, the method replicatedStatusEntriesAreDeleted:

@Test
public void replicatedStatusEntriesAreDeleted() throws Exception {
    // Just stop it now, we'll restart it after we restart the tserver
    getCluster().getClusterControl().stop(ServerType.GARBAGE_COLLECTOR);
    final Connector conn = getConnector();
    log.info("Got connector to MAC");
    String table1 = "table1";
    // replication shouldn't be online when we begin
    Assert.assertFalse(ReplicationTable.isOnline(conn));
    // Create the table
    conn.tableOperations().create(table1);
    int attempts = 5;
    while (attempts > 0) {
        try {
            // Enable replication on table1
            conn.tableOperations().setProperty(table1, Property.TABLE_REPLICATION.getKey(), "true");
            // Replicate table1 to cluster1 in the table with id of '4'
            conn.tableOperations().setProperty(table1, Property.TABLE_REPLICATION_TARGET.getKey() + "cluster1", "4");
            // Use the MockReplicaSystem impl and have it sleep for 1000 ms
            conn.instanceOperations().setProperty(Property.REPLICATION_PEERS.getKey() + "cluster1", ReplicaSystemFactory.getPeerConfigurationValue(MockReplicaSystem.class, "1000"));
            attempts = 0;
        } catch (Exception e) {
            attempts--;
            if (attempts <= 0) {
                throw e;
            }
            sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
        }
    }
    Table.ID tableId = Table.ID.of(conn.tableOperations().tableIdMap().get(table1));
    Assert.assertNotNull("Could not determine table id for " + table1, tableId);
    // Write some data to table1
    writeSomeData(conn, table1, 2000, 50);
    conn.tableOperations().flush(table1, null, null, true);
    // Make sure the replication table exists at this point
    while (!ReplicationTable.isOnline(conn)) {
        sleepUninterruptibly(MILLIS_BETWEEN_REPLICATION_TABLE_ONLINE_CHECKS, TimeUnit.MILLISECONDS);
    }
    Assert.assertTrue("Replication table did not exist", ReplicationTable.isOnline(conn));
    // Grant ourselves the write permission for later
    conn.securityOperations().grantTablePermission("root", ReplicationTable.NAME, TablePermission.WRITE);
    log.info("Checking for replication entries in replication");
    // Then we need to get those records over to the replication table
    Set<String> entries = new HashSet<>();
    for (int i = 0; i < 5; i++) {
        try (Scanner s = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) {
            s.setRange(ReplicationSection.getRange());
            entries.clear();
            for (Entry<Key, Value> entry : s) {
                entries.add(entry.getKey().getRow().toString());
                log.info("{}={}", entry.getKey().toStringNoTruncate(), entry.getValue());
            }
            if (!entries.isEmpty()) {
                log.info("Replication entries {}", entries);
                break;
            }
            Thread.sleep(1000);
        }
    }
    Assert.assertFalse("Did not find any replication entries in the replication table", entries.isEmpty());
    // Find the WorkSection record that will be created for that data we ingested
    boolean notFound = true;
    for (int i = 0; i < 10 && notFound; i++) {
        try (Scanner s = ReplicationTable.getScanner(conn)) {
            WorkSection.limit(s);
            Entry<Key, Value> e = Iterables.getOnlyElement(s);
            log.info("Found entry: {}", e.getKey().toStringNoTruncate());
            Text expectedColqual = new ReplicationTarget("cluster1", "4", tableId).toText();
            Assert.assertEquals(expectedColqual, e.getKey().getColumnQualifier());
            notFound = false;
        } catch (NoSuchElementException e) {
            // No work entry yet; sleep below and retry
        } catch (IllegalArgumentException e) {
            // Somehow we got more than one element. Log what they were
            try (Scanner s = ReplicationTable.getScanner(conn)) {
                for (Entry<Key, Value> content : s) {
                    log.info("{} => {}", content.getKey().toStringNoTruncate(), content.getValue());
                }
                Assert.fail("Found more than one work section entry");
            }
        } catch (RuntimeException e) {
            // Catch a propagation issue, fail if it's not what we expect
            Throwable cause = e.getCause();
            if (cause instanceof AccumuloSecurityException) {
                AccumuloSecurityException sec = (AccumuloSecurityException) cause;
                switch(sec.getSecurityErrorCode()) {
                    case PERMISSION_DENIED:
                        // retry -- the grant didn't happen yet
                        log.warn("Sleeping because permission was denied");
                        break;
                    default:
                        throw e;
                }
            } else {
                throw e;
            }
        }
        Thread.sleep(2000);
    }
    if (notFound) {
        try (Scanner s = ReplicationTable.getScanner(conn)) {
            for (Entry<Key, Value> content : s) {
                log.info("{} => {}", content.getKey().toStringNoTruncate(), ProtobufUtil.toString(Status.parseFrom(content.getValue().get())));
            }
            Assert.assertFalse("Did not find the work entry for the status entry", notFound);
        }
    }
    /**
     * By this point, we should have data ingested into a table, with at least one WAL as a candidate for replication. Compacting the table should close all
     * open WALs, which should ensure all records we're going to replicate have entries in the replication table, and nothing will exist in the metadata table
     * anymore
     */
    log.info("Killing tserver");
    // Kill the tserver(s) and restart them
    // to ensure that the WALs we previously observed all move to closed.
    cluster.getClusterControl().stop(ServerType.TABLET_SERVER);
    log.info("Starting tserver");
    cluster.getClusterControl().start(ServerType.TABLET_SERVER);
    log.info("Waiting to read tables");
    sleepUninterruptibly(2 * 3, TimeUnit.SECONDS);
    // Make sure we can read all the tables (recovery complete)
    for (String table : new String[] { MetadataTable.NAME, table1 }) {
        Iterators.size(conn.createScanner(table, Authorizations.EMPTY).iterator());
    }
    log.info("Recovered metadata:");
    try (Scanner s = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) {
        for (Entry<Key, Value> entry : s) {
            log.info("{}={}", entry.getKey().toStringNoTruncate(), entry.getValue());
        }
    }
    cluster.getClusterControl().start(ServerType.GARBAGE_COLLECTOR);
    // Wait for a bit since the GC has to run (should be running after a one second delay)
    waitForGCLock(conn);
    Thread.sleep(1000);
    log.info("After GC");
    try (Scanner s = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) {
        for (Entry<Key, Value> entry : s) {
            log.info("{}={}", entry.getKey().toStringNoTruncate(), entry.getValue());
        }
    }
    // We expect no records in the metadata table after compaction. We have to poll
    // because we have to wait for the StatusMaker's next iteration which will clean
    // up the dangling *closed* records after we create the record in the replication table.
    // We need the GC to close the file (CloseWriteAheadLogReferences) before we can remove the record
    log.info("Checking metadata table for replication entries");
    Set<String> remaining = new HashSet<>();
    for (int i = 0; i < 10; i++) {
        try (Scanner s = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) {
            s.setRange(ReplicationSection.getRange());
            remaining.clear();
            for (Entry<Key, Value> e : s) {
                remaining.add(e.getKey().getRow().toString());
            }
            remaining.retainAll(entries);
            if (remaining.isEmpty()) {
                break;
            }
            log.info("remaining {}", remaining);
            Thread.sleep(2000);
            log.info("");
        }
    }
    Assert.assertTrue("Replication status messages were not cleaned up from metadata table", remaining.isEmpty());
    /**
     * After we close out and subsequently delete the metadata record, this will propagate to the replication table, which will cause those records to be
     * deleted after replication occurs
     */
    int recordsFound = 0;
    for (int i = 0; i < 30; i++) {
        try (Scanner s = ReplicationTable.getScanner(conn)) {
            recordsFound = 0;
            for (Entry<Key, Value> entry : s) {
                recordsFound++;
                log.info("{} {}", entry.getKey().toStringNoTruncate(), ProtobufUtil.toString(Status.parseFrom(entry.getValue().get())));
            }
            if (recordsFound <= 2) {
                break;
            } else {
                Thread.sleep(1000);
                log.info("");
            }
        }
    }
    Assert.assertTrue("Found unexpected replication records in the replication table", recordsFound <= 2);
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Scanner(org.apache.accumulo.core.client.Scanner) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) Table(org.apache.accumulo.core.client.impl.Table) ReplicationTable(org.apache.accumulo.core.replication.ReplicationTable) Text(org.apache.hadoop.io.Text) TableOfflineException(org.apache.accumulo.core.client.TableOfflineException) URISyntaxException(java.net.URISyntaxException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) ReplicationTableOfflineException(org.apache.accumulo.core.replication.ReplicationTableOfflineException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) NoSuchElementException(java.util.NoSuchElementException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) Entry(java.util.Map.Entry) LogEntry(org.apache.accumulo.core.tabletserver.log.LogEntry) ReplicationTarget(org.apache.accumulo.core.replication.ReplicationTarget) Value(org.apache.accumulo.core.data.Value) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Key(org.apache.accumulo.core.data.Key) NoSuchElementException(java.util.NoSuchElementException) HashSet(java.util.HashSet) Test(org.junit.Test)
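
The column qualifier of each WorkSection entry is just the serialized ReplicationTarget, so it can be decoded for logging or extra assertions. A short sketch, assuming ReplicationTarget.from(Text) is the inverse of toText() and that the getPeerName()/getRemoteIdentifier() accessors are available (none of these appear in the test above):

try (Scanner s = ReplicationTable.getScanner(conn)) {
    WorkSection.limit(s);
    for (Entry<Key, Value> workEntry : s) {
        // Assumption: from(Text) round-trips what toText() produced
        ReplicationTarget decoded = ReplicationTarget.from(workEntry.getKey().getColumnQualifier());
        log.info("Work queued for peer {} (remote id {}, source table {})", decoded.getPeerName(), decoded.getRemoteIdentifier(), decoded.getSourceTableId());
    }
}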

Example 33 with ReplicationTarget

use of org.apache.accumulo.core.replication.ReplicationTarget in project accumulo by apache.

From the class SequentialWorkAssignerIT, the method reprocessingOfCompletedWorkRemovesWork:

@Test
public void reprocessingOfCompletedWorkRemovesWork() throws Exception {
    ReplicationTarget target = new ReplicationTarget("cluster1", "table1", Table.ID.of("1"));
    Text serializedTarget = target.toText();
    // Create two mutations, both of which need replication work done
    BatchWriter bw = ReplicationTable.getBatchWriter(conn);
    // We want the name of file2 to sort before file1
    String filename1 = "z_file1", filename2 = "a_file1";
    String file1 = "/accumulo/wal/tserver+port/" + filename1, file2 = "/accumulo/wal/tserver+port/" + filename2;
    // File1 was closed before file2, however
    Status stat1 = Status.newBuilder().setBegin(100).setEnd(100).setClosed(true).setInfiniteEnd(false).setCreatedTime(250).build();
    Status stat2 = Status.newBuilder().setBegin(0).setEnd(100).setClosed(true).setInfiniteEnd(false).setCreatedTime(500).build();
    Mutation m = new Mutation(file1);
    WorkSection.add(m, serializedTarget, ProtobufUtil.toValue(stat1));
    bw.addMutation(m);
    m = new Mutation(file2);
    WorkSection.add(m, serializedTarget, ProtobufUtil.toValue(stat2));
    bw.addMutation(m);
    m = OrderSection.createMutation(file1, stat1.getCreatedTime());
    OrderSection.add(m, target.getSourceTableId(), ProtobufUtil.toValue(stat1));
    bw.addMutation(m);
    m = OrderSection.createMutation(file2, stat2.getCreatedTime());
    OrderSection.add(m, target.getSourceTableId(), ProtobufUtil.toValue(stat2));
    bw.addMutation(m);
    bw.close();
    DistributedWorkQueue workQueue = createMock(DistributedWorkQueue.class);
    // Treat filename1 as if we had already submitted it for replication
    Map<String, Map<Table.ID, String>> queuedWork = new HashMap<>();
    Map<Table.ID, String> queuedWorkForCluster = new HashMap<>();
    queuedWorkForCluster.put(target.getSourceTableId(), DistributedWorkQueueWorkAssignerHelper.getQueueKey(filename1, target));
    queuedWork.put("cluster1", queuedWorkForCluster);
    assigner.setQueuedWork(queuedWork);
    assigner.setWorkQueue(workQueue);
    assigner.setMaxQueueSize(Integer.MAX_VALUE);
    // Make sure we expect the invocations in the correct order (accumulo is sorted)
    workQueue.addWork(DistributedWorkQueueWorkAssignerHelper.getQueueKey(filename2, target), file2);
    expectLastCall().once();
    // file1 is fully replicated, so it is removed from the queued work and file2 can be queued
    replay(workQueue);
    assigner.createWork();
    verify(workQueue);
    Assert.assertEquals(1, queuedWork.size());
    Assert.assertTrue(queuedWork.containsKey("cluster1"));
    Map<Table.ID, String> cluster1Work = queuedWork.get("cluster1");
    Assert.assertEquals(1, cluster1Work.size());
    Assert.assertTrue(cluster1Work.containsKey(target.getSourceTableId()));
    Assert.assertEquals(DistributedWorkQueueWorkAssignerHelper.getQueueKey(filename2, target), cluster1Work.get(target.getSourceTableId()));
}
Also used : Status(org.apache.accumulo.server.replication.proto.Replication.Status) Table(org.apache.accumulo.core.client.impl.Table) ReplicationTable(org.apache.accumulo.core.replication.ReplicationTable) HashMap(java.util.HashMap) Text(org.apache.hadoop.io.Text) DistributedWorkQueue(org.apache.accumulo.server.zookeeper.DistributedWorkQueue) ReplicationTarget(org.apache.accumulo.core.replication.ReplicationTarget) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)
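
The removal above hinges on stat1 being "fully replicated": it is closed, its end is finite, and its begin offset has caught up to its end. A minimal sketch of that distinction, using only the Status builder already shown (values are illustrative):

// Fully replicated: closed, finite end, and begin has caught up to end
Status done = Status.newBuilder().setBegin(100).setEnd(100).setClosed(true).setInfiniteEnd(false).build();
// Still needs work: nothing has been replicated yet (begin < end)
Status pending = Status.newBuilder().setBegin(0).setEnd(100).setClosed(true).setInfiniteEnd(false).build();
// A sequential assigner can drop the "done" file from its queued work and move
// on to the next file for the same peer, which is why only file2 is re-queued above
Assert.assertTrue(done.getClosed() && !done.getInfiniteEnd() && done.getBegin() >= done.getEnd());
Assert.assertTrue(pending.getBegin() < pending.getEnd());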

Example 34 with ReplicationTarget

use of org.apache.accumulo.core.replication.ReplicationTarget in project accumulo by apache.

From the class SequentialWorkAssignerIT, the method workAcrossPeersHappensConcurrently:

@Test
public void workAcrossPeersHappensConcurrently() throws Exception {
    ReplicationTarget target1 = new ReplicationTarget("cluster1", "table1", Table.ID.of("1"));
    Text serializedTarget1 = target1.toText();
    ReplicationTarget target2 = new ReplicationTarget("cluster2", "table1", Table.ID.of("1"));
    Text serializedTarget2 = target2.toText();
    // Create two mutations, both of which need replication work done
    BatchWriter bw = ReplicationTable.getBatchWriter(conn);
    // We want the name of file2 to sort before file1
    String filename1 = "z_file1", filename2 = "a_file1";
    String file1 = "/accumulo/wal/tserver+port/" + filename1, file2 = "/accumulo/wal/tserver+port/" + filename2;
    // File1 was closed before file2, however
    Status stat1 = Status.newBuilder().setBegin(0).setEnd(100).setClosed(true).setInfiniteEnd(false).setCreatedTime(250).build();
    Status stat2 = Status.newBuilder().setBegin(0).setEnd(100).setClosed(true).setInfiniteEnd(false).setCreatedTime(500).build();
    Mutation m = new Mutation(file1);
    WorkSection.add(m, serializedTarget1, ProtobufUtil.toValue(stat1));
    bw.addMutation(m);
    m = new Mutation(file2);
    WorkSection.add(m, serializedTarget2, ProtobufUtil.toValue(stat2));
    bw.addMutation(m);
    m = OrderSection.createMutation(file1, stat1.getCreatedTime());
    OrderSection.add(m, target1.getSourceTableId(), ProtobufUtil.toValue(stat1));
    bw.addMutation(m);
    m = OrderSection.createMutation(file2, stat2.getCreatedTime());
    OrderSection.add(m, target2.getSourceTableId(), ProtobufUtil.toValue(stat2));
    bw.addMutation(m);
    bw.close();
    DistributedWorkQueue workQueue = createMock(DistributedWorkQueue.class);
    Map<String, Map<Table.ID, String>> queuedWork = new HashMap<>();
    assigner.setQueuedWork(queuedWork);
    assigner.setWorkQueue(workQueue);
    assigner.setMaxQueueSize(Integer.MAX_VALUE);
    // Make sure we expect the invocations in the correct order (accumulo is sorted)
    workQueue.addWork(DistributedWorkQueueWorkAssignerHelper.getQueueKey(filename1, target1), file1);
    expectLastCall().once();
    workQueue.addWork(DistributedWorkQueueWorkAssignerHelper.getQueueKey(filename2, target2), file2);
    expectLastCall().once();
    // Both files are queued because they replicate to different peers
    replay(workQueue);
    assigner.createWork();
    verify(workQueue);
    Assert.assertEquals(2, queuedWork.size());
    Assert.assertTrue(queuedWork.containsKey("cluster1"));
    Map<Table.ID, String> cluster1Work = queuedWork.get("cluster1");
    Assert.assertEquals(1, cluster1Work.size());
    Assert.assertTrue(cluster1Work.containsKey(target1.getSourceTableId()));
    Assert.assertEquals(DistributedWorkQueueWorkAssignerHelper.getQueueKey(filename1, target1), cluster1Work.get(target1.getSourceTableId()));
    Map<Table.ID, String> cluster2Work = queuedWork.get("cluster2");
    Assert.assertEquals(1, cluster2Work.size());
    Assert.assertTrue(cluster2Work.containsKey(target2.getSourceTableId()));
    Assert.assertEquals(DistributedWorkQueueWorkAssignerHelper.getQueueKey(filename2, target2), cluster2Work.get(target2.getSourceTableId()));
}
Also used : Status(org.apache.accumulo.server.replication.proto.Replication.Status) Table(org.apache.accumulo.core.client.impl.Table) ReplicationTable(org.apache.accumulo.core.replication.ReplicationTable) HashMap(java.util.HashMap) Text(org.apache.hadoop.io.Text) DistributedWorkQueue(org.apache.accumulo.server.zookeeper.DistributedWorkQueue) ReplicationTarget(org.apache.accumulo.core.replication.ReplicationTarget) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)
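
The bookkeeping exercised above is a two-level map keyed by peer name and then by source table id, which is what keeps replication sequential per (peer, table) while letting different peers proceed concurrently. A small hedged sketch of how one slot would be recorded, reusing getQueueKey from the test; getPeerName() is assumed to exist on ReplicationTarget:

Map<String, Map<Table.ID, String>> queuedWork = new HashMap<>();
ReplicationTarget target = new ReplicationTarget("cluster1", "table1", Table.ID.of("1"));
String queueKey = DistributedWorkQueueWorkAssignerHelper.getQueueKey("z_file1", target);
// One slot per (peer, source table): a later file for the same pair waits until
// the currently queued file is fully replicated
queuedWork.computeIfAbsent(target.getPeerName(), peer -> new HashMap<>()).put(target.getSourceTableId(), queueKey);
Assert.assertEquals(queueKey, queuedWork.get("cluster1").get(target.getSourceTableId()));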

Example 35 with ReplicationTarget

use of org.apache.accumulo.core.replication.ReplicationTarget in project accumulo by apache.

From the class UnorderedWorkAssignerIT, the method doNotCreateWorkForFilesNotNeedingIt:

@Test
public void doNotCreateWorkForFilesNotNeedingIt() throws Exception {
    ReplicationTarget target1 = new ReplicationTarget("cluster1", "table1", Table.ID.of("1")), target2 = new ReplicationTarget("cluster1", "table2", Table.ID.of("2"));
    Text serializedTarget1 = target1.toText(), serializedTarget2 = target2.toText();
    // Create two mutations, both of which need replication work done
    BatchWriter bw = ReplicationTable.getBatchWriter(conn);
    String filename1 = UUID.randomUUID().toString(), filename2 = UUID.randomUUID().toString();
    String file1 = "/accumulo/wal/tserver+port/" + filename1, file2 = "/accumulo/wal/tserver+port/" + filename2;
    Mutation m = new Mutation(file1);
    WorkSection.add(m, serializedTarget1, StatusUtil.fileCreatedValue(5));
    bw.addMutation(m);
    m = new Mutation(file2);
    WorkSection.add(m, serializedTarget2, StatusUtil.fileCreatedValue(10));
    bw.addMutation(m);
    bw.close();
    DistributedWorkQueue workQueue = createMock(DistributedWorkQueue.class);
    HashSet<String> queuedWork = new HashSet<>();
    assigner.setQueuedWork(queuedWork);
    assigner.setMaxQueueSize(Integer.MAX_VALUE);
    replay(workQueue);
    assigner.createWork();
    verify(workQueue);
}
Also used : ReplicationTarget(org.apache.accumulo.core.replication.ReplicationTarget) Text(org.apache.hadoop.io.Text) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) DistributedWorkQueue(org.apache.accumulo.server.zookeeper.DistributedWorkQueue) HashSet(java.util.HashSet) Test(org.junit.Test)
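
No work is created above because StatusUtil.fileCreatedValue only records that a WAL exists; nothing has been written that needs replicating. For contrast, a hedged sketch of a record that would need work, using the Status builder and ProtobufUtil from the earlier examples (offsets illustrative); writing it instead of the fileCreatedValue entries would be expected to make createWork() queue something for target1:

// Unlike fileCreatedValue (file exists, nothing to send), this status says 100
// bytes are waiting to be replicated and the WAL is closed
Status needsWork = Status.newBuilder().setBegin(0).setEnd(100).setClosed(true).setInfiniteEnd(false).setCreatedTime(10).build();
Mutation needed = new Mutation("/accumulo/wal/tserver+port/" + UUID.randomUUID());
WorkSection.add(needed, target1.toText(), ProtobufUtil.toValue(needsWork));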

Aggregations

ReplicationTarget (org.apache.accumulo.core.replication.ReplicationTarget)42 Test (org.junit.Test)31 Status (org.apache.accumulo.server.replication.proto.Replication.Status)22 Text (org.apache.hadoop.io.Text)19 Mutation (org.apache.accumulo.core.data.Mutation)18 HashMap (java.util.HashMap)16 BatchWriter (org.apache.accumulo.core.client.BatchWriter)15 Value (org.apache.accumulo.core.data.Value)15 Path (org.apache.hadoop.fs.Path)15 Table (org.apache.accumulo.core.client.impl.Table)13 Key (org.apache.accumulo.core.data.Key)13 HashSet (java.util.HashSet)12 ReplicationTable (org.apache.accumulo.core.replication.ReplicationTable)12 DistributedWorkQueue (org.apache.accumulo.server.zookeeper.DistributedWorkQueue)9 DataInputStream (java.io.DataInputStream)8 Scanner (org.apache.accumulo.core.client.Scanner)8 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)8 ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy)8 ByteArrayInputStream (java.io.ByteArrayInputStream)6 Map (java.util.Map)5