
Example 11 with DistributedWorkQueue

use of org.apache.accumulo.server.zookeeper.DistributedWorkQueue in project accumulo by apache.

the class UnorderedWorkAssignerIT method createWorkForFilesNeedingIt.

@Test
public void createWorkForFilesNeedingIt() throws Exception {
    ReplicationTarget target1 = new ReplicationTarget("cluster1", "table1", Table.ID.of("1"));
    ReplicationTarget target2 = new ReplicationTarget("cluster1", "table2", Table.ID.of("2"));
    Text serializedTarget1 = target1.toText();
    Text serializedTarget2 = target2.toText();
    String keyTarget1 = target1.getPeerName() + DistributedWorkQueueWorkAssignerHelper.KEY_SEPARATOR + target1.getRemoteIdentifier() + DistributedWorkQueueWorkAssignerHelper.KEY_SEPARATOR + target1.getSourceTableId();
    String keyTarget2 = target2.getPeerName() + DistributedWorkQueueWorkAssignerHelper.KEY_SEPARATOR + target2.getRemoteIdentifier() + DistributedWorkQueueWorkAssignerHelper.KEY_SEPARATOR + target2.getSourceTableId();
    Status.Builder builder = Status.newBuilder().setBegin(0).setEnd(0).setInfiniteEnd(true).setClosed(false).setCreatedTime(5L);
    Status status1 = builder.build();
    builder.setCreatedTime(10L);
    Status status2 = builder.build();
    // Create two mutations, both of which need replication work done
    BatchWriter bw = ReplicationTable.getBatchWriter(conn);
    String filename1 = UUID.randomUUID().toString(), filename2 = UUID.randomUUID().toString();
    String file1 = "/accumulo/wal/tserver+port/" + filename1, file2 = "/accumulo/wal/tserver+port/" + filename2;
    Mutation m = new Mutation(file1);
    WorkSection.add(m, serializedTarget1, ProtobufUtil.toValue(status1));
    bw.addMutation(m);
    m = OrderSection.createMutation(file1, status1.getCreatedTime());
    OrderSection.add(m, target1.getSourceTableId(), ProtobufUtil.toValue(status1));
    bw.addMutation(m);
    m = new Mutation(file2);
    WorkSection.add(m, serializedTarget2, ProtobufUtil.toValue(status2));
    bw.addMutation(m);
    m = OrderSection.createMutation(file2, status2.getCreatedTime());
    OrderSection.add(m, target2.getSourceTableId(), ProtobufUtil.toValue(status2));
    bw.addMutation(m);
    bw.close();
    DistributedWorkQueue workQueue = createMock(DistributedWorkQueue.class);
    HashSet<String> queuedWork = new HashSet<>();
    assigner.setQueuedWork(queuedWork);
    assigner.setWorkQueue(workQueue);
    assigner.setMaxQueueSize(Integer.MAX_VALUE);
    // Make sure we expect the invocations in the order they were created
    String key = filename1 + "|" + keyTarget1;
    workQueue.addWork(key, file1);
    expectLastCall().once();
    key = filename2 + "|" + keyTarget2;
    workQueue.addWork(key, file2);
    expectLastCall().once();
    replay(workQueue);
    assigner.createWork();
    verify(workQueue);
}
Also used : Status(org.apache.accumulo.server.replication.proto.Replication.Status) ReplicationTarget(org.apache.accumulo.core.replication.ReplicationTarget) Text(org.apache.hadoop.io.Text) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) DistributedWorkQueue(org.apache.accumulo.server.zookeeper.DistributedWorkQueue) HashSet(java.util.HashSet) Test(org.junit.Test)
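The key this test builds by hand has the layout filename|peerName|remoteIdentifier|sourceTableId. As a quick sketch (not project code), the snippet below assumes DistributedWorkQueueWorkAssignerHelper.getQueueKey produces that same layout the test's concatenation does; the filename value is hypothetical.

// Hedged sketch: manual construction of the queue key the test expects.
ReplicationTarget target = new ReplicationTarget("cluster1", "table1", Table.ID.of("1"));
String filename = "wal-0001"; // hypothetical WAL file name
String manual = filename + DistributedWorkQueueWorkAssignerHelper.KEY_SEPARATOR
        + target.getPeerName() + DistributedWorkQueueWorkAssignerHelper.KEY_SEPARATOR
        + target.getRemoteIdentifier() + DistributedWorkQueueWorkAssignerHelper.KEY_SEPARATOR
        + target.getSourceTableId();
// Assumed to match the helper, given the key format the expectations assert:
String viaHelper = DistributedWorkQueueWorkAssignerHelper.getQueueKey(filename, target);
assert manual.equals(viaHelper);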

Example 12 with DistributedWorkQueue

use of org.apache.accumulo.server.zookeeper.DistributedWorkQueue in project accumulo by apache.

the class SequentialWorkAssignerTest method basicZooKeeperCleanup.

@Test
public void basicZooKeeperCleanup() throws Exception {
    DistributedWorkQueue workQueue = createMock(DistributedWorkQueue.class);
    ZooCache zooCache = createMock(ZooCache.class);
    Instance inst = createMock(Instance.class);
    Map<String, Map<Table.ID, String>> queuedWork = new TreeMap<>();
    Map<Table.ID, String> cluster1Work = new TreeMap<>();
    // Two files for cluster1: one for table '1' and another for table '2' that we have assigned work for
    cluster1Work.put(Table.ID.of("1"), DistributedWorkQueueWorkAssignerHelper.getQueueKey("file1", new ReplicationTarget("cluster1", "1", Table.ID.of("1"))));
    cluster1Work.put(Table.ID.of("2"), DistributedWorkQueueWorkAssignerHelper.getQueueKey("file2", new ReplicationTarget("cluster1", "2", Table.ID.of("2"))));
    queuedWork.put("cluster1", cluster1Work);
    assigner.setConnector(conn);
    assigner.setZooCache(zooCache);
    assigner.setWorkQueue(workQueue);
    assigner.setQueuedWork(queuedWork);
    expect(conn.getInstance()).andReturn(inst);
    expect(inst.getInstanceID()).andReturn("instance");
    // file1 replicated
    expect(zooCache.get(ZooUtil.getRoot("instance") + ReplicationConstants.ZOO_WORK_QUEUE + "/" + DistributedWorkQueueWorkAssignerHelper.getQueueKey("file1", new ReplicationTarget("cluster1", "1", Table.ID.of("1"))))).andReturn(null);
    // file2 still needs to replicate
    expect(zooCache.get(ZooUtil.getRoot("instance") + ReplicationConstants.ZOO_WORK_QUEUE + "/" + DistributedWorkQueueWorkAssignerHelper.getQueueKey("file2", new ReplicationTarget("cluster1", "2", Table.ID.of("2"))))).andReturn(new byte[0]);
    replay(workQueue, zooCache, conn, inst);
    assigner.cleanupFinishedWork();
    verify(workQueue, zooCache, conn, inst);
    Assert.assertEquals(1, cluster1Work.size());
    Assert.assertEquals(DistributedWorkQueueWorkAssignerHelper.getQueueKey("file2", new ReplicationTarget("cluster1", "2", Table.ID.of("2"))), cluster1Work.get(Table.ID.of("2")));
}
Also used : Table(org.apache.accumulo.core.client.impl.Table) ReplicationTarget(org.apache.accumulo.core.replication.ReplicationTarget) Instance(org.apache.accumulo.core.client.Instance) TreeMap(java.util.TreeMap) ZooCache(org.apache.accumulo.server.zookeeper.ZooCache) TreeMap(java.util.TreeMap) Map(java.util.Map) DistributedWorkQueue(org.apache.accumulo.server.zookeeper.DistributedWorkQueue) Test(org.junit.Test)
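For orientation, here is a minimal sketch (assuming the wiring from the test, plus java.util.Iterator) of the cleanup behavior this test verifies: a queue key whose node is gone from ZooKeeper under the replication work-queue path is treated as finished and dropped from the local map. It is an illustration of the observable behavior, not SequentialWorkAssigner's actual implementation.

// Hedged sketch of what cleanupFinishedWork() above is verified to do.
String workPath = ZooUtil.getRoot("instance") + ReplicationConstants.ZOO_WORK_QUEUE;
for (Map<Table.ID, String> perPeer : queuedWork.values()) {
    Iterator<Map.Entry<Table.ID, String>> it = perPeer.entrySet().iterator();
    while (it.hasNext()) {
        String queueKey = it.next().getValue();
        if (zooCache.get(workPath + "/" + queueKey) == null) {
            it.remove(); // no znode left: a worker finished and removed the entry
        }
    }
}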

Example 13 with DistributedWorkQueue

use of org.apache.accumulo.server.zookeeper.DistributedWorkQueue in project accumulo by apache.

the class UnorderedWorkAssignerTest method workQueuedUsingFileName.

@Test
public void workQueuedUsingFileName() throws Exception {
    ReplicationTarget target = new ReplicationTarget("cluster1", "table1", Table.ID.of("1"));
    DistributedWorkQueue workQueue = createMock(DistributedWorkQueue.class);
    Set<String> queuedWork = new HashSet<>();
    assigner.setQueuedWork(queuedWork);
    assigner.setWorkQueue(workQueue);
    Path p = new Path("/accumulo/wal/tserver+port/" + UUID.randomUUID());
    String expectedQueueKey = p.getName() + DistributedWorkQueueWorkAssignerHelper.KEY_SEPARATOR + target.getPeerName() + DistributedWorkQueueWorkAssignerHelper.KEY_SEPARATOR + target.getRemoteIdentifier() + DistributedWorkQueueWorkAssignerHelper.KEY_SEPARATOR + target.getSourceTableId();
    workQueue.addWork(expectedQueueKey, p.toString());
    expectLastCall().once();
    replay(workQueue);
    assigner.queueWork(p, target);
    Assert.assertEquals(1, queuedWork.size());
    Assert.assertEquals(expectedQueueKey, queuedWork.iterator().next());
}
Also used : Path(org.apache.hadoop.fs.Path) ReplicationTarget(org.apache.accumulo.core.replication.ReplicationTarget) DistributedWorkQueue(org.apache.accumulo.server.zookeeper.DistributedWorkQueue) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Test(org.junit.Test)
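The mock expectations above imply a dedup-then-enqueue pattern, sketched below under the assumption that queueWork behaves this way (this is not a copy of UnorderedWorkAssigner's source):

// Hedged sketch: skip already-queued work, otherwise enqueue and remember the key.
String queueKey = DistributedWorkQueueWorkAssignerHelper.getQueueKey(p.getName(), target);
if (!queuedWork.contains(queueKey)) {
    workQueue.addWork(queueKey, p.toString()); // may throw KeeperException/InterruptedException
    queuedWork.add(queueKey);
}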

Example 14 with DistributedWorkQueue

use of org.apache.accumulo.server.zookeeper.DistributedWorkQueue in project accumulo by apache.

the class CopyFailed method call.

@Override
public Repo<Master> call(long tid, Master master) throws Exception {
    // This needs to execute after the arbiter is stopped
    master.updateBulkImportStatus(source, BulkImportState.COPY_FILES);
    VolumeManager fs = master.getFileSystem();
    if (!fs.exists(new Path(error, BulkImport.FAILURES_TXT)))
        return new CleanUpBulkImport(tableId, source, bulk, error);
    HashMap<FileRef, String> failures = new HashMap<>();
    HashMap<FileRef, String> loadedFailures = new HashMap<>();
    try (BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(error, BulkImport.FAILURES_TXT)), UTF_8))) {
        String line = null;
        while ((line = in.readLine()) != null) {
            Path path = new Path(line);
            if (!fs.exists(new Path(error, path.getName())))
                failures.put(new FileRef(line, path), line);
        }
    }
    /*
     * I thought I could move files that have no file references in the table. However, it's possible a clone references a file. Therefore only move files that
     * have no loaded markers.
     */
    // determine which failed files were loaded
    Connector conn = master.getConnector();
    try (Scanner mscanner = new IsolatedScanner(conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY))) {
        mscanner.setRange(new KeyExtent(tableId, null, null).toMetadataRange());
        mscanner.fetchColumnFamily(TabletsSection.BulkFileColumnFamily.NAME);
        for (Entry<Key, Value> entry : mscanner) {
            if (Long.parseLong(entry.getValue().toString()) == tid) {
                FileRef loadedFile = new FileRef(fs, entry.getKey());
                String absPath = failures.remove(loadedFile);
                if (absPath != null) {
                    loadedFailures.put(loadedFile, absPath);
                }
            }
        }
    }
    // move failed files that were not loaded
    for (String failure : failures.values()) {
        Path orig = new Path(failure);
        Path dest = new Path(error, orig.getName());
        fs.rename(orig, dest);
        log.debug("tid " + tid + " renamed " + orig + " to " + dest + ": import failed");
    }
    if (loadedFailures.size() > 0) {
        DistributedWorkQueue bifCopyQueue = new DistributedWorkQueue(Constants.ZROOT + "/" + master.getInstance().getInstanceID() + Constants.ZBULK_FAILED_COPYQ, master.getConfiguration());
        HashSet<String> workIds = new HashSet<>();
        for (String failure : loadedFailures.values()) {
            Path orig = new Path(failure);
            Path dest = new Path(error, orig.getName());
            if (fs.exists(dest))
                continue;
            bifCopyQueue.addWork(orig.getName(), (failure + "," + dest).getBytes(UTF_8));
            workIds.add(orig.getName());
            log.debug("tid " + tid + " added to copyq: " + orig + " to " + dest + ": failed");
        }
        bifCopyQueue.waitUntilDone(workIds);
    }
    fs.deleteRecursively(new Path(error, BulkImport.FAILURES_TXT));
    return new CleanUpBulkImport(tableId, source, bulk, error);
}
Also used : Path(org.apache.hadoop.fs.Path) VolumeManager(org.apache.accumulo.server.fs.VolumeManager) Connector(org.apache.accumulo.core.client.Connector) IsolatedScanner(org.apache.accumulo.core.client.IsolatedScanner) Scanner(org.apache.accumulo.core.client.Scanner) InputStreamReader(java.io.InputStreamReader) HashMap(java.util.HashMap) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) DistributedWorkQueue(org.apache.accumulo.server.zookeeper.DistributedWorkQueue) FileRef(org.apache.accumulo.server.fs.FileRef) BufferedReader(java.io.BufferedReader) Value(org.apache.accumulo.core.data.Value) IsolatedScanner(org.apache.accumulo.core.client.IsolatedScanner) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)
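Stripped of the bulk-import bookkeeping, the producer-side DistributedWorkQueue usage in CopyFailed reduces to the shape below; zPath and conf are hypothetical stand-ins for the ZooKeeper queue path and server configuration built above, and the payload follows the "source,destination" convention the code uses.

// Hedged sketch: enqueue named work items, then block until workers complete them.
DistributedWorkQueue copyQueue = new DistributedWorkQueue(zPath, conf); // zPath and conf are assumptions
HashSet<String> workIds = new HashSet<>();
String payload = "hdfs://src/file1.rf,hdfs://dest/file1.rf"; // "source,destination", as above
copyQueue.addWork("file1.rf", payload.getBytes(UTF_8));
workIds.add("file1.rf");
copyQueue.waitUntilDone(workIds); // presumably returns once the matching queue nodes are gone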

Example 15 with DistributedWorkQueue

use of org.apache.accumulo.server.zookeeper.DistributedWorkQueue in project accumulo by apache.

the class TabletServer method run.

// main loop listens for client requests
@Override
public void run() {
    SecurityUtil.serverLogin(SiteConfiguration.getInstance());
    // We can just make the zookeeper paths before we try to use them.
    try {
        ZooKeeperInitialization.ensureZooKeeperInitialized(ZooReaderWriter.getInstance(), ZooUtil.getRoot(getInstance()));
    } catch (KeeperException | InterruptedException e) {
        log.error("Could not ensure that ZooKeeper is properly initialized", e);
        throw new RuntimeException(e);
    }
    Metrics tserverMetrics = metricsFactory.createTabletServerMetrics(this);
    // Register MBeans
    try {
        tserverMetrics.register();
        mincMetrics.register();
        scanMetrics.register();
        updateMetrics.register();
    } catch (Exception e) {
        log.error("Error registering with JMX", e);
    }
    if (null != authKeyWatcher) {
        log.info("Seeding ZooKeeper watcher for authentication keys");
        try {
            authKeyWatcher.updateAuthKeys();
        } catch (KeeperException | InterruptedException e) {
            // TODO Does there need to be a better check? What are the error conditions that we'd fall out here? AUTH_FAILURE?
            // If we get the error, do we just put it on a timer and retry the exists(String, Watcher) call?
            log.error("Failed to perform initial check for authentication tokens in ZooKeeper. Delegation token authentication will be unavailable.", e);
        }
    }
    try {
        clientAddress = startTabletClientService();
    } catch (UnknownHostException e1) {
        throw new RuntimeException("Failed to start the tablet client service", e1);
    }
    announceExistence();
    try {
        walMarker.initWalMarker(getTabletSession());
    } catch (Exception e) {
        log.error("Unable to create WAL marker node in zookeeper", e);
        throw new RuntimeException(e);
    }
    ThreadPoolExecutor distWorkQThreadPool = new SimpleThreadPool(getConfiguration().getCount(Property.TSERV_WORKQ_THREADS), "distributed work queue");
    bulkFailedCopyQ = new DistributedWorkQueue(ZooUtil.getRoot(getInstance()) + Constants.ZBULK_FAILED_COPYQ, getConfiguration());
    try {
        bulkFailedCopyQ.startProcessing(new BulkFailedCopyProcessor(), distWorkQThreadPool);
    } catch (Exception e1) {
        throw new RuntimeException("Failed to start distributed work queue for copying ", e1);
    }
    try {
        logSorter.startWatchingForRecoveryLogs(distWorkQThreadPool);
    } catch (Exception ex) {
        log.error("Error setting watches for recoveries");
        throw new RuntimeException(ex);
    }
    // Start the thrift service listening for incoming replication requests
    try {
        replicationAddress = startReplicationService();
    } catch (UnknownHostException e) {
        throw new RuntimeException("Failed to start replication service", e);
    }
    // Start the pool to handle outgoing replications
    final ThreadPoolExecutor replicationThreadPool = new SimpleThreadPool(getConfiguration().getCount(Property.REPLICATION_WORKER_THREADS), "replication task");
    replWorker.setExecutor(replicationThreadPool);
    replWorker.run();
    // Periodically check the configured size of the replication worker pool and, if it changed, resize the pool
    final AccumuloConfiguration aconf = getConfiguration();
    Runnable replicationWorkThreadPoolResizer = new Runnable() {

        @Override
        public void run() {
            int maxPoolSize = aconf.getCount(Property.REPLICATION_WORKER_THREADS);
            if (replicationThreadPool.getMaximumPoolSize() != maxPoolSize) {
                log.info("Resizing thread pool for sending replication work from {} to {}", replicationThreadPool.getMaximumPoolSize(), maxPoolSize);
                replicationThreadPool.setMaximumPoolSize(maxPoolSize);
            }
        }
    };
    SimpleTimer.getInstance(aconf).schedule(replicationWorkThreadPoolResizer, 10000, 30000);
    final long CLEANUP_BULK_LOADED_CACHE_MILLIS = 15 * 60 * 1000;
    SimpleTimer.getInstance(aconf).schedule(new BulkImportCacheCleaner(this), CLEANUP_BULK_LOADED_CACHE_MILLIS, CLEANUP_BULK_LOADED_CACHE_MILLIS);
    HostAndPort masterHost;
    while (!serverStopRequested) {
        // send all of the pending messages
        try {
            MasterMessage mm = null;
            MasterClientService.Client iface = null;
            try {
                // wait until a message is ready to send, or a server stop was requested
                while (mm == null && !serverStopRequested) {
                    mm = masterMessages.poll(1000, TimeUnit.MILLISECONDS);
                }
                // we have a message to send to the master, so grab a connection
                masterHost = getMasterAddress();
                iface = masterConnection(masterHost);
                TServiceClient client = iface;
                // if the transport closes before mm is sent, the finally block places mm back on the queue
                while (!serverStopRequested && mm != null && client != null && client.getOutputProtocol() != null && client.getOutputProtocol().getTransport() != null && client.getOutputProtocol().getTransport().isOpen()) {
                    try {
                        mm.send(rpcCreds(), getClientAddressString(), iface);
                        mm = null;
                    } catch (TException ex) {
                        log.warn("Error sending message: queuing message again");
                        masterMessages.putFirst(mm);
                        mm = null;
                        throw ex;
                    }
                    // if any messages are immediately available grab em and
                    // send them
                    mm = masterMessages.poll();
                }
            } finally {
                if (mm != null) {
                    masterMessages.putFirst(mm);
                }
                returnMasterConnection(iface);
                sleepUninterruptibly(1, TimeUnit.SECONDS);
            }
        } catch (InterruptedException e) {
            log.info("Interrupt Exception received, shutting down");
            serverStopRequested = true;
        } catch (Exception e) {
            // may have lost connection with master
            // loop back to the beginning and wait for a new one
            // this way we survive master failures
            log.error(getClientAddressString() + ": TServerInfo: Exception. Master down?", e);
        }
    }
    // wait for shutdown to complete so the server's resources don't get prematurely finalized
    synchronized (this) {
        while (!shutdownComplete) {
            try {
                this.wait(1000);
            } catch (InterruptedException e) {
                log.error(e.toString());
            }
        }
    }
    log.debug("Stopping Replication Server");
    TServerUtils.stopTServer(this.replServer);
    log.debug("Stopping Thrift Servers");
    TServerUtils.stopTServer(server);
    try {
        log.debug("Closing filesystem");
        fs.close();
    } catch (IOException e) {
        log.warn("Failed to close filesystem : {}", e.getMessage(), e);
    }
    gcLogger.logGCInfo(getConfiguration());
    log.info("TServerInfo: stop requested. exiting ... ");
    try {
        tabletServerLock.unlock();
    } catch (Exception e) {
        log.warn("Failed to release tablet server lock", e);
    }
}
Also used : MasterMessage(org.apache.accumulo.tserver.mastermessage.MasterMessage) TException(org.apache.thrift.TException) UnknownHostException(java.net.UnknownHostException) IOException(java.io.IOException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) TSampleNotPresentException(org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException) WalMarkerException(org.apache.accumulo.server.log.WalStateManager.WalMarkerException) ConstraintViolationException(org.apache.accumulo.core.tabletserver.thrift.ConstraintViolationException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) ExecutionException(java.util.concurrent.ExecutionException) NotServingTabletException(org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) KeeperException(org.apache.zookeeper.KeeperException) NoSuchScanIDException(org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException) CancellationException(java.util.concurrent.CancellationException) DistributedStoreException(org.apache.accumulo.server.master.state.DistributedStoreException) TException(org.apache.thrift.TException) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ThriftTableOperationException(org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException) BadLocationStateException(org.apache.accumulo.server.master.state.TabletLocationState.BadLocationStateException) TimeoutException(java.util.concurrent.TimeoutException) TabletClosedException(org.apache.accumulo.tserver.tablet.TabletClosedException) SampleNotPresentException(org.apache.accumulo.core.client.SampleNotPresentException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) DistributedWorkQueue(org.apache.accumulo.server.zookeeper.DistributedWorkQueue) TServiceClient(org.apache.thrift.TServiceClient) HostAndPort(org.apache.accumulo.core.util.HostAndPort) TabletServerScanMetrics(org.apache.accumulo.tserver.metrics.TabletServerScanMetrics) Metrics(org.apache.accumulo.server.metrics.Metrics) TabletServerUpdateMetrics(org.apache.accumulo.tserver.metrics.TabletServerUpdateMetrics) BulkImportCacheCleaner(org.apache.accumulo.tserver.tablet.BulkImportCacheCleaner) LoggingRunnable(org.apache.accumulo.fate.util.LoggingRunnable) MasterClientService(org.apache.accumulo.core.master.thrift.MasterClientService) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) KeeperException(org.apache.zookeeper.KeeperException) SimpleThreadPool(org.apache.accumulo.core.util.SimpleThreadPool) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration)
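On the consumer side, startProcessing takes a DistributedWorkQueue.Processor, as with BulkFailedCopyProcessor above. The stand-in below only logs, to illustrate the assumed newProcessor()/process() contract rather than the project's copy logic; log is assumed to be the enclosing class's slf4j logger.

// Hedged sketch of a DistributedWorkQueue.Processor implementation.
class LoggingProcessor implements DistributedWorkQueue.Processor {
    @Override
    public DistributedWorkQueue.Processor newProcessor() {
        return new LoggingProcessor(); // assumed: a fresh processor per work item
    }

    @Override
    public void process(String workId, byte[] data) {
        log.info("Processing work item {} ({} bytes)", workId, data.length);
    }
}
// Usage, mirroring the bulkFailedCopyQ wiring above (queuePath and pool are assumptions):
// new DistributedWorkQueue(queuePath, getConfiguration()).startProcessing(new LoggingProcessor(), pool);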

Aggregations

DistributedWorkQueue (org.apache.accumulo.server.zookeeper.DistributedWorkQueue) 15
Test (org.junit.Test) 10
ReplicationTarget (org.apache.accumulo.core.replication.ReplicationTarget) 9
BatchWriter (org.apache.accumulo.core.client.BatchWriter) 7
Mutation (org.apache.accumulo.core.data.Mutation) 7
Text (org.apache.hadoop.io.Text) 6
HashMap (java.util.HashMap) 5
HashSet (java.util.HashSet) 5
Map (java.util.Map) 5
Table (org.apache.accumulo.core.client.impl.Table) 5
Status (org.apache.accumulo.server.replication.proto.Replication.Status) 5
ReplicationTable (org.apache.accumulo.core.replication.ReplicationTable) 4
Path (org.apache.hadoop.fs.Path) 2
KeeperException (org.apache.zookeeper.KeeperException) 2
BufferedReader (java.io.BufferedReader) 1
IOException (java.io.IOException) 1
InputStreamReader (java.io.InputStreamReader) 1
UnknownHostException (java.net.UnknownHostException) 1
LinkedHashSet (java.util.LinkedHashSet) 1
TreeMap (java.util.TreeMap) 1