Search in sources :

Example 1 with GcCycleStats

use of org.apache.accumulo.core.gc.thrift.GcCycleStats in project accumulo by apache.

the class GarbageCollectWriteAheadLogsTest method deleteUnreferenceLogOnDeadServer.

@Test
public void deleteUnreferenceLogOnDeadServer() throws Exception {
    // Mocked collaborators handed to the collector under test.
    AccumuloServerContext serverContext = EasyMock.createMock(AccumuloServerContext.class);
    VolumeManager volumeManager = EasyMock.createMock(VolumeManager.class);
    WalStateManager walMarkers = EasyMock.createMock(WalStateManager.class);
    LiveTServerSet liveTServers = EasyMock.createMock(LiveTServerSet.class);
    Connector connector = EasyMock.createMock(Connector.class);
    Scanner metadataScanner = EasyMock.createMock(Scanner.class);
    Scanner replicationScanner = EasyMock.createMock(Scanner.class);
    GCStatus gcStatus = new GCStatus(null, null, null, new GcCycleStats());

    // server1 is the only live server; the marker state is queried for server2 and reported OPEN.
    EasyMock.expect(liveTServers.getCurrentServers()).andReturn(Collections.singleton(server1));
    EasyMock.expect(walMarkers.getAllMarkers()).andReturn(markers2).once();
    EasyMock.expect(walMarkers.state(server2, id)).andReturn(new Pair<>(WalState.OPEN, path));

    // Replication table scan: restricted to the status section, returns no entries.
    EasyMock.expect(serverContext.getConnector()).andReturn(connector);
    EasyMock.expect(connector.createScanner(ReplicationTable.NAME, Authorizations.EMPTY)).andReturn(replicationScanner);
    replicationScanner.fetchColumnFamily(ReplicationSchema.StatusSection.NAME);
    EasyMock.expectLastCall().once();
    EasyMock.expect(replicationScanner.iterator()).andReturn(emptyKV);

    // Metadata table scan: restricted to the replication section, returns no entries.
    EasyMock.expect(connector.createScanner(MetadataTable.NAME, Authorizations.EMPTY)).andReturn(metadataScanner);
    metadataScanner.fetchColumnFamily(MetadataSchema.ReplicationSection.COLF);
    EasyMock.expectLastCall().once();
    metadataScanner.setRange(MetadataSchema.ReplicationSection.getRange());
    EasyMock.expectLastCall().once();
    EasyMock.expect(metadataScanner.iterator()).andReturn(emptyKV);

    // Expected clean-up: the log file is deleted, its marker removed, and server2 forgotten.
    EasyMock.expect(volumeManager.deleteRecursively(path)).andReturn(true).once();
    walMarkers.removeWalMarker(server2, id);
    EasyMock.expectLastCall().once();
    walMarkers.forget(server2);
    EasyMock.expectLastCall().once();

    EasyMock.replay(serverContext, volumeManager, walMarkers, liveTServers, connector, replicationScanner, metadataScanner);
    GarbageCollectWriteAheadLogs collector = new GarbageCollectWriteAheadLogs(serverContext, volumeManager, false, liveTServers, walMarkers, tabletOnServer1List);
    collector.collect(gcStatus);
    EasyMock.verify(serverContext, volumeManager, walMarkers, liveTServers, connector, replicationScanner, metadataScanner);
}
Also used : VolumeManager(org.apache.accumulo.server.fs.VolumeManager) Connector(org.apache.accumulo.core.client.Connector) Scanner(org.apache.accumulo.core.client.Scanner) AccumuloServerContext(org.apache.accumulo.server.AccumuloServerContext) WalStateManager(org.apache.accumulo.server.log.WalStateManager) GcCycleStats(org.apache.accumulo.core.gc.thrift.GcCycleStats) GCStatus(org.apache.accumulo.core.gc.thrift.GCStatus) LiveTServerSet(org.apache.accumulo.server.master.LiveTServerSet) Test(org.junit.Test)

Example 2 with GcCycleStats

use of org.apache.accumulo.core.gc.thrift.GcCycleStats in project accumulo by apache.

the class GarbageCollectWriteAheadLogsTest method testRemoveUnusedLog.

@Test
public void testRemoveUnusedLog() throws Exception {
    // Mocked collaborators handed to the collector under test.
    AccumuloServerContext serverContext = EasyMock.createMock(AccumuloServerContext.class);
    VolumeManager volumeManager = EasyMock.createMock(VolumeManager.class);
    WalStateManager walMarkers = EasyMock.createMock(WalStateManager.class);
    LiveTServerSet liveTServers = EasyMock.createMock(LiveTServerSet.class);
    GCStatus gcStatus = new GCStatus(null, null, null, new GcCycleStats());

    // server1 is live and its log is reported UNREFERENCED.
    EasyMock.expect(liveTServers.getCurrentServers()).andReturn(Collections.singleton(server1));
    EasyMock.expect(walMarkers.getAllMarkers()).andReturn(markers).once();
    EasyMock.expect(walMarkers.state(server1, id)).andReturn(new Pair<>(WalState.UNREFERENCED, path));

    // Expected clean-up: the log file is deleted and its marker removed.
    EasyMock.expect(volumeManager.deleteRecursively(path)).andReturn(true).once();
    walMarkers.removeWalMarker(server1, id);
    EasyMock.expectLastCall().once();

    EasyMock.replay(serverContext, volumeManager, walMarkers, liveTServers);
    GarbageCollectWriteAheadLogs collector = new GarbageCollectWriteAheadLogs(serverContext, volumeManager, false, liveTServers, walMarkers, tabletOnServer1List) {

        // Replace the replication step with a stub so no connector or scanner mocks are needed.
        @Override
        protected int removeReplicationEntries(Map<UUID, TServerInstance> candidates) throws IOException, KeeperException, InterruptedException {
            return 0;
        }
    };
    collector.collect(gcStatus);
    EasyMock.verify(serverContext, volumeManager, walMarkers, liveTServers);
}
Also used : VolumeManager(org.apache.accumulo.server.fs.VolumeManager) AccumuloServerContext(org.apache.accumulo.server.AccumuloServerContext) WalStateManager(org.apache.accumulo.server.log.WalStateManager) GcCycleStats(org.apache.accumulo.core.gc.thrift.GcCycleStats) GCStatus(org.apache.accumulo.core.gc.thrift.GCStatus) Map(java.util.Map) LiveTServerSet(org.apache.accumulo.server.master.LiveTServerSet) Test(org.junit.Test)

Example 3 with GcCycleStats

use of org.apache.accumulo.core.gc.thrift.GcCycleStats in project accumulo by apache.

the class GarbageCollectWriteAheadLogs method collect.

/**
 * Runs one write-ahead-log garbage collection cycle and records its progress in {@code status}.
 *
 * <p>
 * The cycle calls, in order and each under its own trace span: {@code getCurrent},
 * {@code removeEntriesInUse}, {@code removeReplicationEntries}, {@code removeFiles} and
 * {@code removeTabletServerMarkers}. The duration of every step is logged at info level.
 *
 * <p>
 * No exception is propagated to the caller. A failure in {@code removeEntriesInUse} or
 * {@code removeReplicationEntries} is logged and ends the cycle early; any other exception is
 * logged by the outer handler. In both cases {@code status.lastLog} is left untouched.
 *
 * @param status
 *          GC status whose {@code currentLog} statistics are updated; when the cycle completes,
 *          {@code currentLog} is moved to {@code lastLog} and replaced by a fresh
 *          {@link GcCycleStats}
 */
public void collect(GCStatus status) {
    Span span = Trace.start("getCandidates");
    try {
        status.currentLog.started = System.currentTimeMillis();
        Map<TServerInstance, Set<UUID>> logsByServer = new HashMap<>();
        Map<UUID, Pair<WalState, Path>> logsState = new HashMap<>();
        // Scan for log file info first: the order is important
        // Consider:
        // * get live servers
        // * new server gets a lock, creates a log
        // * get logs
        // * the log appears to belong to a dead server
        long count = getCurrent(logsByServer, logsState);
        long fileScanStop = System.currentTimeMillis();
        log.info(String.format("Fetched %d files for %d servers in %.2f seconds", count, logsByServer.size(), (fileScanStop - status.currentLog.started) / 1000.));
        status.currentLog.candidates = count;
        span.stop();
        // now it's safe to get the liveServers
        Set<TServerInstance> currentServers = liveServers.getCurrentServers();
        Map<UUID, TServerInstance> uuidToTServer;
        span = Trace.start("removeEntriesInUse");
        try {
            uuidToTServer = removeEntriesInUse(logsByServer, currentServers, logsState);
            count = uuidToTServer.size();
        } catch (Exception ex) {
            log.error("Unable to scan metadata table", ex);
            return;
        } finally {
            span.stop();
        }
        long logEntryScanStop = System.currentTimeMillis();
        log.info(String.format("%d log entries scanned in %.2f seconds", count, (logEntryScanStop - fileScanStop) / 1000.));
        span = Trace.start("removeReplicationEntries");
        try {
            count = removeReplicationEntries(uuidToTServer);
        } catch (Exception ex) {
            log.error("Unable to scan replication table", ex);
            return;
        } finally {
            span.stop();
        }
        long replicationEntryScanStop = System.currentTimeMillis();
        log.info(String.format("%d replication entries scanned in %.2f seconds", count, (replicationEntryScanStop - logEntryScanStop) / 1000.));
        span = Trace.start("removeFiles");
        // only logs that survived both filters above remain candidates for deletion
        logsState.keySet().retainAll(uuidToTServer.keySet());
        count = removeFiles(logsState.values(), status);
        long removeStop = System.currentTimeMillis();
        // Measure from the end of the replication scan so that the time already reported for that
        // step is not counted a second time in the removal duration.
        log.info(String.format("%d total logs removed from %d servers in %.2f seconds", count, logsByServer.size(), (removeStop - replicationEntryScanStop) / 1000.));
        span.stop();
        span = Trace.start("removeMarkers");
        count = removeTabletServerMarkers(uuidToTServer, logsByServer, currentServers);
        long removeMarkersStop = System.currentTimeMillis();
        log.info(String.format("%d markers removed in %.2f seconds", count, (removeMarkersStop - removeStop) / 1000.));
        span.stop();
        // NOTE(review): the cycle's finish time is the end of file removal, not of marker
        // removal - confirm this is intended.
        status.currentLog.finished = removeStop;
        status.lastLog = status.currentLog;
        status.currentLog = new GcCycleStats();
    } catch (Exception e) {
        log.error("exception occured while garbage collecting write ahead logs", e);
    } finally {
        // Stops whichever span is current if an exception escaped mid-step. On the normal and
        // early-return paths that span has already been stopped above.
        // NOTE(review): relies on Span.stop() tolerating a repeated call - confirm.
        span.stop();
    }
}
Also used : LiveTServerSet(org.apache.accumulo.server.master.LiveTServerSet) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) GcCycleStats(org.apache.accumulo.core.gc.thrift.GcCycleStats) Span(org.apache.accumulo.core.trace.Span) TServerInstance(org.apache.accumulo.server.master.state.TServerInstance) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) FileNotFoundException(java.io.FileNotFoundException) ReplicationTableOfflineException(org.apache.accumulo.core.replication.ReplicationTableOfflineException) WalMarkerException(org.apache.accumulo.server.log.WalStateManager.WalMarkerException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) UUID(java.util.UUID) Pair(org.apache.accumulo.core.util.Pair)

Example 4 with GcCycleStats

use of org.apache.accumulo.core.gc.thrift.GcCycleStats in project accumulo by apache.

the class SimpleGarbageCollector method run.

/**
 * Main loop of the garbage collector.
 *
 * <p>
 * Acquires the ZooKeeper lock (exiting the JVM with status 1 on failure), sleeps for the start
 * delay, and then repeats forever: a data file collection pass over the root and metadata
 * tables, closing of write-ahead log references for replication, a write-ahead log collection
 * pass, a compaction of the metadata and root tables, and a sleep of
 * {@code Property.GC_CYCLE_DELAY}. Failures inside a cycle are logged and the loop continues.
 *
 * <p>
 * The method only returns when the thread is interrupted during one of the sleeps; the
 * interrupt flag is restored before returning so the caller can observe the interruption.
 */
private void run() {
    log.info("Trying to acquire ZooKeeper lock for garbage collector");
    try {
        getZooLock(startStatsService());
    } catch (Exception ex) {
        log.error("{}", ex.getMessage(), ex);
        System.exit(1);
    }
    // Sleep for an initial period, giving the master time to start up and
    // old data files to be unused
    try {
        long delay = getStartDelay();
        log.debug("Sleeping for {} milliseconds before beginning garbage collection cycles", delay);
        Thread.sleep(delay);
    } catch (InterruptedException e) {
        log.warn("{}", e.getMessage(), e);
        // Thread.sleep cleared the interrupt flag when it threw; set it again for the caller
        Thread.currentThread().interrupt();
        return;
    }
    ProbabilitySampler sampler = new ProbabilitySampler(getConfiguration().getFraction(Property.GC_TRACE_PERCENT));
    while (true) {
        Trace.on("gc", sampler);
        Span gcSpan = Trace.start("loop");
        long tStart = System.currentTimeMillis();
        try {
            // make room
            System.gc();
            status.current.started = System.currentTimeMillis();
            new GarbageCollectionAlgorithm().collect(new GCEnv(RootTable.NAME));
            new GarbageCollectionAlgorithm().collect(new GCEnv(MetadataTable.NAME));
            log.info("Number of data file candidates for deletion: {}", status.current.candidates);
            log.info("Number of data file candidates still in use: {}", status.current.inUse);
            log.info("Number of successfully deleted data files: {}", status.current.deleted);
            log.info("Number of data files delete failures: {}", status.current.errors);
            status.current.finished = System.currentTimeMillis();
            // publish the finished cycle and start the next one from empty statistics
            status.last = status.current;
            status.current = new GcCycleStats();
        } catch (Exception e) {
            log.error("{}", e.getMessage(), e);
        }
        long tStop = System.currentTimeMillis();
        log.info(String.format("Collect cycle took %.2f seconds", ((tStop - tStart) / 1000.0)));
        // We want to prune references to fully-replicated WALs from the replication table which are no longer referenced in the metadata table
        // before running GarbageCollectWriteAheadLogs to ensure we delete as many files as possible.
        Span replSpan = Trace.start("replicationClose");
        try {
            CloseWriteAheadLogReferences closeWals = new CloseWriteAheadLogReferences(this);
            closeWals.run();
        } catch (Exception e) {
            log.error("Error trying to close write-ahead logs for replication table", e);
        } finally {
            replSpan.stop();
        }
        // Clean up any unused write-ahead logs
        Span waLogs = Trace.start("walogs");
        try {
            GarbageCollectWriteAheadLogs walogCollector = new GarbageCollectWriteAheadLogs(this, fs, isUsingTrash());
            log.info("Beginning garbage collection of write-ahead logs");
            walogCollector.collect(status);
        } catch (Exception e) {
            log.error("{}", e.getMessage(), e);
        } finally {
            waLogs.stop();
        }
        gcSpan.stop();
        // we just made a lot of metadata changes: flush them out
        try {
            Connector connector = getConnector();
            connector.tableOperations().compact(MetadataTable.NAME, null, null, true, true);
            connector.tableOperations().compact(RootTable.NAME, null, null, true, true);
        } catch (Exception e) {
            log.warn("{}", e.getMessage(), e);
        }
        Trace.off();
        try {
            long gcDelay = getConfiguration().getTimeInMillis(Property.GC_CYCLE_DELAY);
            log.debug("Sleeping for {} milliseconds", gcDelay);
            Thread.sleep(gcDelay);
        } catch (InterruptedException e) {
            log.warn("{}", e.getMessage(), e);
            // Thread.sleep cleared the interrupt flag when it threw; set it again for the caller
            Thread.currentThread().interrupt();
            return;
        }
    }
}
Also used : ProbabilitySampler(org.apache.accumulo.core.trace.ProbabilitySampler) CloseWriteAheadLogReferences(org.apache.accumulo.gc.replication.CloseWriteAheadLogReferences) Connector(org.apache.accumulo.core.client.Connector) GcCycleStats(org.apache.accumulo.core.gc.thrift.GcCycleStats) Span(org.apache.accumulo.core.trace.Span) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) MutationsRejectedException(org.apache.accumulo.core.client.MutationsRejectedException) FileNotFoundException(java.io.FileNotFoundException) ReplicationTableOfflineException(org.apache.accumulo.core.replication.ReplicationTableOfflineException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) AccumuloException(org.apache.accumulo.core.client.AccumuloException)

Example 5 with GcCycleStats

use of org.apache.accumulo.core.gc.thrift.GcCycleStats in project accumulo by apache.

the class GarbageCollectWriteAheadLogsTest method replicationDelaysFileCollection.

@Test
public void replicationDelaysFileCollection() throws Exception {
    // Mocked collaborators handed to the collector under test.
    AccumuloServerContext serverContext = EasyMock.createMock(AccumuloServerContext.class);
    VolumeManager volumeManager = EasyMock.createMock(VolumeManager.class);
    WalStateManager walMarkers = EasyMock.createMock(WalStateManager.class);
    LiveTServerSet liveTServers = EasyMock.createMock(LiveTServerSet.class);
    Connector connector = EasyMock.createMock(Connector.class);
    Scanner metadataScanner = EasyMock.createMock(Scanner.class);
    Scanner replicationScanner = EasyMock.createMock(Scanner.class);

    // A single metadata replication-section entry whose row is built from the log's path.
    String replicationRow = MetadataSchema.ReplicationSection.getRowPrefix() + path.toString();
    String replicationFamily = MetadataSchema.ReplicationSection.COLF.toString();
    String replicationQualifier = "1";
    Key pendingKey = new Key(replicationRow, replicationFamily, replicationQualifier);
    Map<Key, Value> pendingReplication = Collections.singletonMap(pendingKey, new Value(new byte[0]));
    GCStatus gcStatus = new GCStatus(null, null, null, new GcCycleStats());

    // server1 is live and its log is reported UNREFERENCED.
    EasyMock.expect(liveTServers.getCurrentServers()).andReturn(Collections.singleton(server1));
    EasyMock.expect(walMarkers.getAllMarkers()).andReturn(markers).once();
    EasyMock.expect(walMarkers.state(server1, id)).andReturn(new Pair<>(WalState.UNREFERENCED, path));

    // Replication table scan: restricted to the status section, returns no entries.
    EasyMock.expect(serverContext.getConnector()).andReturn(connector);
    EasyMock.expect(connector.createScanner(ReplicationTable.NAME, Authorizations.EMPTY)).andReturn(replicationScanner);
    replicationScanner.fetchColumnFamily(ReplicationSchema.StatusSection.NAME);
    EasyMock.expectLastCall().once();
    EasyMock.expect(replicationScanner.iterator()).andReturn(emptyKV);

    // Metadata table scan: restricted to the replication section, returns the pending entry.
    EasyMock.expect(connector.createScanner(MetadataTable.NAME, Authorizations.EMPTY)).andReturn(metadataScanner);
    metadataScanner.fetchColumnFamily(MetadataSchema.ReplicationSection.COLF);
    EasyMock.expectLastCall().once();
    metadataScanner.setRange(MetadataSchema.ReplicationSection.getRange());
    EasyMock.expectLastCall().once();
    EasyMock.expect(metadataScanner.iterator()).andReturn(pendingReplication.entrySet().iterator());

    // No deleteRecursively or removeWalMarker expectation is recorded for this scenario.
    EasyMock.replay(serverContext, volumeManager, walMarkers, liveTServers, connector, replicationScanner, metadataScanner);
    GarbageCollectWriteAheadLogs collector = new GarbageCollectWriteAheadLogs(serverContext, volumeManager, false, liveTServers, walMarkers, tabletOnServer1List);
    collector.collect(gcStatus);
    EasyMock.verify(serverContext, volumeManager, walMarkers, liveTServers, connector, replicationScanner, metadataScanner);
}
Also used : VolumeManager(org.apache.accumulo.server.fs.VolumeManager) Connector(org.apache.accumulo.core.client.Connector) Scanner(org.apache.accumulo.core.client.Scanner) AccumuloServerContext(org.apache.accumulo.server.AccumuloServerContext) GcCycleStats(org.apache.accumulo.core.gc.thrift.GcCycleStats) GCStatus(org.apache.accumulo.core.gc.thrift.GCStatus) LiveTServerSet(org.apache.accumulo.server.master.LiveTServerSet) WalStateManager(org.apache.accumulo.server.log.WalStateManager) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Aggregations

GcCycleStats (org.apache.accumulo.core.gc.thrift.GcCycleStats)7 LiveTServerSet (org.apache.accumulo.server.master.LiveTServerSet)6 GCStatus (org.apache.accumulo.core.gc.thrift.GCStatus)5 AccumuloServerContext (org.apache.accumulo.server.AccumuloServerContext)5 VolumeManager (org.apache.accumulo.server.fs.VolumeManager)5 WalStateManager (org.apache.accumulo.server.log.WalStateManager)5 Test (org.junit.Test)5 Connector (org.apache.accumulo.core.client.Connector)4 Scanner (org.apache.accumulo.core.client.Scanner)3 FileNotFoundException (java.io.FileNotFoundException)2 IOException (java.io.IOException)2 Map (java.util.Map)2 AccumuloException (org.apache.accumulo.core.client.AccumuloException)2 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)2 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)2 ReplicationTableOfflineException (org.apache.accumulo.core.replication.ReplicationTableOfflineException)2 Span (org.apache.accumulo.core.trace.Span)2 KeeperException (org.apache.zookeeper.KeeperException)2 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)1 UnknownHostException (java.net.UnknownHostException)1