Example 1 with ReplicationTableOfflineException

Use of org.apache.accumulo.core.replication.ReplicationTableOfflineException in project accumulo by apache.

From the class GarbageCollectWriteAheadLogs, method removeReplicationEntries:

protected int removeReplicationEntries(Map<UUID, TServerInstance> candidates) throws IOException, KeeperException, InterruptedException {
    Connector conn;
    try {
        conn = context.getConnector();
        try {
            final Scanner s = ReplicationTable.getScanner(conn);
            StatusSection.limit(s);
            for (Entry<Key, Value> entry : s) {
                UUID id = path2uuid(new Path(entry.getKey().getRow().toString()));
                candidates.remove(id);
                log.info("Ignore closed log " + id + " because it is being replicated");
            }
        } catch (ReplicationTableOfflineException ex) {
            return candidates.size();
        }
        final Scanner scanner = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
        scanner.fetchColumnFamily(MetadataSchema.ReplicationSection.COLF);
        scanner.setRange(MetadataSchema.ReplicationSection.getRange());
        for (Entry<Key, Value> entry : scanner) {
            Text file = new Text();
            MetadataSchema.ReplicationSection.getFile(entry.getKey(), file);
            UUID id = path2uuid(new Path(file.toString()));
            candidates.remove(id);
            log.info("Ignore closed log " + id + " because it is being replicated");
        }
        return candidates.size();
    } catch (AccumuloException | AccumuloSecurityException | TableNotFoundException e) {
        log.error("Failed to scan metadata table", e);
        throw new IllegalArgumentException(e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Connector (org.apache.accumulo.core.client.Connector), Scanner (org.apache.accumulo.core.client.Scanner), AccumuloException (org.apache.accumulo.core.client.AccumuloException), Text (org.apache.hadoop.io.Text), TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException), Value (org.apache.accumulo.core.data.Value), AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException), ReplicationTableOfflineException (org.apache.accumulo.core.replication.ReplicationTableOfflineException), UUID (java.util.UUID), Key (org.apache.accumulo.core.data.Key)
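
The pattern above treats an offline replication table as a signal that no replication is happening, not as an error: the candidate set is returned unchanged. Below is a minimal sketch of the same idea as a standalone helper. The class and method names (ReplicatedWalIds, replicatedWalIds) are illustrative assumptions rather than Accumulo API, and it assumes, as the example does, that a WAL's file name is its UUID; the ReplicationTable and StatusSection calls match the example above.

import java.util.Collections;
import java.util.HashSet;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.replication.ReplicationSchema.StatusSection;
import org.apache.accumulo.core.replication.ReplicationTable;
import org.apache.accumulo.core.replication.ReplicationTableOfflineException;
import org.apache.hadoop.fs.Path;

public class ReplicatedWalIds {
    /** Returns the UUIDs of WALs that have a Status entry; empty if the replication table is offline. */
    public static Set<UUID> replicatedWalIds(Connector conn) {
        Scanner s;
        try {
            s = ReplicationTable.getScanner(conn);
        } catch (ReplicationTableOfflineException e) {
            // An offline table means replication is not active, so no WAL is held by it
            return Collections.emptySet();
        }
        StatusSection.limit(s);
        Set<UUID> ids = new HashSet<>();
        for (Entry<Key, Value> entry : s) {
            // Status rows are keyed by the WAL path; the file name is the log's UUID
            ids.add(UUID.fromString(new Path(entry.getKey().getRow().toString()).getName()));
        }
        return ids;
    }
}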

Example 2 with ReplicationTableOfflineException

Use of org.apache.accumulo.core.replication.ReplicationTableOfflineException in project accumulo by apache.

From the class DistributedWorkQueueWorkAssigner, method createWork:

/**
 * Scan over the {@link WorkSection} of the replication table adding work for entries that have data to replicate and have not already been queued.
 */
protected void createWork() {
    // Create a scanner over the replication table's order entries
    Scanner s;
    try {
        s = ReplicationTable.getScanner(conn);
    } catch (ReplicationTableOfflineException e) {
        // no work to do; replication is off
        return;
    }
    OrderSection.limit(s);
    Text buffer = new Text();
    for (Entry<Key, Value> orderEntry : s) {
        // Stop this pass if the work queue has already exceeded its configured maximum
        if (getQueueSize() > maxQueueSize) {
            log.warn("Queued replication work exceeds configured maximum ({}), sleeping to allow work to occur", maxQueueSize);
            return;
        }
        String file = OrderSection.getFile(orderEntry.getKey(), buffer);
        OrderSection.getTableId(orderEntry.getKey(), buffer);
        String sourceTableId = buffer.toString();
        log.info("Determining if {} from {} needs to be replicated", file, sourceTableId);
        Scanner workScanner;
        try {
            workScanner = ReplicationTable.getScanner(conn);
        } catch (ReplicationTableOfflineException e) {
            log.warn("Replication table is offline. Will retry...");
            sleepUninterruptibly(5, TimeUnit.SECONDS);
            return;
        }
        WorkSection.limit(workScanner);
        workScanner.setRange(Range.exact(file));
        int newReplicationTasksSubmitted = 0, workEntriesRead = 0;
        // For a file, we can concurrently replicate it to multiple targets
        for (Entry<Key, Value> workEntry : workScanner) {
            workEntriesRead++;
            Status status;
            try {
                status = StatusUtil.fromValue(workEntry.getValue());
            } catch (InvalidProtocolBufferException e) {
                log.warn("Could not deserialize protobuf from work entry for {} to {}, will retry", file, ReplicationTarget.from(workEntry.getKey().getColumnQualifier()), e);
                continue;
            }
            // Get the ReplicationTarget for this Work record
            ReplicationTarget target = WorkSection.getTarget(workEntry.getKey(), buffer);
            // Get the file (if any) currently being replicated to the given peer for the given source table
            Collection<String> keysBeingReplicated = getQueuedWork(target);
            Path p = new Path(file);
            String filename = p.getName();
            String key = DistributedWorkQueueWorkAssignerHelper.getQueueKey(filename, target);
            if (!shouldQueueWork(target)) {
                if (!isWorkRequired(status) && keysBeingReplicated.contains(key)) {
                    log.debug("Removing {} from replication state to {} because replication is complete", key, target.getPeerName());
                    this.removeQueuedWork(target, key);
                }
                continue;
            }
            // If there is work to do
            if (isWorkRequired(status)) {
                if (queueWork(p, target)) {
                    newReplicationTasksSubmitted++;
                }
            } else {
                log.debug("Not queueing work for {} to {} because {} doesn't need replication", file, target, ProtobufUtil.toString(status));
                if (keysBeingReplicated.contains(key)) {
                    log.debug("Removing {} from replication state to {} because replication is complete", key, target.getPeerName());
                    this.removeQueuedWork(target, key);
                }
            }
        }
        log.debug("Read {} replication entries from the WorkSection of the replication table", workEntriesRead);
        log.info("Assigned {} replication work entries for {}", newReplicationTasksSubmitted, file);
    }
}
Also used: Status (org.apache.accumulo.server.replication.proto.Replication.Status), Path (org.apache.hadoop.fs.Path), Scanner (org.apache.accumulo.core.client.Scanner), InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException), Text (org.apache.hadoop.io.Text), ReplicationTarget (org.apache.accumulo.core.replication.ReplicationTarget), Value (org.apache.accumulo.core.data.Value), ReplicationTableOfflineException (org.apache.accumulo.core.replication.ReplicationTableOfflineException), Key (org.apache.accumulo.core.data.Key)
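
createWork() abandons the whole pass when the replication table goes offline mid-scan and relies on the next scheduled invocation to retry. An alternative is a small bounded retry at the point where the scanner is opened. Here is a hedged sketch; RetryingScanner and openOrNull are hypothetical names, and the 5-second pause simply mirrors the sleepUninterruptibly call in the example above.

import java.util.concurrent.TimeUnit;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.replication.ReplicationTable;
import org.apache.accumulo.core.replication.ReplicationTableOfflineException;

public class RetryingScanner {
    /** Tries a few times to open a scanner on the replication table; null if it stays offline. */
    public static Scanner openOrNull(Connector conn, int attempts) throws InterruptedException {
        for (int i = 0; i < attempts; i++) {
            try {
                return ReplicationTable.getScanner(conn);
            } catch (ReplicationTableOfflineException e) {
                // The table may come online shortly (e.g. StatusMaker calls setOnline); wait and retry
                TimeUnit.SECONDS.sleep(5);
            }
        }
        return null;
    }
}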

Example 3 with ReplicationTableOfflineException

Use of org.apache.accumulo.core.replication.ReplicationTableOfflineException in project accumulo by apache.

From the class RemoveCompleteReplicationRecords, method run:

@Override
public void run() {
    BatchScanner bs;
    BatchWriter bw;
    try {
        bs = ReplicationTable.getBatchScanner(conn, 4);
        bw = ReplicationTable.getBatchWriter(conn);
        if (bs == null || bw == null)
            throw new AssertionError("Inconceivable; an exception should have been thrown, but 'bs' or 'bw' was null instead");
    } catch (ReplicationTableOfflineException e) {
        log.debug("Not attempting to remove complete replication records as the table ({}) isn't yet online", ReplicationTable.NAME);
        return;
    }
    bs.setRanges(Collections.singleton(new Range()));
    IteratorSetting cfg = new IteratorSetting(50, WholeRowIterator.class);
    StatusSection.limit(bs);
    WorkSection.limit(bs);
    bs.addScanIterator(cfg);
    Stopwatch sw = new Stopwatch();
    long recordsRemoved = 0;
    try {
        sw.start();
        recordsRemoved = removeCompleteRecords(conn, bs, bw);
    } finally {
        if (null != bs) {
            bs.close();
        }
        if (null != bw) {
            try {
                bw.close();
            } catch (MutationsRejectedException e) {
                log.error("Error writing mutations to {}, will retry", ReplicationTable.NAME, e);
            }
        }
        sw.stop();
    }
    log.info("Removed {} complete replication entries from the table {}", recordsRemoved, ReplicationTable.NAME);
}
Also used: IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), BatchScanner (org.apache.accumulo.core.client.BatchScanner), Stopwatch (com.google.common.base.Stopwatch), BatchWriter (org.apache.accumulo.core.client.BatchWriter), ReplicationTableOfflineException (org.apache.accumulo.core.replication.ReplicationTableOfflineException), Range (org.apache.accumulo.core.data.Range), MutationsRejectedException (org.apache.accumulo.core.client.MutationsRejectedException)
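
One subtlety in run() above: if getBatchScanner succeeds but getBatchWriter then throws, the already-opened scanner is never closed. Below is a hedged sketch of a slightly more defensive acquisition order; ScanAndWrite and scanAndWrite are hypothetical names, not Accumulo code, though the ReplicationTable calls match the example.

import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.replication.ReplicationTable;
import org.apache.accumulo.core.replication.ReplicationTableOfflineException;

public class ScanAndWrite {
    public static void scanAndWrite(Connector conn) {
        BatchScanner bs;
        try {
            bs = ReplicationTable.getBatchScanner(conn, 4);
        } catch (ReplicationTableOfflineException e) {
            return; // nothing acquired yet, nothing to clean up
        }
        BatchWriter bw;
        try {
            bw = ReplicationTable.getBatchWriter(conn);
        } catch (ReplicationTableOfflineException e) {
            bs.close(); // the table went offline between the two calls
            return;
        }
        try {
            // ... scan with bs, write mutations with bw ...
        } finally {
            bs.close();
            try {
                bw.close(); // flushes queued mutations; may throw if some were rejected
            } catch (MutationsRejectedException e) {
                // log and decide whether the pass should be retried
            }
        }
    }
}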

Example 4 with ReplicationTableOfflineException

Use of org.apache.accumulo.core.replication.ReplicationTableOfflineException in project accumulo by apache.

From the class StatusMaker, method run:

public void run() {
    Span span = Trace.start("replicationStatusMaker");
    try {
        // Read from a source table (typically accumulo.metadata)
        final Scanner s;
        try {
            s = conn.createScanner(sourceTableName, Authorizations.EMPTY);
        } catch (TableNotFoundException e) {
            throw new RuntimeException(e);
        }
        // Only pull replication records
        s.fetchColumnFamily(ReplicationSection.COLF);
        s.setRange(ReplicationSection.getRange());
        Text file = new Text();
        for (Entry<Key, Value> entry : s) {
            // Get a writer to the replication table
            if (null == replicationWriter) {
                // Ensures table is online
                try {
                    ReplicationTable.setOnline(conn);
                    replicationWriter = ReplicationTable.getBatchWriter(conn);
                } catch (ReplicationTableOfflineException | AccumuloSecurityException | AccumuloException e) {
                    log.warn("Replication table did not come online");
                    replicationWriter = null;
                    return;
                }
            }
            // Extract the useful bits from the status key
            MetadataSchema.ReplicationSection.getFile(entry.getKey(), file);
            Table.ID tableId = MetadataSchema.ReplicationSection.getTableId(entry.getKey());
            Status status;
            try {
                status = Status.parseFrom(entry.getValue().get());
            } catch (InvalidProtocolBufferException e) {
                log.warn("Could not deserialize protobuf for {}", file);
                continue;
            }
            log.debug("Creating replication status record for {} on table {} with {}.", file, tableId, ProtobufUtil.toString(status));
            Span workSpan = Trace.start("createStatusMutations");
            try {
                // Create entries in the replication table from the metadata table
                if (!addStatusRecord(file, tableId, entry.getValue())) {
                    continue;
                }
            } finally {
                workSpan.stop();
            }
            if (status.getClosed()) {
                Span orderSpan = Trace.start("recordStatusOrder");
                try {
                    if (!addOrderRecord(file, tableId, status, entry.getValue())) {
                        continue;
                    }
                } finally {
                    orderSpan.stop();
                }
                Span deleteSpan = Trace.start("deleteClosedStatus");
                try {
                    deleteStatusRecord(entry.getKey());
                } finally {
                    deleteSpan.stop();
                }
            }
        }
    } finally {
        span.stop();
    }
}
Also used: Status (org.apache.accumulo.server.replication.proto.Replication.Status), Scanner (org.apache.accumulo.core.client.Scanner), AccumuloException (org.apache.accumulo.core.client.AccumuloException), MetadataTable (org.apache.accumulo.core.metadata.MetadataTable), Table (org.apache.accumulo.core.client.impl.Table), ReplicationTable (org.apache.accumulo.core.replication.ReplicationTable), InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException), Text (org.apache.hadoop.io.Text), Span (org.apache.accumulo.core.trace.Span), TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException), Value (org.apache.accumulo.core.data.Value), AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException), ReplicationTableOfflineException (org.apache.accumulo.core.replication.ReplicationTableOfflineException), Key (org.apache.accumulo.core.data.Key)
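
The lazy writer setup above (ReplicationTable.setOnline followed by getBatchWriter, retried on the next pass if it fails) can be isolated into a small helper. This is a minimal sketch under the assumption that setOnline is safe to call when the table is already online, as the example's per-entry use suggests; LazyReplicationWriter is a hypothetical name.

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.replication.ReplicationTable;
import org.apache.accumulo.core.replication.ReplicationTableOfflineException;

public class LazyReplicationWriter {
    private BatchWriter writer;

    /** Returns a writer to the replication table, bringing it online first; null if that fails. */
    public BatchWriter writer(Connector conn) {
        if (writer == null) {
            try {
                ReplicationTable.setOnline(conn); // assumed to be a no-op if already online
                writer = ReplicationTable.getBatchWriter(conn);
            } catch (ReplicationTableOfflineException | AccumuloSecurityException | AccumuloException e) {
                return null; // caller should skip this pass and retry later
            }
        }
        return writer;
    }
}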

Example 5 with ReplicationTableOfflineException

Use of org.apache.accumulo.core.replication.ReplicationTableOfflineException in project accumulo by apache.

From the class WorkMaker, method run:

public void run() {
    if (!ReplicationTable.isOnline(conn)) {
        log.debug("Replication table is not yet online");
        return;
    }
    Span span = Trace.start("replicationWorkMaker");
    try {
        final Scanner s;
        try {
            s = ReplicationTable.getScanner(conn);
            if (null == writer) {
                setBatchWriter(ReplicationTable.getBatchWriter(conn));
            }
        } catch (ReplicationTableOfflineException e) {
            log.warn("Replication table was online, but not anymore");
            writer = null;
            return;
        }
        // Only pull records about data that has been ingested and is ready for replication
        StatusSection.limit(s);
        TableConfiguration tableConf;
        Text file = new Text();
        for (Entry<Key, Value> entry : s) {
            // Extract the useful bits from the status key
            ReplicationSchema.StatusSection.getFile(entry.getKey(), file);
            Table.ID tableId = ReplicationSchema.StatusSection.getTableId(entry.getKey());
            log.debug("Processing replication status record for {} on table {}", file, tableId);
            Status status;
            try {
                status = Status.parseFrom(entry.getValue().get());
            } catch (InvalidProtocolBufferException e) {
                log.error("Could not parse protobuf for {} from table {}", file, tableId);
                continue;
            }
            // TODO put this into a filter on serverside
            if (!shouldCreateWork(status)) {
                log.debug("Not creating work: {}", status.toString());
                continue;
            }
            // Get the table configuration for the table specified by the status record
            tableConf = context.getServerConfigurationFactory().getTableConfiguration(tableId);
            // getTableConfiguration(String) returns null if the table no longer exists
            if (null == tableConf) {
                continue;
            }
            // Pull the relevant replication targets
            // TODO Cache this instead of pulling it every time
            Map<String, String> replicationTargets = getReplicationTargets(tableConf);
            // If we have targets, make a work record; a second scanner over the WorkSection could make this check relatively cheap
            if (!replicationTargets.isEmpty()) {
                Span workSpan = Trace.start("createWorkMutations");
                try {
                    addWorkRecord(file, entry.getValue(), replicationTargets, tableId);
                } finally {
                    workSpan.stop();
                }
            } else {
                log.warn("No configured targets for table with ID {}", tableId);
            }
        }
    } finally {
        span.stop();
    }
}
Also used: Status (org.apache.accumulo.server.replication.proto.Replication.Status), Scanner (org.apache.accumulo.core.client.Scanner), Table (org.apache.accumulo.core.client.impl.Table), ReplicationTable (org.apache.accumulo.core.replication.ReplicationTable), InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException), Text (org.apache.hadoop.io.Text), Span (org.apache.accumulo.core.trace.Span), Value (org.apache.accumulo.core.data.Value), ReplicationTableOfflineException (org.apache.accumulo.core.replication.ReplicationTableOfflineException), TableConfiguration (org.apache.accumulo.server.conf.TableConfiguration), Key (org.apache.accumulo.core.data.Key)
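
Note that the isOnline() check at the top of run() does not make the later catch redundant: the table can be taken offline between the check and getScanner(), so the check is only a cheap fast path. Here is a hedged sketch of that check-then-act pattern; CheckThenScan and runOnce are hypothetical names.

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.replication.ReplicationTable;
import org.apache.accumulo.core.replication.ReplicationTableOfflineException;

public class CheckThenScan {
    public static void runOnce(Connector conn) {
        if (!ReplicationTable.isOnline(conn)) {
            return; // fast path: don't bother building scanners
        }
        Scanner s;
        try {
            s = ReplicationTable.getScanner(conn);
        } catch (ReplicationTableOfflineException e) {
            return; // lost the race: the table went offline after the check
        }
        // ... iterate over s ...
    }
}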

Aggregations

ReplicationTableOfflineException (org.apache.accumulo.core.replication.ReplicationTableOfflineException): 8 usages
InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException): 6 usages
Key (org.apache.accumulo.core.data.Key): 6 usages
Value (org.apache.accumulo.core.data.Value): 6 usages
Status (org.apache.accumulo.server.replication.proto.Replication.Status): 6 usages
Scanner (org.apache.accumulo.core.client.Scanner): 5 usages
Text (org.apache.hadoop.io.Text): 5 usages
AccumuloException (org.apache.accumulo.core.client.AccumuloException): 3 usages
AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException): 3 usages
BatchScanner (org.apache.accumulo.core.client.BatchScanner): 3 usages
Table (org.apache.accumulo.core.client.impl.Table): 3 usages
ReplicationTable (org.apache.accumulo.core.replication.ReplicationTable): 3 usages
ReplicationTarget (org.apache.accumulo.core.replication.ReplicationTarget): 3 usages
Path (org.apache.hadoop.fs.Path): 3 usages
IOException (java.io.IOException): 2 usages
NoSuchElementException (java.util.NoSuchElementException): 2 usages
BatchWriter (org.apache.accumulo.core.client.BatchWriter): 2 usages
IteratorSetting (org.apache.accumulo.core.client.IteratorSetting): 2 usages
MutationsRejectedException (org.apache.accumulo.core.client.MutationsRejectedException): 2 usages
TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException): 2 usages