use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.
the class AbstractInputFormat method getSplits.
/**
* Gets the splits of the tables that have been set on the job by reading the metadata table for the specified ranges.
*
* @return the splits from the tables based on the ranges.
* @throws java.io.IOException
* if a table set on the job doesn't exist or an error occurs initializing the tablet locator
*/
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
Level logLevel = getLogLevel(job);
log.setLevel(logLevel);
validateOptions(job);
Random random = new Random();
LinkedList<InputSplit> splits = new LinkedList<>();
Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(job);
for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
String tableName = tableConfigEntry.getKey();
InputTableConfig tableConfig = tableConfigEntry.getValue();
Instance instance = getInstance(job);
Table.ID tableId;
// resolve table name to id once, and use id from this point forward
if (DeprecationUtil.isMockInstance(instance)) {
tableId = Table.ID.of("");
} else {
try {
tableId = Tables.getTableId(instance, tableName);
} catch (TableNotFoundException e) {
throw new IOException(e);
}
}
Authorizations auths = getScanAuthorizations(job);
String principal = getPrincipal(job);
AuthenticationToken token = getAuthenticationToken(job);
boolean batchScan = InputConfigurator.isBatchScan(CLASS, job);
boolean supportBatchScan = !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
if (batchScan && !supportBatchScan)
throw new IllegalArgumentException("BatchScanner optimization not available for offline scan, isolated, or local iterators");
boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
if (batchScan && !autoAdjust)
throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
if (ranges.isEmpty()) {
ranges = new ArrayList<>(1);
ranges.add(new Range());
}
// get the metadata information for these ranges
Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
TabletLocator tl;
try {
if (tableConfig.isOfflineScan()) {
binnedRanges = binOfflineTable(job, tableId, ranges);
while (binnedRanges == null) {
// Some tablets were still online, try again
// sleep randomly between 100 and 200 ms
sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
binnedRanges = binOfflineTable(job, tableId, ranges);
}
} else {
tl = InputConfigurator.getTabletLocator(CLASS, job, tableId);
// its possible that the cache could contain complete, but old information about a tables tablets... so clear it
tl.invalidateCache();
ClientContext context = new ClientContext(getInstance(job), new Credentials(getPrincipal(job), getAuthenticationToken(job)), getClientConfiguration(job));
while (!tl.binRanges(context, ranges, binnedRanges).isEmpty()) {
if (!DeprecationUtil.isMockInstance(instance)) {
String tableIdStr = tableId.canonicalID();
if (!Tables.exists(instance, tableId))
throw new TableDeletedException(tableIdStr);
if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
throw new TableOfflineException(instance, tableIdStr);
}
binnedRanges.clear();
log.warn("Unable to locate bins for specified ranges. Retrying.");
// sleep randomly between 100 and 200 ms
sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
tl.invalidateCache();
}
}
} catch (Exception e) {
throw new IOException(e);
}
HashMap<Range, ArrayList<String>> splitsToAdd = null;
if (!autoAdjust)
splitsToAdd = new HashMap<>();
HashMap<String, String> hostNameCache = new HashMap<>();
for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
String ip = tserverBin.getKey().split(":", 2)[0];
String location = hostNameCache.get(ip);
if (location == null) {
InetAddress inetAddress = InetAddress.getByName(ip);
location = inetAddress.getCanonicalHostName();
hostNameCache.put(ip, location);
}
for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
Range ke = extentRanges.getKey().toDataRange();
if (batchScan) {
// group ranges by tablet to be read by a BatchScanner
ArrayList<Range> clippedRanges = new ArrayList<>();
for (Range r : extentRanges.getValue()) clippedRanges.add(ke.clip(r));
BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges, new String[] { location });
SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
splits.add(split);
} else {
// not grouping by tablet
for (Range r : extentRanges.getValue()) {
if (autoAdjust) {
// divide ranges into smaller ranges, based on the tablets
RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), ke.clip(r), new String[] { location });
SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
split.setOffline(tableConfig.isOfflineScan());
split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
splits.add(split);
} else {
// don't divide ranges
ArrayList<String> locations = splitsToAdd.get(r);
if (locations == null)
locations = new ArrayList<>(1);
locations.add(location);
splitsToAdd.put(r, locations);
}
}
}
}
}
if (!autoAdjust)
for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), entry.getKey(), entry.getValue().toArray(new String[0]));
SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
split.setOffline(tableConfig.isOfflineScan());
split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
splits.add(split);
}
}
return splits.toArray(new InputSplit[splits.size()]);
}
use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.
the class ClientServiceHandler method getDiskUsage.
@Override
public List<TDiskUsage> getDiskUsage(Set<String> tables, TCredentials credentials) throws ThriftTableOperationException, ThriftSecurityException, TException {
try {
HashSet<Table.ID> tableIds = new HashSet<>();
for (String table : tables) {
// ensure that table table exists
Table.ID tableId = checkTableId(instance, table, null);
tableIds.add(tableId);
Namespace.ID namespaceId = Tables.getNamespaceId(instance, tableId);
if (!security.canScan(credentials, tableId, namespaceId))
throw new ThriftSecurityException(credentials.getPrincipal(), SecurityErrorCode.PERMISSION_DENIED);
}
// use the same set of tableIds that were validated above to avoid race conditions
Map<TreeSet<String>, Long> diskUsage = TableDiskUsage.getDiskUsage(tableIds, fs, context.getConnector());
List<TDiskUsage> retUsages = new ArrayList<>();
for (Map.Entry<TreeSet<String>, Long> usageItem : diskUsage.entrySet()) {
retUsages.add(new TDiskUsage(new ArrayList<>(usageItem.getKey()), usageItem.getValue()));
}
return retUsages;
} catch (AccumuloSecurityException e) {
throw e.asThriftException();
} catch (AccumuloException | TableNotFoundException | IOException e) {
throw new TException(e);
}
}
use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.
the class RandomizeVolumes method randomize.
public static int randomize(Connector c, String tableName) throws IOException, AccumuloSecurityException, AccumuloException, TableNotFoundException {
final VolumeManager vm = VolumeManagerImpl.get();
if (vm.getVolumes().size() < 2) {
log.error("There are not enough volumes configured");
return 1;
}
String tblStr = c.tableOperations().tableIdMap().get(tableName);
if (null == tblStr) {
log.error("Could not determine the table ID for table {}", tableName);
return 2;
}
Table.ID tableId = Table.ID.of(tblStr);
TableState tableState = TableManager.getInstance().getTableState(tableId);
if (TableState.OFFLINE != tableState) {
log.info("Taking {} offline", tableName);
c.tableOperations().offline(tableName, true);
log.info("{} offline", tableName);
}
SimpleThreadPool pool = new SimpleThreadPool(50, "directory maker");
log.info("Rewriting entries for {}", tableName);
Scanner scanner = c.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
DIRECTORY_COLUMN.fetch(scanner);
scanner.setRange(TabletsSection.getRange(tableId));
BatchWriter writer = c.createBatchWriter(MetadataTable.NAME, null);
int count = 0;
for (Entry<Key, Value> entry : scanner) {
String oldLocation = entry.getValue().toString();
String directory;
if (oldLocation.contains(":")) {
String[] parts = oldLocation.split(Path.SEPARATOR);
Table.ID tableIdEntry = Table.ID.of(parts[parts.length - 2]);
if (!tableIdEntry.equals(tableId)) {
log.error("Unexpected table id found: {}, expected {}; skipping", tableIdEntry, tableId);
continue;
}
directory = parts[parts.length - 1];
} else {
directory = oldLocation.substring(Path.SEPARATOR.length());
}
Key key = entry.getKey();
Mutation m = new Mutation(key.getRow());
VolumeChooserEnvironment chooserEnv = new VolumeChooserEnvironment(tableId);
final String newLocation = vm.choose(chooserEnv, ServerConstants.getBaseUris()) + Path.SEPARATOR + ServerConstants.TABLE_DIR + Path.SEPARATOR + tableId + Path.SEPARATOR + directory;
m.put(key.getColumnFamily(), key.getColumnQualifier(), new Value(newLocation.getBytes(UTF_8)));
if (log.isTraceEnabled()) {
log.trace("Replacing {} with {}", oldLocation, newLocation);
}
writer.addMutation(m);
pool.submit(new Runnable() {
@Override
public void run() {
try {
vm.mkdirs(new Path(newLocation));
} catch (IOException ex) {
// nevermind
}
}
});
count++;
}
writer.close();
pool.shutdown();
while (!pool.isTerminated()) {
log.trace("Waiting for mkdir() calls to finish");
try {
pool.awaitTermination(5, TimeUnit.SECONDS);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break;
}
}
log.info("Updated {} entries for table {}", count, tableName);
if (TableState.OFFLINE != tableState) {
c.tableOperations().online(tableName, true);
log.info("table {} back online", tableName);
}
return 0;
}
use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.
the class TableDiskUsage method printDiskUsage.
public static void printDiskUsage(Collection<String> tableNames, VolumeManager fs, Connector conn, Printer printer, boolean humanReadable) throws TableNotFoundException, IOException {
HashSet<Table.ID> tableIds = new HashSet<>();
// Get table IDs for all tables requested to be 'du'
for (String tableName : tableNames) {
Table.ID tableId = Tables.getTableId(conn.getInstance(), tableName);
if (tableId == null)
throw new TableNotFoundException(null, tableName, "Table " + tableName + " not found");
tableIds.add(tableId);
}
Map<TreeSet<String>, Long> usage = getDiskUsage(tableIds, fs, conn);
String valueFormat = humanReadable ? "%9s" : "%,24d";
for (Entry<TreeSet<String>, Long> entry : usage.entrySet()) {
Object value = humanReadable ? NumUtil.bigNumberForSize(entry.getValue()) : entry.getValue();
printer.print(String.format(valueFormat + " %s", value, entry.getKey()));
}
}
use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.
the class ReplicationResource method getReplicationInformation.
/**
* Generates the replication table as a JSON object
*
* @return Replication list
*/
@GET
public List<ReplicationInformation> getReplicationInformation() throws AccumuloException, AccumuloSecurityException {
final Connector conn = Monitor.getContext().getConnector();
final TableOperations tops = conn.tableOperations();
final Map<String, String> properties = conn.instanceOperations().getSystemConfiguration();
final Map<String, String> peers = new HashMap<>();
final String definedPeersPrefix = Property.REPLICATION_PEERS.getKey();
final ReplicaSystemFactory replicaSystemFactory = new ReplicaSystemFactory();
// Get the defined peers and what ReplicaSystem impl they're using
for (Entry<String, String> property : properties.entrySet()) {
String key = property.getKey();
// Filter out cruft that we don't want
if (key.startsWith(definedPeersPrefix) && !key.startsWith(Property.REPLICATION_PEER_USER.getKey()) && !key.startsWith(Property.REPLICATION_PEER_PASSWORD.getKey())) {
String peerName = property.getKey().substring(definedPeersPrefix.length());
ReplicaSystem replica;
try {
replica = replicaSystemFactory.get(property.getValue());
} catch (Exception e) {
log.warn("Could not instantiate ReplicaSystem for {} with configuration {}", property.getKey(), property.getValue(), e);
continue;
}
peers.put(peerName, replica.getClass().getName());
}
}
final String targetPrefix = Property.TABLE_REPLICATION_TARGET.getKey();
// The total set of configured targets
Set<ReplicationTarget> allConfiguredTargets = new HashSet<>();
// Number of files per target we have to replicate
Map<ReplicationTarget, Long> targetCounts = new HashMap<>();
Map<String, Table.ID> tableNameToId = Tables.getNameToIdMap(conn.getInstance());
Map<Table.ID, String> tableIdToName = invert(tableNameToId);
for (String table : tops.list()) {
if (MetadataTable.NAME.equals(table) || RootTable.NAME.equals(table)) {
continue;
}
Table.ID localId = tableNameToId.get(table);
if (null == localId) {
log.trace("Could not determine ID for {}", table);
continue;
}
Iterable<Entry<String, String>> propertiesForTable;
try {
propertiesForTable = tops.getProperties(table);
} catch (TableNotFoundException e) {
log.warn("Could not fetch properties for {}", table, e);
continue;
}
for (Entry<String, String> prop : propertiesForTable) {
if (prop.getKey().startsWith(targetPrefix)) {
String peerName = prop.getKey().substring(targetPrefix.length());
String remoteIdentifier = prop.getValue();
ReplicationTarget target = new ReplicationTarget(peerName, remoteIdentifier, localId);
allConfiguredTargets.add(target);
}
}
}
// Read over the queued work
BatchScanner bs;
try {
bs = conn.createBatchScanner(ReplicationTable.NAME, Authorizations.EMPTY, 4);
} catch (TableOfflineException | TableNotFoundException e) {
log.error("Could not read replication table", e);
return Collections.emptyList();
}
bs.setRanges(Collections.singleton(new Range()));
WorkSection.limit(bs);
try {
Text buffer = new Text();
for (Entry<Key, Value> entry : bs) {
Key k = entry.getKey();
k.getColumnQualifier(buffer);
ReplicationTarget target = ReplicationTarget.from(buffer);
// TODO ACCUMULO-2835 once explicit lengths are tracked, we can give size-based estimates instead of just file-based
Long count = targetCounts.get(target);
if (null == count) {
targetCounts.put(target, 1l);
} else {
targetCounts.put(target, count + 1);
}
}
} finally {
bs.close();
}
List<ReplicationInformation> replicationInformation = new ArrayList<>();
for (ReplicationTarget configuredTarget : allConfiguredTargets) {
String tableName = tableIdToName.get(configuredTarget.getSourceTableId());
if (null == tableName) {
log.trace("Could not determine table name from id {}", configuredTarget.getSourceTableId());
continue;
}
String replicaSystemClass = peers.get(configuredTarget.getPeerName());
if (null == replicaSystemClass) {
log.trace("Could not determine configured ReplicaSystem for {}", configuredTarget.getPeerName());
continue;
}
Long numFiles = targetCounts.get(configuredTarget);
replicationInformation.add(new ReplicationInformation(tableName, configuredTarget.getPeerName(), configuredTarget.getRemoteIdentifier(), replicaSystemClass, (null == numFiles) ? 0 : numFiles));
}
return replicationInformation;
}
Aggregations