Use of org.apache.accumulo.core.client.impl.TabletLocator in the Apache Accumulo project.
Class BulkImporter, method importFiles.
/**
 * Bulk imports the given map files and reports statistics about the attempted assignments.
 *
 * <p>
 * The method works in three phases:
 * <ol>
 * <li>Each file is examined on a fixed-size thread pool to find the tablets it overlaps. A file for which no overlapping tablets were found (including the case
 * where the lookup threw) is recorded as a complete failure with an empty extent list.</li>
 * <li>The files are handed to {@code assignMapFiles} together with the tablets found for them.</li>
 * <li>While {@code assignMapFiles} keeps reporting failed extents, the locator cache is invalidated, the thread sleeps (4 seconds on the first round, doubling each
 * round up to 60 seconds), the overlapping tablets are looked up again for every failed extent, and the assignment is retried. A file whose failure count exceeds
 * {@code TSERV_BULK_RETRY} while it still has unassigned extents is moved to the complete failures.</li>
 * </ol>
 * Complete failures are finally passed to {@code processFailures}.
 *
 * @param files
 *          paths of the map files to import; they are collected into a set before processing
 * @param failureDir
 *          not referenced by this method body
 * @return the statistics object that was updated during every phase
 * @throws IOException
 *           declared by this method; propagated from the helpers it calls
 */
public AssignmentStats importFiles(List<String> files, Path failureDir) throws IOException, AccumuloException, AccumuloSecurityException, ThriftTableOperationException {
  int numThreads = context.getConfiguration().getCount(Property.TSERV_BULK_PROCESS_THREADS);
  int numAssignThreads = context.getConfiguration().getCount(Property.TSERV_BULK_ASSIGNMENT_THREADS);

  timer = new StopWatch<>(Timers.class);
  timer.start(Timers.TOTAL);

  Configuration conf = CachedConfiguration.getInstance();
  // obtained exactly once; a second call whose result is thrown away only costs time
  final VolumeManager fs = VolumeManagerImpl.get(context.getConfiguration());

  Set<Path> paths = new HashSet<>();
  for (String file : files) {
    paths.add(new Path(file));
  }
  AssignmentStats assignmentStats = new AssignmentStats(paths.size());

  // both maps are written from the worker threads below, hence the synchronized wrappers
  final Map<Path, List<KeyExtent>> completeFailures = Collections.synchronizedSortedMap(new TreeMap<Path, List<KeyExtent>>());
  final TabletLocator locator = TabletLocator.getLocator(context, Table.ID.of(tableId));
  final Map<Path, List<TabletLocation>> assignments = Collections.synchronizedSortedMap(new TreeMap<Path, List<TabletLocation>>());

  // phase 1: find the tablets each file overlaps
  timer.start(Timers.EXAMINE_MAP_FILES);
  ExecutorService threadPool = Executors.newFixedThreadPool(numThreads, new NamingThreadFactory("findOverlapping"));
  for (Path path : paths) {
    final Path mapFile = path;
    Runnable getAssignments = new Runnable() {
      @Override
      public void run() {
        List<TabletLocation> tabletsToAssignMapFileTo = Collections.emptyList();
        try {
          tabletsToAssignMapFileTo = findOverlappingTablets(context, fs, locator, mapFile);
        } catch (Exception ex) {
          // best effort: the file falls through to the "no tablets" branch below
          log.warn("Unable to find tablets that overlap file " + mapFile.toString(), ex);
        }
        log.debug("Map file {} found to overlap {} tablets", mapFile, tabletsToAssignMapFileTo.size());
        if (tabletsToAssignMapFileTo.isEmpty()) {
          List<KeyExtent> empty = Collections.emptyList();
          completeFailures.put(mapFile, empty);
        } else {
          assignments.put(mapFile, tabletsToAssignMapFileTo);
        }
      }
    };
    threadPool.submit(new TraceRunnable(new LoggingRunnable(log, getAssignments)));
  }
  threadPool.shutdown();
  while (!threadPool.isTerminated()) {
    try {
      threadPool.awaitTermination(60, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
      // keep the interrupt visible to callers further up the stack
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    }
  }
  timer.stop(Timers.EXAMINE_MAP_FILES);

  // phase 2: first assignment attempt
  assignmentStats.attemptingAssignments(assignments);
  Map<Path, List<KeyExtent>> assignmentFailures = assignMapFiles(context, conf, fs, tableId, assignments, paths, numAssignThreads, numThreads);
  assignmentStats.assignmentsFailed(assignmentFailures);

  // every file that failed the first attempt starts with a failure count of one
  Map<Path, Integer> failureCount = new TreeMap<>();
  for (Entry<Path, List<KeyExtent>> entry : assignmentFailures.entrySet()) {
    failureCount.put(entry.getKey(), 1);
  }

  // phase 3: retry with a doubling, capped sleep between rounds
  long sleepTime = 2 * 1000;
  while (assignmentFailures.size() > 0) {
    sleepTime = Math.min(sleepTime * 2, 60 * 1000);
    locator.invalidateCache();
    // assumption about assignment failures is that it caused by a split
    // happening or a missing location
    //
    // for splits we need to find children key extents that cover the
    // same key range and are contiguous (no holes, no overlap)
    timer.start(Timers.SLEEP);
    sleepUninterruptibly(sleepTime, TimeUnit.MILLISECONDS);
    timer.stop(Timers.SLEEP);

    log.debug("Trying to assign {} map files that previously failed on some key extents", assignmentFailures.size());
    assignments.clear();

    // look up the current tablets for each failed extent; an extent whose lookup
    // throws stays in the failure list and is tried again next round
    for (Entry<Path, List<KeyExtent>> entry : assignmentFailures.entrySet()) {
      Iterator<KeyExtent> keListIter = entry.getValue().iterator();
      List<TabletLocation> tabletsToAssignMapFileTo = new ArrayList<>();
      while (keListIter.hasNext()) {
        KeyExtent ke = keListIter.next();
        timer.start(Timers.QUERY_METADATA);
        try {
          tabletsToAssignMapFileTo.addAll(findOverlappingTablets(context, fs, locator, entry.getKey(), ke));
          keListIter.remove();
        } catch (Exception ex) {
          log.warn("Exception finding overlapping tablets, will retry tablet " + ke, ex);
        }
        timer.stop(Timers.QUERY_METADATA);
      }
      if (!tabletsToAssignMapFileTo.isEmpty()) {
        assignments.put(entry.getKey(), tabletsToAssignMapFileTo);
      }
    }

    assignmentStats.attemptingAssignments(assignments);
    Map<Path, List<KeyExtent>> assignmentFailures2 = assignMapFiles(context, conf, fs, tableId, assignments, paths, numAssignThreads, numThreads);
    assignmentStats.assignmentsFailed(assignmentFailures2);

    // merge assignmentFailures2 into assignmentFailures
    for (Entry<Path, List<KeyExtent>> entry : assignmentFailures2.entrySet()) {
      assignmentFailures.get(entry.getKey()).addAll(entry.getValue());
      Integer fc = failureCount.get(entry.getKey());
      if (fc == null) {
        fc = 0;
      }
      failureCount.put(entry.getKey(), fc + 1);
    }

    // remove map files that have no more key extents to assign
    Iterator<Entry<Path, List<KeyExtent>>> afIter = assignmentFailures.entrySet().iterator();
    while (afIter.hasNext()) {
      Entry<Path, List<KeyExtent>> entry = afIter.next();
      if (entry.getValue().isEmpty()) {
        afIter.remove();
      }
    }

    // give up on files that exceeded the retry limit; the limit is read once per round
    int retries = context.getConfiguration().getCount(Property.TSERV_BULK_RETRY);
    for (Entry<Path, Integer> entry : failureCount.entrySet()) {
      List<KeyExtent> remaining = assignmentFailures.get(entry.getKey());
      if (entry.getValue() > retries && remaining != null) {
        log.error("Map file {} failed more than {} times, giving up.", entry.getKey(), retries);
        completeFailures.put(entry.getKey(), remaining);
        assignmentFailures.remove(entry.getKey());
      }
    }
  }

  assignmentStats.assignmentsAbandoned(completeFailures);
  Set<Path> failedFailures = processFailures(completeFailures);
  assignmentStats.unrecoveredMapFiles(failedFailures);

  timer.stop(Timers.TOTAL);
  printReport(paths);
  return assignmentStats;
}
Use of org.apache.accumulo.core.client.impl.TabletLocator in the Apache Accumulo project.
Class AbstractInputFormat, method getSplits.
/**
 * Builds the input splits for every table configured on the job by binning the requested ranges against the tablets that hold them.
 *
 * <p>
 * With the BatchScanner optimization one split is produced per tablet; with auto-adjusted ranges one split is produced per range clipped to each tablet;
 * otherwise one split is produced per requested range, carrying every location that range was binned to.
 *
 * @param context
 *          the job context the input configuration is read from
 * @return the splits from the tables based on the ranges.
 * @throws java.io.IOException
 *           if a table set on the job doesn't exist or an error occurs while locating its tablets
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  Level level = getLogLevel(context);
  log.setLevel(level);
  validateOptions(context);

  Random rand = new Random();
  LinkedList<InputSplit> result = new LinkedList<>();
  Map<String, InputTableConfig> configsByTable = getInputTableConfigs(context);

  for (Map.Entry<String, InputTableConfig> configured : configsByTable.entrySet()) {
    String tableName = configured.getKey();
    InputTableConfig tableConfig = configured.getValue();
    Instance instance = getInstance(context);

    // the table name is turned into an id a single time; only the id is used below
    Table.ID tableId;
    if (DeprecationUtil.isMockInstance(instance)) {
      tableId = Table.ID.of("");
    } else {
      try {
        tableId = Tables.getTableId(instance, tableName);
      } catch (TableNotFoundException e) {
        throw new IOException(e);
      }
    }

    Authorizations auths = getScanAuthorizations(context);
    String principal = getPrincipal(context);
    AuthenticationToken token = getAuthenticationToken(context);

    boolean batchScan = InputConfigurator.isBatchScan(CLASS, context.getConfiguration());
    boolean supportBatchScan = !tableConfig.isOfflineScan() && !tableConfig.shouldUseIsolatedScanners() && !tableConfig.shouldUseLocalIterators();
    if (batchScan && !supportBatchScan) {
      throw new IllegalArgumentException("BatchScanner optimization not available for offline scan, isolated, or local iterators");
    }

    boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
    if (batchScan && !autoAdjust) {
      throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
    }

    List<Range> ranges;
    if (autoAdjust) {
      ranges = Range.mergeOverlapping(tableConfig.getRanges());
    } else {
      ranges = tableConfig.getRanges();
    }
    if (ranges.isEmpty()) {
      // nothing requested: fall back to a single default-constructed range
      ranges = new ArrayList<>(1);
      ranges.add(new Range());
    }

    // look up which tablet server / tablet each range belongs to
    Map<String, Map<KeyExtent, List<Range>>> binned = new HashMap<>();
    try {
      if (tableConfig.isOfflineScan()) {
        // a null result means some tablets were still online: wait 100-200 ms and ask again
        while ((binned = binOfflineTable(context, tableId, ranges)) == null) {
          sleepUninterruptibly(100 + rand.nextInt(100), TimeUnit.MILLISECONDS);
        }
      } else {
        TabletLocator locator = InputConfigurator.getTabletLocator(CLASS, context.getConfiguration(), tableId);
        // the cache may hold complete but outdated tablet information, so start from a clean one
        locator.invalidateCache();

        Instance contextInstance = getInstance(context);
        Credentials credentials = new Credentials(getPrincipal(context), getAuthenticationToken(context));
        ClientContext clientContext = new ClientContext(contextInstance, credentials, getClientConfiguration(context));

        while (!tl_isDone(locator.binRanges(clientContext, ranges, binned))) {
          if (!DeprecationUtil.isMockInstance(instance)) {
            String tableIdStr = tableId.canonicalID();
            if (!Tables.exists(instance, tableId)) {
              throw new TableDeletedException(tableIdStr);
            }
            if (Tables.getTableState(instance, tableId) == TableState.OFFLINE) {
              throw new TableOfflineException(instance, tableIdStr);
            }
          }
          binned.clear();
          log.warn("Unable to locate bins for specified ranges. Retrying.");
          // wait 100-200 ms before the next attempt
          sleepUninterruptibly(100 + rand.nextInt(100), TimeUnit.MILLISECONDS);
          locator.invalidateCache();
        }
      }
    } catch (Exception e) {
      throw new IOException(e);
    }

    // range -> locations; only populated when ranges are NOT divided along tablet boundaries
    HashMap<Range, ArrayList<String>> undividedLocations = autoAdjust ? null : new HashMap<Range, ArrayList<String>>();
    HashMap<String, String> hostNames = new HashMap<>();

    for (Map.Entry<String, Map<KeyExtent, List<Range>>> serverBin : binned.entrySet()) {
      String ip = serverBin.getKey().split(":", 2)[0];
      String host = hostNames.get(ip);
      if (host == null) {
        host = InetAddress.getByName(ip).getCanonicalHostName();
        hostNames.put(ip, host);
      }

      for (Map.Entry<KeyExtent, List<Range>> tablet : serverBin.getValue().entrySet()) {
        Range tabletRange = tablet.getKey().toDataRange();

        if (batchScan) {
          // one split per tablet, holding all of its clipped ranges, for a BatchScanner
          ArrayList<Range> clipped = new ArrayList<>();
          for (Range requested : tablet.getValue()) {
            clipped.add(tabletRange.clip(requested));
          }
          BatchInputSplit batchSplit = new BatchInputSplit(tableName, tableId, clipped, new String[] {host});
          SplitUtils.updateSplit(batchSplit, instance, tableConfig, principal, token, auths, level);
          result.add(batchSplit);
          continue;
        }

        for (Range requested : tablet.getValue()) {
          if (!autoAdjust) {
            // keep the range whole and just remember another location for it
            ArrayList<String> hosts = undividedLocations.get(requested);
            if (hosts == null) {
              hosts = new ArrayList<>(1);
              undividedLocations.put(requested, hosts);
            }
            hosts.add(host);
            continue;
          }

          // cut the range down to the part served by this tablet
          RangeInputSplit rangeSplit = new RangeInputSplit(tableName, tableId.canonicalID(), tabletRange.clip(requested), new String[] {host});
          SplitUtils.updateSplit(rangeSplit, instance, tableConfig, principal, token, auths, level);
          rangeSplit.setOffline(tableConfig.isOfflineScan());
          rangeSplit.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
          rangeSplit.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
          result.add(rangeSplit);
        }
      }
    }

    if (!autoAdjust) {
      for (Map.Entry<Range, ArrayList<String>> undivided : undividedLocations.entrySet()) {
        String[] hosts = undivided.getValue().toArray(new String[0]);
        RangeInputSplit rangeSplit = new RangeInputSplit(tableName, tableId.canonicalID(), undivided.getKey(), hosts);
        SplitUtils.updateSplit(rangeSplit, instance, tableConfig, principal, token, auths, level);
        rangeSplit.setOffline(tableConfig.isOfflineScan());
        rangeSplit.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
        rangeSplit.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
        result.add(rangeSplit);
      }
    }
  }
  return result;
}

/** Returns true when a binning attempt reported no ranges that failed to bin. */
private static boolean tl_isDone(List<Range> failures) {
  return failures.isEmpty();
}
Use of org.apache.accumulo.core.client.impl.TabletLocator in the Apache Accumulo project.
Class AbstractInputFormat, method getSplits.
/**
 * Builds the input splits for every table configured on the job by binning the requested ranges against the tablets that hold them.
 *
 * <p>
 * With the BatchScanner optimization one split is produced per tablet; with auto-adjusted ranges one split is produced per range clipped to each tablet;
 * otherwise one split is produced per requested range, carrying every location that range was binned to.
 *
 * @param job
 *          the job configuration the input settings are read from
 * @param numSplits
 *          not referenced by this method body
 * @return the splits from the tables based on the ranges.
 * @throws java.io.IOException
 *           if a table set on the job doesn't exist or an error occurs while locating its tablets
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  Level level = getLogLevel(job);
  log.setLevel(level);
  validateOptions(job);

  Random rand = new Random();
  LinkedList<InputSplit> result = new LinkedList<>();
  Map<String, InputTableConfig> configsByTable = getInputTableConfigs(job);

  for (Map.Entry<String, InputTableConfig> configured : configsByTable.entrySet()) {
    String tableName = configured.getKey();
    InputTableConfig tableConfig = configured.getValue();
    Instance instance = getInstance(job);

    // the table name is turned into an id a single time; only the id is used below
    Table.ID tableId;
    if (DeprecationUtil.isMockInstance(instance)) {
      tableId = Table.ID.of("");
    } else {
      try {
        tableId = Tables.getTableId(instance, tableName);
      } catch (TableNotFoundException e) {
        throw new IOException(e);
      }
    }

    Authorizations auths = getScanAuthorizations(job);
    String principal = getPrincipal(job);
    AuthenticationToken token = getAuthenticationToken(job);

    boolean batchScan = InputConfigurator.isBatchScan(CLASS, job);
    boolean supportBatchScan = !tableConfig.isOfflineScan() && !tableConfig.shouldUseIsolatedScanners() && !tableConfig.shouldUseLocalIterators();
    if (batchScan && !supportBatchScan) {
      throw new IllegalArgumentException("BatchScanner optimization not available for offline scan, isolated, or local iterators");
    }

    boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
    if (batchScan && !autoAdjust) {
      throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
    }

    List<Range> ranges;
    if (autoAdjust) {
      ranges = Range.mergeOverlapping(tableConfig.getRanges());
    } else {
      ranges = tableConfig.getRanges();
    }
    if (ranges.isEmpty()) {
      // nothing requested: fall back to a single default-constructed range
      ranges = new ArrayList<>(1);
      ranges.add(new Range());
    }

    // look up which tablet server / tablet each range belongs to
    Map<String, Map<KeyExtent, List<Range>>> binned = new HashMap<>();
    try {
      if (tableConfig.isOfflineScan()) {
        // a null result means some tablets were still online: wait 100-200 ms and ask again
        while ((binned = binOfflineTable(job, tableId, ranges)) == null) {
          sleepUninterruptibly(100 + rand.nextInt(100), TimeUnit.MILLISECONDS);
        }
      } else {
        TabletLocator locator = InputConfigurator.getTabletLocator(CLASS, job, tableId);
        // the cache may hold complete but outdated tablet information, so start from a clean one
        locator.invalidateCache();

        Instance contextInstance = getInstance(job);
        Credentials credentials = new Credentials(getPrincipal(job), getAuthenticationToken(job));
        ClientContext context = new ClientContext(contextInstance, credentials, getClientConfiguration(job));

        for (;;) {
          List<Range> unbinned = locator.binRanges(context, ranges, binned);
          if (unbinned.isEmpty()) {
            break;
          }
          if (!DeprecationUtil.isMockInstance(instance)) {
            String tableIdStr = tableId.canonicalID();
            if (!Tables.exists(instance, tableId)) {
              throw new TableDeletedException(tableIdStr);
            }
            if (Tables.getTableState(instance, tableId) == TableState.OFFLINE) {
              throw new TableOfflineException(instance, tableIdStr);
            }
          }
          binned.clear();
          log.warn("Unable to locate bins for specified ranges. Retrying.");
          // wait 100-200 ms before the next attempt
          sleepUninterruptibly(100 + rand.nextInt(100), TimeUnit.MILLISECONDS);
          locator.invalidateCache();
        }
      }
    } catch (Exception e) {
      throw new IOException(e);
    }

    // range -> locations; only populated when ranges are NOT divided along tablet boundaries
    HashMap<Range, ArrayList<String>> undividedLocations = autoAdjust ? null : new HashMap<Range, ArrayList<String>>();
    HashMap<String, String> hostNames = new HashMap<>();

    for (Map.Entry<String, Map<KeyExtent, List<Range>>> serverBin : binned.entrySet()) {
      String ip = serverBin.getKey().split(":", 2)[0];
      String host = hostNames.get(ip);
      if (host == null) {
        host = InetAddress.getByName(ip).getCanonicalHostName();
        hostNames.put(ip, host);
      }

      for (Map.Entry<KeyExtent, List<Range>> tablet : serverBin.getValue().entrySet()) {
        Range tabletRange = tablet.getKey().toDataRange();

        if (batchScan) {
          // one split per tablet, holding all of its clipped ranges, for a BatchScanner
          ArrayList<Range> clipped = new ArrayList<>();
          for (Range requested : tablet.getValue()) {
            clipped.add(tabletRange.clip(requested));
          }
          BatchInputSplit batchSplit = new BatchInputSplit(tableName, tableId, clipped, new String[] {host});
          SplitUtils.updateSplit(batchSplit, instance, tableConfig, principal, token, auths, level);
          result.add(batchSplit);
          continue;
        }

        for (Range requested : tablet.getValue()) {
          if (!autoAdjust) {
            // keep the range whole and just remember another location for it
            ArrayList<String> hosts = undividedLocations.get(requested);
            if (hosts == null) {
              hosts = new ArrayList<>(1);
              undividedLocations.put(requested, hosts);
            }
            hosts.add(host);
            continue;
          }

          // cut the range down to the part served by this tablet
          RangeInputSplit rangeSplit = new RangeInputSplit(tableName, tableId.canonicalID(), tabletRange.clip(requested), new String[] {host});
          SplitUtils.updateSplit(rangeSplit, instance, tableConfig, principal, token, auths, level);
          rangeSplit.setOffline(tableConfig.isOfflineScan());
          rangeSplit.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
          rangeSplit.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
          result.add(rangeSplit);
        }
      }
    }

    if (!autoAdjust) {
      for (Map.Entry<Range, ArrayList<String>> undivided : undividedLocations.entrySet()) {
        String[] hosts = undivided.getValue().toArray(new String[0]);
        RangeInputSplit rangeSplit = new RangeInputSplit(tableName, tableId.canonicalID(), undivided.getKey(), hosts);
        SplitUtils.updateSplit(rangeSplit, instance, tableConfig, principal, token, auths, level);
        rangeSplit.setOffline(tableConfig.isOfflineScan());
        rangeSplit.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
        rangeSplit.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
        result.add(rangeSplit);
      }
    }
  }
  return result.toArray(new InputSplit[result.size()]);
}
Use of org.apache.accumulo.core.client.impl.TabletLocator in the Apache Gora project.
Class AccumuloStore, method getPartitions.
/**
 * Splits a query into one partition query per tablet that the query's range is binned to.
 *
 * <p>
 * The table name is resolved to its id once and that id is used for the locator, the client context and the existence / state checks. Binning is retried every
 * 100 ms (starting from an empty result map each time) until the locator reports no failures; while retrying, a table that no longer exists or is offline
 * aborts the call.
 *
 * @param query
 *          the query to partition; its start and end keys, when set, bound the first and last partition
 * @return one partition query per tablet, each carrying the host name of the server the tablet was binned to
 * @throws GoraException
 *           wrapping any exception raised while locating tablets or building the partitions
 */
@Override
public List<PartitionQuery<K, T>> getPartitions(Query<K, T> query) throws GoraException {
  try {
    // resolve the name once; re-resolving inside the retry loop would make the
    // deleted-table check depend on a lookup of the very table being checked
    final String tableId = Tables.getTableId(conn.getInstance(), mapping.tableName);
    final ClientContext clientContext = new ClientContext(conn.getInstance(), credentials, AccumuloConfiguration.getTableConfiguration(conn, tableId));

    TabletLocator tl;
    if (conn instanceof MockConnector) {
      tl = new MockTabletLocator();
    } else {
      tl = TabletLocator.getLocator(clientContext, new Text(tableId));
    }

    final List<Range> ranges = Collections.singletonList(createRange(query));
    Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
    tl.invalidateCache();
    while (!tl.binRanges(clientContext, ranges, binnedRanges).isEmpty()) {
      // TODO log?
      if (!Tables.exists(conn.getInstance(), tableId)) {
        throw new TableDeletedException(tableId);
      }
      if (Tables.getTableState(conn.getInstance(), tableId) == TableState.OFFLINE) {
        throw new TableOfflineException(conn.getInstance(), tableId);
      }
      // drop the partial bins of the failed attempt so they cannot leak into the result
      binnedRanges.clear();
      UtilWaitThread.sleep(100);
      tl.invalidateCache();
    }

    List<PartitionQuery<K, T>> ret = new ArrayList<>();

    Text startRow = null;
    Text endRow = null;
    if (query.getStartKey() != null) {
      startRow = new Text(toBytes(query.getStartKey()));
    }
    if (query.getEndKey() != null) {
      endRow = new Text(toBytes(query.getEndKey()));
    }

    // hadoop expects hostnames, accumulo keeps track of IPs... so need to convert
    HashMap<String, String> hostNameCache = new HashMap<>();
    for (Entry<String, Map<KeyExtent, List<Range>>> entry : binnedRanges.entrySet()) {
      String ip = entry.getKey().split(":", 2)[0];
      String location = hostNameCache.get(ip);
      if (location == null) {
        InetAddress inetAddress = InetAddress.getByName(ip);
        location = inetAddress.getHostName();
        hostNameCache.put(ip, location);
      }

      Map<KeyExtent, List<Range>> tablets = entry.getValue();
      for (KeyExtent ke : tablets.keySet()) {
        // the partition starts at the query's start key when this tablet contains it,
        // otherwise at the key following the tablet's previous end row (null if there is none)
        K startKey = null;
        if (startRow == null || !ke.contains(startRow)) {
          if (ke.getPrevEndRow() != null) {
            startKey = followingKey(encoder, getKeyClass(), getBytes(ke.getPrevEndRow()));
          }
        } else {
          startKey = fromBytes(getKeyClass(), getBytes(startRow));
        }

        // the partition ends at the query's end key when this tablet contains it,
        // otherwise at the last possible key for the tablet's end row (null if there is none)
        K endKey = null;
        if (endRow == null || !ke.contains(endRow)) {
          if (ke.getEndRow() != null) {
            endKey = lastPossibleKey(encoder, getKeyClass(), getBytes(ke.getEndRow()));
          }
        } else {
          endKey = fromBytes(getKeyClass(), getBytes(endRow));
        }

        PartitionQueryImpl<K, T> pqi = new PartitionQueryImpl<>(query, startKey, endKey, location);
        pqi.setConf(getConf());
        ret.add(pqi);
      }
    }
    return ret;
  } catch (Exception e) {
    throw new GoraException(e);
  }
}
Aggregations