use of org.apache.accumulo.core.clientImpl.TabletLocator in project accumulo by apache.
the class AccumuloRecordReader method getSplits.
/**
* Gets the splits of the tables that have been set on the job by reading the metadata table for
* the specified ranges.
*/
public static InputSplit[] getSplits(JobConf job, Class<?> callingClass) throws IOException {
validateOptions(job, callingClass);
LinkedList<InputSplit> splits = new LinkedList<>();
Map<String, InputTableConfig> tableConfigs = InputConfigurator.getInputTableConfigs(callingClass, job);
try (AccumuloClient client = createClient(job, callingClass);
var context = ((ClientContext) client)) {
for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
String tableName = tableConfigEntry.getKey();
InputTableConfig tableConfig = tableConfigEntry.getValue();
TableId tableId;
// resolve table name to id once, and use id from this point forward
try {
tableId = context.getTableId(tableName);
} catch (TableNotFoundException e) {
throw new IOException(e);
}
boolean batchScan = InputConfigurator.isBatchScan(callingClass, job);
boolean supportBatchScan = !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
if (batchScan && !supportBatchScan)
throw new IllegalArgumentException("BatchScanner optimization not available for offline" + " scan, isolated, or local iterators");
boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
if (batchScan && !autoAdjust)
throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
if (ranges.isEmpty()) {
ranges = new ArrayList<>(1);
ranges.add(new Range());
}
// get the metadata information for these ranges
Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
TabletLocator tl;
try {
if (tableConfig.isOfflineScan()) {
binnedRanges = binOfflineTable(job, tableId, ranges, callingClass);
while (binnedRanges == null) {
// Some tablets were still online, try again
// sleep randomly between 100 and 200 ms
sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
binnedRanges = binOfflineTable(job, tableId, ranges, callingClass);
}
} else {
tl = InputConfigurator.getTabletLocator(callingClass, job, tableId);
// its possible that the cache could contain complete, but old information about a
// tables
// tablets... so clear it
tl.invalidateCache();
while (!tl.binRanges(context, ranges, binnedRanges).isEmpty()) {
context.requireNotDeleted(tableId);
context.requireNotOffline(tableId, tableName);
binnedRanges.clear();
log.warn("Unable to locate bins for specified ranges. Retrying.");
// sleep randomly between 100 and 200 ms
sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
tl.invalidateCache();
}
}
} catch (TableOfflineException | TableNotFoundException | AccumuloException | AccumuloSecurityException e) {
throw new IOException(e);
}
HashMap<Range, ArrayList<String>> splitsToAdd = null;
if (!autoAdjust)
splitsToAdd = new HashMap<>();
HashMap<String, String> hostNameCache = new HashMap<>();
for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
String ip = tserverBin.getKey().split(":", 2)[0];
String location = hostNameCache.get(ip);
if (location == null) {
InetAddress inetAddress = InetAddress.getByName(ip);
location = inetAddress.getCanonicalHostName();
hostNameCache.put(ip, location);
}
for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
Range ke = extentRanges.getKey().toDataRange();
if (batchScan) {
// group ranges by tablet to be read by a BatchScanner
ArrayList<Range> clippedRanges = new ArrayList<>();
for (Range r : extentRanges.getValue()) clippedRanges.add(ke.clip(r));
BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges, new String[] { location });
SplitUtils.updateSplit(split, tableConfig);
splits.add(split);
} else {
// not grouping by tablet
for (Range r : extentRanges.getValue()) {
if (autoAdjust) {
// divide ranges into smaller ranges, based on the tablets
RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), ke.clip(r), new String[] { location });
SplitUtils.updateSplit(split, tableConfig);
split.setOffline(tableConfig.isOfflineScan());
split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
splits.add(split);
} else {
// don't divide ranges
ArrayList<String> locations = splitsToAdd.get(r);
if (locations == null)
locations = new ArrayList<>(1);
locations.add(location);
splitsToAdd.put(r, locations);
}
}
}
}
}
if (!autoAdjust)
for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), entry.getKey(), entry.getValue().toArray(new String[0]));
SplitUtils.updateSplit(split, tableConfig);
split.setOffline(tableConfig.isOfflineScan());
split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
splits.add(split);
}
}
}
return splits.toArray(new InputSplit[splits.size()]);
}
use of org.apache.accumulo.core.clientImpl.TabletLocator in project accumulo by apache.
the class AbstractInputFormat method getSplits.
/**
* Gets the splits of the tables that have been set on the job by reading the metadata table for
* the specified ranges.
*
* @return the splits from the tables based on the ranges.
* @throws java.io.IOException
* if a table set on the job doesn't exist or an error occurs initializing the tablet
* locator
*/
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
Level logLevel = getLogLevel(job);
log.setLevel(logLevel);
validateOptions(job);
LinkedList<InputSplit> splits = new LinkedList<>();
Map<String, org.apache.accumulo.core.client.mapreduce.InputTableConfig> tableConfigs = getInputTableConfigs(job);
for (Map.Entry<String, org.apache.accumulo.core.client.mapreduce.InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
String tableName = tableConfigEntry.getKey();
org.apache.accumulo.core.client.mapreduce.InputTableConfig tableConfig = tableConfigEntry.getValue();
ClientContext client;
try {
client = InputConfigurator.client(CLASS, job);
} catch (AccumuloException | AccumuloSecurityException e) {
throw new IOException(e);
}
TableId tableId;
// resolve table name to id once, and use id from this point forward
try {
tableId = client.getTableId(tableName);
} catch (TableNotFoundException e) {
throw new IOException(e);
}
boolean batchScan = InputConfigurator.isBatchScan(CLASS, job);
boolean supportBatchScan = !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
if (batchScan && !supportBatchScan)
throw new IllegalArgumentException("BatchScanner optimization not available for offline" + " scan, isolated, or local iterators");
boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
if (batchScan && !autoAdjust)
throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
if (ranges.isEmpty()) {
ranges = new ArrayList<>(1);
ranges.add(new Range());
}
// get the metadata information for these ranges
Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
TabletLocator tl;
try {
if (tableConfig.isOfflineScan()) {
binnedRanges = binOfflineTable(job, tableId, ranges);
while (binnedRanges == null) {
// Some tablets were still online, try again
// sleep randomly between 100 and 200 ms
sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
binnedRanges = binOfflineTable(job, tableId, ranges);
}
} else {
tl = TabletLocator.getLocator(client, tableId);
// its possible that the cache could contain complete, but old information about a
// tables tablets... so clear it
tl.invalidateCache();
while (!tl.binRanges(client, ranges, binnedRanges).isEmpty()) {
client.requireNotDeleted(tableId);
client.requireNotOffline(tableId, tableName);
binnedRanges.clear();
log.warn("Unable to locate bins for specified ranges. Retrying.");
// sleep randomly between 100 and 200 ms
sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
tl.invalidateCache();
}
}
} catch (Exception e) {
throw new IOException(e);
}
// all of this code will add either range per each locations or split ranges and add
// range-location split
// Map from Range to Array of Locations, we only use this if we're don't split
HashMap<Range, ArrayList<String>> splitsToAdd = null;
if (!autoAdjust)
splitsToAdd = new HashMap<>();
HashMap<String, String> hostNameCache = new HashMap<>();
for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
String ip = tserverBin.getKey().split(":", 2)[0];
String location = hostNameCache.get(ip);
if (location == null) {
InetAddress inetAddress = InetAddress.getByName(ip);
location = inetAddress.getCanonicalHostName();
hostNameCache.put(ip, location);
}
for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
Range ke = extentRanges.getKey().toDataRange();
if (batchScan) {
// group ranges by tablet to be read by a BatchScanner
ArrayList<Range> clippedRanges = new ArrayList<>();
for (Range r : extentRanges.getValue()) clippedRanges.add(ke.clip(r));
org.apache.accumulo.core.clientImpl.mapred.BatchInputSplit split = new org.apache.accumulo.core.clientImpl.mapred.BatchInputSplit(tableName, tableId, clippedRanges, new String[] { location });
org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig, logLevel);
splits.add(split);
} else {
// not grouping by tablet
for (Range r : extentRanges.getValue()) {
if (autoAdjust) {
// divide ranges into smaller ranges, based on the tablets
RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), ke.clip(r), new String[] { location });
org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig, logLevel);
split.setOffline(tableConfig.isOfflineScan());
split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
splits.add(split);
} else {
// don't divide ranges
ArrayList<String> locations = splitsToAdd.get(r);
if (locations == null)
locations = new ArrayList<>(1);
locations.add(location);
splitsToAdd.put(r, locations);
}
}
}
}
}
if (!autoAdjust)
for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), entry.getKey(), entry.getValue().toArray(new String[0]));
org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig, logLevel);
split.setOffline(tableConfig.isOfflineScan());
split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
splits.add(split);
}
}
return splits.toArray(new InputSplit[splits.size()]);
}
use of org.apache.accumulo.core.clientImpl.TabletLocator in project accumulo by apache.
the class BulkImporter method importFiles.
public AssignmentStats importFiles(List<String> files) {
int numThreads = context.getConfiguration().getCount(Property.TSERV_BULK_PROCESS_THREADS);
int numAssignThreads = context.getConfiguration().getCount(Property.TSERV_BULK_ASSIGNMENT_THREADS);
timer = new StopWatch<>(Timers.class);
timer.start(Timers.TOTAL);
final VolumeManager fs = context.getVolumeManager();
Set<Path> paths = new HashSet<>();
for (String file : files) {
paths.add(new Path(file));
}
AssignmentStats assignmentStats = new AssignmentStats(paths.size());
final Map<Path, List<KeyExtent>> completeFailures = Collections.synchronizedSortedMap(new TreeMap<>());
ClientService.Client client = null;
final TabletLocator locator = TabletLocator.getLocator(context, TableId.of(tableId));
try {
final Map<Path, List<TabletLocation>> assignments = Collections.synchronizedSortedMap(new TreeMap<>());
timer.start(Timers.EXAMINE_MAP_FILES);
ExecutorService threadPool = ThreadPools.createFixedThreadPool(numThreads, "findOverlapping", false);
for (Path path : paths) {
final Path mapFile = path;
Runnable getAssignments = new Runnable() {
@Override
public void run() {
List<TabletLocation> tabletsToAssignMapFileTo = Collections.emptyList();
try {
tabletsToAssignMapFileTo = findOverlappingTablets(context, fs, locator, mapFile);
} catch (Exception ex) {
log.warn("Unable to find tablets that overlap file " + mapFile, ex);
}
log.debug("Map file {} found to overlap {} tablets", mapFile, tabletsToAssignMapFileTo.size());
if (tabletsToAssignMapFileTo.isEmpty()) {
List<KeyExtent> empty = Collections.emptyList();
completeFailures.put(mapFile, empty);
} else
assignments.put(mapFile, tabletsToAssignMapFileTo);
}
};
threadPool.submit(getAssignments);
}
threadPool.shutdown();
while (!threadPool.isTerminated()) {
try {
threadPool.awaitTermination(60, TimeUnit.SECONDS);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
timer.stop(Timers.EXAMINE_MAP_FILES);
assignmentStats.attemptingAssignments(assignments);
Map<Path, List<KeyExtent>> assignmentFailures = assignMapFiles(fs, assignments, paths, numAssignThreads, numThreads);
assignmentStats.assignmentsFailed(assignmentFailures);
Map<Path, Integer> failureCount = new TreeMap<>();
for (Entry<Path, List<KeyExtent>> entry : assignmentFailures.entrySet()) failureCount.put(entry.getKey(), 1);
long sleepTime = 2_000;
while (!assignmentFailures.isEmpty()) {
sleepTime = Math.min(sleepTime * 2, MINUTES.toMillis(1));
locator.invalidateCache();
// assumption about assignment failures is that it caused by a split
// happening or a missing location
//
// for splits we need to find children key extents that cover the
// same key range and are contiguous (no holes, no overlap)
timer.start(Timers.SLEEP);
sleepUninterruptibly(sleepTime, TimeUnit.MILLISECONDS);
timer.stop(Timers.SLEEP);
log.debug("Trying to assign {} map files that previously failed on some key extents", assignmentFailures.size());
assignments.clear();
// assign to
for (Entry<Path, List<KeyExtent>> entry : assignmentFailures.entrySet()) {
Iterator<KeyExtent> keListIter = entry.getValue().iterator();
List<TabletLocation> tabletsToAssignMapFileTo = new ArrayList<>();
while (keListIter.hasNext()) {
KeyExtent ke = keListIter.next();
timer.start(Timers.QUERY_METADATA);
try {
tabletsToAssignMapFileTo.addAll(findOverlappingTablets(context, fs, locator, entry.getKey(), ke));
keListIter.remove();
} catch (Exception ex) {
log.warn("Exception finding overlapping tablets, will retry tablet " + ke, ex);
}
timer.stop(Timers.QUERY_METADATA);
}
if (!tabletsToAssignMapFileTo.isEmpty())
assignments.put(entry.getKey(), tabletsToAssignMapFileTo);
}
assignmentStats.attemptingAssignments(assignments);
Map<Path, List<KeyExtent>> assignmentFailures2 = assignMapFiles(fs, assignments, paths, numAssignThreads, numThreads);
assignmentStats.assignmentsFailed(assignmentFailures2);
// merge assignmentFailures2 into assignmentFailures
for (Entry<Path, List<KeyExtent>> entry : assignmentFailures2.entrySet()) {
assignmentFailures.get(entry.getKey()).addAll(entry.getValue());
Integer fc = failureCount.get(entry.getKey());
if (fc == null)
fc = 0;
failureCount.put(entry.getKey(), fc + 1);
}
// remove map files that have no more key extents to assign
assignmentFailures.values().removeIf(List::isEmpty);
Set<Entry<Path, Integer>> failureIter = failureCount.entrySet();
for (Entry<Path, Integer> entry : failureIter) {
int retries = context.getConfiguration().getCount(Property.TSERV_BULK_RETRY);
if (entry.getValue() > retries && assignmentFailures.get(entry.getKey()) != null) {
log.error("Map file {} failed more than {} times, giving up.", entry.getKey(), retries);
completeFailures.put(entry.getKey(), assignmentFailures.get(entry.getKey()));
assignmentFailures.remove(entry.getKey());
}
}
}
assignmentStats.assignmentsAbandoned(completeFailures);
Set<Path> failedFailures = processFailures(completeFailures);
assignmentStats.unrecoveredMapFiles(failedFailures);
timer.stop(Timers.TOTAL);
printReport(paths);
return assignmentStats;
} finally {
if (client != null) {
ServerClient.close(client, context);
}
}
}
Aggregations