use of org.apache.htrace.wrappers.TraceRunnable in project accumulo by apache.
the class BulkImporter method estimateSizes.
/**
 * Builds, for every map file in {@code assignments}, a list of {@link AssignmentInfo} that pairs
 * each assigned tablet extent with a size for that file.
 *
 * <p>
 * The total length of each path is read from its file system first. A file assigned to exactly one
 * tablet is attributed its full length. Files assigned to several tablets are handed to a fixed
 * thread pool that calls {@code FileUtil.estimateSizes}; if that call fails with an
 * {@link IOException} or returns {@code null}, the file length is split evenly across its tablets.
 *
 * @param acuConf
 *          configuration forwarded to {@code FileUtil.estimateSizes}
 * @param conf
 *          Hadoop configuration forwarded to {@code FileUtil.estimateSizes}
 * @param vm
 *          volume manager used to resolve each path's file system
 * @param assignments
 *          map file to the tablet locations it was assigned to
 * @param paths
 *          the map files whose lengths are looked up
 * @param numThreads
 *          size of the estimation thread pool
 * @return a synchronized sorted map from map file to its per-tablet assignment infos
 * @throws RuntimeException
 *           if a file length cannot be read, or if the calling thread is interrupted while waiting
 *           for the pool to finish (the interrupt status is restored before throwing)
 */
private Map<Path, List<AssignmentInfo>> estimateSizes(final AccumuloConfiguration acuConf, final Configuration conf, final VolumeManager vm, Map<Path, List<TabletLocation>> assignments, Collection<Path> paths, int numThreads) {
  long t1 = System.currentTimeMillis();
  final Map<Path, Long> mapFileSizes = new TreeMap<>();
  try {
    for (Path path : paths) {
      FileSystem fs = vm.getVolumeByPath(path).getFileSystem();
      mapFileSizes.put(path, fs.getContentSummary(path).getLength());
    }
  } catch (IOException e) {
    log.error("Failed to get map files in for {}: {}", paths, e.getMessage(), e);
    throw new RuntimeException(e);
  }
  final Map<Path, List<AssignmentInfo>> ais = Collections.synchronizedMap(new TreeMap<Path, List<AssignmentInfo>>());
  ExecutorService threadPool = Executors.newFixedThreadPool(numThreads, new NamingThreadFactory("estimateSizes"));
  for (final Entry<Path, List<TabletLocation>> entry : assignments.entrySet()) {
    if (entry.getValue().size() == 1) {
      // only one tablet receives this map file, so there is no need to
      // estimate anything: the tablet gets the whole file size
      TabletLocation tabletLocation = entry.getValue().get(0);
      ais.put(entry.getKey(), Collections.singletonList(new AssignmentInfo(tabletLocation.tablet_extent, mapFileSizes.get(entry.getKey()))));
      continue;
    }
    Runnable estimationTask = new Runnable() {
      @Override
      public void run() {
        Map<KeyExtent, Long> estimatedSizes = null;
        try {
          estimatedSizes = FileUtil.estimateSizes(acuConf, entry.getKey(), mapFileSizes.get(entry.getKey()), extentsOf(entry.getValue()), conf, vm);
        } catch (IOException e) {
          // pass the exception itself so the stack trace is not lost
          log.warn("Failed to estimate map file sizes {}", e.getMessage(), e);
        }
        if (estimatedSizes == null) {
          // estimation failed, fall back to an even split of the file size
          estimatedSizes = new TreeMap<>();
          long estSize = (long) (mapFileSizes.get(entry.getKey()) / (double) entry.getValue().size());
          for (TabletLocation tl : entry.getValue()) {
            estimatedSizes.put(tl.tablet_extent, estSize);
          }
        }
        List<AssignmentInfo> assignmentInfoList = new ArrayList<>(estimatedSizes.size());
        for (Entry<KeyExtent, Long> entry2 : estimatedSizes.entrySet()) {
          assignmentInfoList.add(new AssignmentInfo(entry2.getKey(), entry2.getValue()));
        }
        ais.put(entry.getKey(), assignmentInfoList);
      }
    };
    threadPool.submit(new TraceRunnable(new LoggingRunnable(log, estimationTask)));
  }
  threadPool.shutdown();
  while (!threadPool.isTerminated()) {
    try {
      threadPool.awaitTermination(60, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
      // restore the interrupt status so code further up the stack can still observe it
      Thread.currentThread().interrupt();
      log.error("Encountered InterruptedException while waiting for the threadPool to terminate.", e);
      throw new RuntimeException(e);
    }
  }
  long t2 = System.currentTimeMillis();
  log.debug(String.format("Estimated map files sizes in %6.2f secs", (t2 - t1) / 1000.0));
  return ais;
}
use of org.apache.htrace.wrappers.TraceRunnable in project accumulo by apache.
the class BulkImporter method importFiles.
/**
 * Bulk imports the given map files into the table identified by {@code tableId}.
 *
 * <p>
 * The method works in three phases:
 * <ol>
 * <li>For every file, a thread pool looks up the tablets it overlaps. Files for which no tablet is
 * found (including when the lookup throws) are recorded as complete failures.</li>
 * <li>The files are assigned via {@code assignMapFiles}. Extents that fail are retried in a loop:
 * the locator cache is invalidated, the thread sleeps (the delay doubles each pass, capped at 60
 * seconds), overlapping tablets are looked up again for each failed extent, and assignment is
 * attempted again.</li>
 * <li>A file whose failure count exceeds {@code Property.TSERV_BULK_RETRY} is abandoned and moved
 * to the complete failures, which are finally handed to {@code processFailures}.</li>
 * </ol>
 *
 * @param files
 *          paths of the map files to import
 * @param failureDir
 *          not read directly by this method
 * @return statistics describing attempted, failed, abandoned and unrecovered assignments
 * @throws RuntimeException
 *           if the calling thread is interrupted while waiting for the tablet lookups to finish
 *           (the interrupt status is restored before throwing)
 */
public AssignmentStats importFiles(List<String> files, Path failureDir) throws IOException, AccumuloException, AccumuloSecurityException, ThriftTableOperationException {
  int numThreads = context.getConfiguration().getCount(Property.TSERV_BULK_PROCESS_THREADS);
  int numAssignThreads = context.getConfiguration().getCount(Property.TSERV_BULK_ASSIGNMENT_THREADS);
  timer = new StopWatch<>(Timers.class);
  timer.start(Timers.TOTAL);
  Configuration conf = CachedConfiguration.getInstance();
  final VolumeManager fs = VolumeManagerImpl.get(context.getConfiguration());
  Set<Path> paths = new HashSet<>();
  for (String file : files) {
    paths.add(new Path(file));
  }
  AssignmentStats assignmentStats = new AssignmentStats(paths.size());
  final Map<Path, List<KeyExtent>> completeFailures = Collections.synchronizedSortedMap(new TreeMap<Path, List<KeyExtent>>());
  final TabletLocator locator = TabletLocator.getLocator(context, Table.ID.of(tableId));
  final Map<Path, List<TabletLocation>> assignments = Collections.synchronizedSortedMap(new TreeMap<Path, List<TabletLocation>>());

  // phase 1: find the tablets each map file overlaps, in parallel
  timer.start(Timers.EXAMINE_MAP_FILES);
  ExecutorService threadPool = Executors.newFixedThreadPool(numThreads, new NamingThreadFactory("findOverlapping"));
  for (final Path mapFile : paths) {
    Runnable getAssignments = new Runnable() {
      @Override
      public void run() {
        List<TabletLocation> tabletsToAssignMapFileTo = Collections.emptyList();
        try {
          tabletsToAssignMapFileTo = findOverlappingTablets(context, fs, locator, mapFile);
        } catch (Exception ex) {
          log.warn("Unable to find tablets that overlap file " + mapFile.toString(), ex);
        }
        log.debug("Map file {} found to overlap {} tablets", mapFile, tabletsToAssignMapFileTo.size());
        if (tabletsToAssignMapFileTo.size() == 0) {
          // no tablet found (or the lookup failed): record the file as a complete failure
          List<KeyExtent> empty = Collections.emptyList();
          completeFailures.put(mapFile, empty);
        } else {
          assignments.put(mapFile, tabletsToAssignMapFileTo);
        }
      }
    };
    threadPool.submit(new TraceRunnable(new LoggingRunnable(log, getAssignments)));
  }
  threadPool.shutdown();
  while (!threadPool.isTerminated()) {
    try {
      threadPool.awaitTermination(60, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
      // restore the interrupt status so code further up the stack can still observe it
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    }
  }
  timer.stop(Timers.EXAMINE_MAP_FILES);

  // phase 2: first assignment attempt
  assignmentStats.attemptingAssignments(assignments);
  Map<Path, List<KeyExtent>> assignmentFailures = assignMapFiles(context, conf, fs, tableId, assignments, paths, numAssignThreads, numThreads);
  assignmentStats.assignmentsFailed(assignmentFailures);
  Map<Path, Integer> failureCount = new TreeMap<>();
  for (Entry<Path, List<KeyExtent>> entry : assignmentFailures.entrySet()) {
    failureCount.put(entry.getKey(), 1);
  }
  long sleepTime = 2 * 1000;
  while (assignmentFailures.size() > 0) {
    sleepTime = Math.min(sleepTime * 2, 60 * 1000);
    locator.invalidateCache();
    // assumption about assignment failures is that it caused by a split
    // happening or a missing location
    //
    // for splits we need to find children key extents that cover the
    // same key range and are contiguous (no holes, no overlap)
    timer.start(Timers.SLEEP);
    sleepUninterruptibly(sleepTime, TimeUnit.MILLISECONDS);
    timer.stop(Timers.SLEEP);
    log.debug("Trying to assign {} map files that previously failed on some key extents", assignmentFailures.size());
    assignments.clear();
    // look up the current tablets for every extent that failed; an extent is
    // dropped from the failure list only once its lookup succeeded
    for (Entry<Path, List<KeyExtent>> entry : assignmentFailures.entrySet()) {
      Iterator<KeyExtent> keListIter = entry.getValue().iterator();
      List<TabletLocation> tabletsToAssignMapFileTo = new ArrayList<>();
      while (keListIter.hasNext()) {
        KeyExtent ke = keListIter.next();
        timer.start(Timers.QUERY_METADATA);
        try {
          tabletsToAssignMapFileTo.addAll(findOverlappingTablets(context, fs, locator, entry.getKey(), ke));
          keListIter.remove();
        } catch (Exception ex) {
          log.warn("Exception finding overlapping tablets, will retry tablet " + ke, ex);
        }
        timer.stop(Timers.QUERY_METADATA);
      }
      if (tabletsToAssignMapFileTo.size() > 0) {
        assignments.put(entry.getKey(), tabletsToAssignMapFileTo);
      }
    }
    assignmentStats.attemptingAssignments(assignments);
    Map<Path, List<KeyExtent>> assignmentFailures2 = assignMapFiles(context, conf, fs, tableId, assignments, paths, numAssignThreads, numThreads);
    assignmentStats.assignmentsFailed(assignmentFailures2);
    // merge assignmentFailures2 into assignmentFailures
    for (Entry<Path, List<KeyExtent>> entry : assignmentFailures2.entrySet()) {
      assignmentFailures.get(entry.getKey()).addAll(entry.getValue());
      Integer fc = failureCount.get(entry.getKey());
      if (fc == null) {
        fc = 0;
      }
      failureCount.put(entry.getKey(), fc + 1);
    }
    // remove map files that have no more key extents to assign
    Iterator<Entry<Path, List<KeyExtent>>> afIter = assignmentFailures.entrySet().iterator();
    while (afIter.hasNext()) {
      Entry<Path, List<KeyExtent>> entry = afIter.next();
      if (entry.getValue().size() == 0) {
        afIter.remove();
      }
    }
    // give up on files that failed too often; the retry limit is read once per pass
    int retries = context.getConfiguration().getCount(Property.TSERV_BULK_RETRY);
    for (Entry<Path, Integer> entry : failureCount.entrySet()) {
      if (entry.getValue() > retries && assignmentFailures.get(entry.getKey()) != null) {
        log.error("Map file {} failed more than {} times, giving up.", entry.getKey(), retries);
        completeFailures.put(entry.getKey(), assignmentFailures.get(entry.getKey()));
        assignmentFailures.remove(entry.getKey());
      }
    }
  }

  // phase 3: report and process whatever could not be assigned
  assignmentStats.assignmentsAbandoned(completeFailures);
  Set<Path> failedFailures = processFailures(completeFailures);
  assignmentStats.unrecoveredMapFiles(failedFailures);
  timer.stop(Timers.TOTAL);
  printReport(paths);
  return assignmentStats;
}
use of org.apache.htrace.wrappers.TraceRunnable in project accumulo by apache.
the class CompactionQueue method poll.
/**
 * Removes and returns the smallest queued task as ordered by {@code comparator}.
 *
 * @return the minimum task, or {@code null} when the queue holds no tasks
 * @throws IllegalStateException
 *           if the minimum reported by {@link Collections#min} cannot be found again by identity
 *           while iterating over the queue
 */
@Override
public synchronized TraceRunnable poll() {
  if (task.isEmpty()) {
    return null;
  }
  final TraceRunnable smallest = Collections.min(task, comparator);
  // remove by identity (not equals) so exactly the instance chosen above is taken out
  for (Iterator<TraceRunnable> cursor = task.iterator(); cursor.hasNext();) {
    TraceRunnable candidate = cursor.next();
    if (candidate != smallest) {
      continue;
    }
    cursor.remove();
    return smallest;
  }
  throw new IllegalStateException("Minimum object found, but not there when removing");
}
use of org.apache.htrace.wrappers.TraceRunnable in project accumulo by apache.
the class TabletServerBatchReaderIterator method doLookups.
/**
 * Turns the binned ranges into {@code QueryTask}s and submits them to {@code queryThreadPool}.
 *
 * <p>
 * Entries for servers listed in {@code timedoutServers} are removed from {@code binnedRanges} and
 * their tablets are added to the failures map shared by all tasks. When the thread count is more
 * than twice the number of servers, each server's tablets are split into requests of at most
 * {@code totalTablets / numThreads} tablets (minimum one).
 *
 * @param binnedRanges
 *          server location to (tablet extent to ranges); modified in place when timed out servers
 *          are dropped
 * @param receiver
 *          passed to every created task
 * @param columns
 *          passed to every created task
 * @throws TimedOutException
 *           if every server in {@code binnedRanges} is already in {@code timedoutServers}
 */
private void doLookups(Map<String, Map<KeyExtent, List<Range>>> binnedRanges, final ResultReceiver receiver, List<Column> columns) {
  // every server we would talk to has already timed out
  if (timedoutServers.containsAll(binnedRanges.keySet())) {
    throw new TimedOutException(timedoutServers);
  }

  // with lots of threads and only a few tablet servers it is good to break
  // the per-server requests up; work out the per-request tablet cap for that case
  int tabletCap = Integer.MAX_VALUE;
  if (numThreads / binnedRanges.size() > 1) {
    int tabletTotal = 0;
    for (Map<KeyExtent, List<Range>> serverTablets : binnedRanges.values()) {
      tabletTotal += serverTablets.size();
    }
    int perThread = tabletTotal / numThreads;
    tabletCap = (perThread == 0) ? 1 : perThread;
  }

  // fail the tablets of timed out servers right away and drop those servers
  Map<KeyExtent, List<Range>> failures = new HashMap<>();
  if (!timedoutServers.isEmpty()) {
    Iterator<Entry<String, Map<KeyExtent, List<Range>>>> serverIter = binnedRanges.entrySet().iterator();
    while (serverIter.hasNext()) {
      Entry<String, Map<KeyExtent, List<Range>>> serverEntry = serverIter.next();
      if (!timedoutServers.contains(serverEntry.getKey())) {
        continue;
      }
      failures.putAll(serverEntry.getValue());
      serverIter.remove();
    }
  }

  // randomize tabletserver order... this will help when there are multiple
  // batch readers and writers running against accumulo
  List<String> shuffledServers = new ArrayList<>(binnedRanges.keySet());
  Collections.shuffle(shuffledServers);

  List<QueryTask> pending = new ArrayList<>();
  for (final String server : shuffledServers) {
    final Map<KeyExtent, List<Range>> serverTablets = binnedRanges.get(server);
    boolean sendWhole = tabletCap == Integer.MAX_VALUE || serverTablets.size() == 1;
    if (sendWhole) {
      pending.add(new QueryTask(server, serverTablets, failures, receiver, columns));
      continue;
    }
    // cut this server's tablets into chunks of at most tabletCap tablets
    HashMap<KeyExtent, List<Range>> chunk = new HashMap<>();
    for (Entry<KeyExtent, List<Range>> tablet : serverTablets.entrySet()) {
      chunk.put(tablet.getKey(), tablet.getValue());
      if (chunk.size() >= tabletCap) {
        pending.add(new QueryTask(server, chunk, failures, receiver, columns));
        chunk = new HashMap<>();
      }
    }
    if (!chunk.isEmpty()) {
      pending.add(new QueryTask(server, chunk, failures, receiver, columns));
    }
  }

  // the semaphore starts with all of its permits taken; each task is handed
  // the semaphore together with the total task count
  final int taskCount = pending.size();
  final Semaphore semaphore = new Semaphore(taskCount);
  semaphore.acquireUninterruptibly(taskCount);
  for (QueryTask queued : pending) {
    queued.setSemaphore(semaphore, taskCount);
    queryThreadPool.execute(new TraceRunnable(queued));
  }
}
Aggregations