use of org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation in project accumulo by apache.
the class BulkImporter method findOverlappingTablets.
public static List<TabletLocation> findOverlappingTablets(ClientContext context, VolumeManager vm, TabletLocator locator, Path file, Text startRow, Text endRow) throws Exception {
List<TabletLocation> result = new ArrayList<>();
Collection<ByteSequence> columnFamilies = Collections.emptyList();
String filename = file.toString();
// log.debug(filename + " finding overlapping tablets " + startRow + " -> " + endRow);
FileSystem fs = vm.getVolumeByPath(file).getFileSystem();
try (FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(filename, fs, fs.getConf()).withTableConfiguration(context.getConfiguration()).seekToBeginning().build()) {
Text row = startRow;
if (row == null)
row = new Text();
while (true) {
// log.debug(filename + " Seeking to row " + row);
reader.seek(new Range(row, null), columnFamilies, false);
if (!reader.hasTop()) {
// log.debug(filename + " not found");
break;
}
row = reader.getTopKey().getRow();
TabletLocation tabletLocation = locator.locateTablet(context, row, false, true);
// log.debug(filename + " found row " + row + " at location " + tabletLocation);
result.add(tabletLocation);
row = tabletLocation.tablet_extent.getEndRow();
if (row != null && (endRow == null || row.compareTo(endRow) < 0)) {
row = new Text(row);
row.append(byte0, 0, byte0.length);
} else
break;
}
}
// log.debug(filename + " to be sent to " + result);
return result;
}
use of org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation in project accumulo by apache.
the class BulkImporter method estimateSizes.
private Map<Path, List<AssignmentInfo>> estimateSizes(final AccumuloConfiguration acuConf, final Configuration conf, final VolumeManager vm, Map<Path, List<TabletLocation>> assignments, Collection<Path> paths, int numThreads) {
long t1 = System.currentTimeMillis();
final Map<Path, Long> mapFileSizes = new TreeMap<>();
try {
for (Path path : paths) {
FileSystem fs = vm.getVolumeByPath(path).getFileSystem();
mapFileSizes.put(path, fs.getContentSummary(path).getLength());
}
} catch (IOException e) {
log.error("Failed to get map files in for {}: {}", paths, e.getMessage(), e);
throw new RuntimeException(e);
}
final Map<Path, List<AssignmentInfo>> ais = Collections.synchronizedMap(new TreeMap<Path, List<AssignmentInfo>>());
ExecutorService threadPool = Executors.newFixedThreadPool(numThreads, new NamingThreadFactory("estimateSizes"));
for (final Entry<Path, List<TabletLocation>> entry : assignments.entrySet()) {
if (entry.getValue().size() == 1) {
TabletLocation tabletLocation = entry.getValue().get(0);
// if the tablet completely contains the map file, there is no
// need to estimate its
// size
ais.put(entry.getKey(), Collections.singletonList(new AssignmentInfo(tabletLocation.tablet_extent, mapFileSizes.get(entry.getKey()))));
continue;
}
Runnable estimationTask = new Runnable() {
@Override
public void run() {
Map<KeyExtent, Long> estimatedSizes = null;
try {
estimatedSizes = FileUtil.estimateSizes(acuConf, entry.getKey(), mapFileSizes.get(entry.getKey()), extentsOf(entry.getValue()), conf, vm);
} catch (IOException e) {
log.warn("Failed to estimate map file sizes {}", e.getMessage());
}
if (estimatedSizes == null) {
// estimation failed, do a simple estimation
estimatedSizes = new TreeMap<>();
long estSize = (long) (mapFileSizes.get(entry.getKey()) / (double) entry.getValue().size());
for (TabletLocation tl : entry.getValue()) estimatedSizes.put(tl.tablet_extent, estSize);
}
List<AssignmentInfo> assignmentInfoList = new ArrayList<>(estimatedSizes.size());
for (Entry<KeyExtent, Long> entry2 : estimatedSizes.entrySet()) assignmentInfoList.add(new AssignmentInfo(entry2.getKey(), entry2.getValue()));
ais.put(entry.getKey(), assignmentInfoList);
}
};
threadPool.submit(new TraceRunnable(new LoggingRunnable(log, estimationTask)));
}
threadPool.shutdown();
while (!threadPool.isTerminated()) {
try {
threadPool.awaitTermination(60, TimeUnit.SECONDS);
} catch (InterruptedException e) {
log.error("Encountered InterruptedException while waiting for the threadPool to terminate.", e);
throw new RuntimeException(e);
}
}
long t2 = System.currentTimeMillis();
log.debug(String.format("Estimated map files sizes in %6.2f secs", (t2 - t1) / 1000.0));
return ais;
}
use of org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation in project accumulo by apache.
the class BulkImporter method importFiles.
public AssignmentStats importFiles(List<String> files, Path failureDir) throws IOException, AccumuloException, AccumuloSecurityException, ThriftTableOperationException {
int numThreads = context.getConfiguration().getCount(Property.TSERV_BULK_PROCESS_THREADS);
int numAssignThreads = context.getConfiguration().getCount(Property.TSERV_BULK_ASSIGNMENT_THREADS);
timer = new StopWatch<>(Timers.class);
timer.start(Timers.TOTAL);
Configuration conf = CachedConfiguration.getInstance();
VolumeManagerImpl.get(context.getConfiguration());
final VolumeManager fs = VolumeManagerImpl.get(context.getConfiguration());
Set<Path> paths = new HashSet<>();
for (String file : files) {
paths.add(new Path(file));
}
AssignmentStats assignmentStats = new AssignmentStats(paths.size());
final Map<Path, List<KeyExtent>> completeFailures = Collections.synchronizedSortedMap(new TreeMap<Path, List<KeyExtent>>());
ClientService.Client client = null;
final TabletLocator locator = TabletLocator.getLocator(context, Table.ID.of(tableId));
try {
final Map<Path, List<TabletLocation>> assignments = Collections.synchronizedSortedMap(new TreeMap<Path, List<TabletLocation>>());
timer.start(Timers.EXAMINE_MAP_FILES);
ExecutorService threadPool = Executors.newFixedThreadPool(numThreads, new NamingThreadFactory("findOverlapping"));
for (Path path : paths) {
final Path mapFile = path;
Runnable getAssignments = new Runnable() {
@Override
public void run() {
List<TabletLocation> tabletsToAssignMapFileTo = Collections.emptyList();
try {
tabletsToAssignMapFileTo = findOverlappingTablets(context, fs, locator, mapFile);
} catch (Exception ex) {
log.warn("Unable to find tablets that overlap file " + mapFile.toString(), ex);
}
log.debug("Map file {} found to overlap {} tablets", mapFile, tabletsToAssignMapFileTo.size());
if (tabletsToAssignMapFileTo.size() == 0) {
List<KeyExtent> empty = Collections.emptyList();
completeFailures.put(mapFile, empty);
} else
assignments.put(mapFile, tabletsToAssignMapFileTo);
}
};
threadPool.submit(new TraceRunnable(new LoggingRunnable(log, getAssignments)));
}
threadPool.shutdown();
while (!threadPool.isTerminated()) {
try {
threadPool.awaitTermination(60, TimeUnit.SECONDS);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
timer.stop(Timers.EXAMINE_MAP_FILES);
assignmentStats.attemptingAssignments(assignments);
Map<Path, List<KeyExtent>> assignmentFailures = assignMapFiles(context, conf, fs, tableId, assignments, paths, numAssignThreads, numThreads);
assignmentStats.assignmentsFailed(assignmentFailures);
Map<Path, Integer> failureCount = new TreeMap<>();
for (Entry<Path, List<KeyExtent>> entry : assignmentFailures.entrySet()) failureCount.put(entry.getKey(), 1);
long sleepTime = 2 * 1000;
while (assignmentFailures.size() > 0) {
sleepTime = Math.min(sleepTime * 2, 60 * 1000);
locator.invalidateCache();
// assumption about assignment failures is that it caused by a split
// happening or a missing location
//
// for splits we need to find children key extents that cover the
// same key range and are contiguous (no holes, no overlap)
timer.start(Timers.SLEEP);
sleepUninterruptibly(sleepTime, TimeUnit.MILLISECONDS);
timer.stop(Timers.SLEEP);
log.debug("Trying to assign {} map files that previously failed on some key extents", assignmentFailures.size());
assignments.clear();
// assign to
for (Entry<Path, List<KeyExtent>> entry : assignmentFailures.entrySet()) {
Iterator<KeyExtent> keListIter = entry.getValue().iterator();
List<TabletLocation> tabletsToAssignMapFileTo = new ArrayList<>();
while (keListIter.hasNext()) {
KeyExtent ke = keListIter.next();
timer.start(Timers.QUERY_METADATA);
try {
tabletsToAssignMapFileTo.addAll(findOverlappingTablets(context, fs, locator, entry.getKey(), ke));
keListIter.remove();
} catch (Exception ex) {
log.warn("Exception finding overlapping tablets, will retry tablet " + ke, ex);
}
timer.stop(Timers.QUERY_METADATA);
}
if (tabletsToAssignMapFileTo.size() > 0)
assignments.put(entry.getKey(), tabletsToAssignMapFileTo);
}
assignmentStats.attemptingAssignments(assignments);
Map<Path, List<KeyExtent>> assignmentFailures2 = assignMapFiles(context, conf, fs, tableId, assignments, paths, numAssignThreads, numThreads);
assignmentStats.assignmentsFailed(assignmentFailures2);
// merge assignmentFailures2 into assignmentFailures
for (Entry<Path, List<KeyExtent>> entry : assignmentFailures2.entrySet()) {
assignmentFailures.get(entry.getKey()).addAll(entry.getValue());
Integer fc = failureCount.get(entry.getKey());
if (fc == null)
fc = 0;
failureCount.put(entry.getKey(), fc + 1);
}
// remove map files that have no more key extents to assign
Iterator<Entry<Path, List<KeyExtent>>> afIter = assignmentFailures.entrySet().iterator();
while (afIter.hasNext()) {
Entry<Path, List<KeyExtent>> entry = afIter.next();
if (entry.getValue().size() == 0)
afIter.remove();
}
Set<Entry<Path, Integer>> failureIter = failureCount.entrySet();
for (Entry<Path, Integer> entry : failureIter) {
int retries = context.getConfiguration().getCount(Property.TSERV_BULK_RETRY);
if (entry.getValue() > retries && assignmentFailures.get(entry.getKey()) != null) {
log.error("Map file {} failed more than {} times, giving up.", entry.getKey(), retries);
completeFailures.put(entry.getKey(), assignmentFailures.get(entry.getKey()));
assignmentFailures.remove(entry.getKey());
}
}
}
assignmentStats.assignmentsAbandoned(completeFailures);
Set<Path> failedFailures = processFailures(completeFailures);
assignmentStats.unrecoveredMapFiles(failedFailures);
timer.stop(Timers.TOTAL);
printReport(paths);
return assignmentStats;
} finally {
if (client != null) {
ServerClient.close(client);
}
}
}
use of org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation in project accumulo by apache.
the class BulkImporterTest method testFindOverlappingTablets.
@Test
public void testFindOverlappingTablets() throws Exception {
MockTabletLocator locator = new MockTabletLocator();
FileSystem fs = FileSystem.getLocal(CachedConfiguration.getInstance());
ClientContext context = EasyMock.createMock(ClientContext.class);
EasyMock.expect(context.getConfiguration()).andReturn(DefaultConfiguration.getInstance()).anyTimes();
EasyMock.replay(context);
String file = "target/testFile.rf";
fs.delete(new Path(file), true);
FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder().forFile(file, fs, fs.getConf()).withTableConfiguration(context.getConfiguration()).build();
writer.startDefaultLocalityGroup();
Value empty = new Value(new byte[] {});
writer.append(new Key("a", "cf", "cq"), empty);
writer.append(new Key("a", "cf", "cq1"), empty);
writer.append(new Key("a", "cf", "cq2"), empty);
writer.append(new Key("a", "cf", "cq3"), empty);
writer.append(new Key("a", "cf", "cq4"), empty);
writer.append(new Key("a", "cf", "cq5"), empty);
writer.append(new Key("d", "cf", "cq"), empty);
writer.append(new Key("d", "cf", "cq1"), empty);
writer.append(new Key("d", "cf", "cq2"), empty);
writer.append(new Key("d", "cf", "cq3"), empty);
writer.append(new Key("d", "cf", "cq4"), empty);
writer.append(new Key("d", "cf", "cq5"), empty);
writer.append(new Key("dd", "cf", "cq1"), empty);
writer.append(new Key("ichabod", "cf", "cq"), empty);
writer.append(new Key("icky", "cf", "cq1"), empty);
writer.append(new Key("iffy", "cf", "cq2"), empty);
writer.append(new Key("internal", "cf", "cq3"), empty);
writer.append(new Key("is", "cf", "cq4"), empty);
writer.append(new Key("iterator", "cf", "cq5"), empty);
writer.append(new Key("xyzzy", "cf", "cq"), empty);
writer.close();
VolumeManager vm = VolumeManagerImpl.get(context.getConfiguration());
List<TabletLocation> overlaps = BulkImporter.findOverlappingTablets(context, vm, locator, new Path(file));
Assert.assertEquals(5, overlaps.size());
Collections.sort(overlaps);
Assert.assertEquals(new KeyExtent(tableId, new Text("a"), null), overlaps.get(0).tablet_extent);
Assert.assertEquals(new KeyExtent(tableId, new Text("d"), new Text("cm")), overlaps.get(1).tablet_extent);
Assert.assertEquals(new KeyExtent(tableId, new Text("dm"), new Text("d")), overlaps.get(2).tablet_extent);
Assert.assertEquals(new KeyExtent(tableId, new Text("j"), new Text("i")), overlaps.get(3).tablet_extent);
Assert.assertEquals(new KeyExtent(tableId, null, new Text("l")), overlaps.get(4).tablet_extent);
List<TabletLocation> overlaps2 = BulkImporter.findOverlappingTablets(context, vm, locator, new Path(file), new KeyExtent(tableId, new Text("h"), new Text("b")));
Assert.assertEquals(3, overlaps2.size());
Assert.assertEquals(new KeyExtent(tableId, new Text("d"), new Text("cm")), overlaps2.get(0).tablet_extent);
Assert.assertEquals(new KeyExtent(tableId, new Text("dm"), new Text("d")), overlaps2.get(1).tablet_extent);
Assert.assertEquals(new KeyExtent(tableId, new Text("j"), new Text("i")), overlaps2.get(2).tablet_extent);
Assert.assertEquals(locator.invalidated, 1);
}
use of org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation in project accumulo by apache.
the class TabletLocatorImplTest method createMetaCacheKE.
static TreeMap<KeyExtent, TabletLocation> createMetaCacheKE(Object... data) {
TreeMap<KeyExtent, TabletLocation> mcke = new TreeMap<>();
for (int i = 0; i < data.length; i += 2) {
KeyExtent ke = (KeyExtent) data[i];
String loc = (String) data[i + 1];
mcke.put(ke, new TabletLocation(ke, loc, "1"));
}
return mcke;
}
Aggregations