use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.
the class AbstractInputFormat method getSplits.
/**
 * Gets the splits of the tables that have been set on the job by reading the metadata table for the specified ranges.
 *
 * <p>
 * For every configured input table the table name is resolved to an id, the configured ranges are binned by location and tablet extent, and each bin is
 * turned into either a {@code BatchInputSplit} (when the BatchScanner optimization is enabled) or one or more {@code RangeInputSplit}s.
 *
 * @param context
 *          the job context from which the table configs, instance, credentials and scan options are read
 * @return the splits from the tables based on the ranges.
 * @throws java.io.IOException
 *           if a table set on the job doesn't exist or an error occurs initializing the tablet locator
 * @throws IllegalArgumentException
 *           if the BatchScanner optimization is requested together with offline scan, isolated scanners or local iterators, or while auto-adjusting of
 *           ranges is disabled
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
Level logLevel = getLogLevel(context);
log.setLevel(logLevel);
validateOptions(context);
// shared by both retry loops below to randomize the back-off sleep
Random random = new Random();
LinkedList<InputSplit> splits = new LinkedList<>();
Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(context);
// every configured table contributes its own splits to the single result list
for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
String tableName = tableConfigEntry.getKey();
InputTableConfig tableConfig = tableConfigEntry.getValue();
Instance instance = getInstance(context);
Table.ID tableId;
// resolve table name to id once, and use id from this point forward
if (DeprecationUtil.isMockInstance(instance)) {
// mock instances skip the lookup and use an id built from the empty string
tableId = Table.ID.of("");
} else {
try {
tableId = Tables.getTableId(instance, tableName);
} catch (TableNotFoundException e) {
throw new IOException(e);
}
}
Authorizations auths = getScanAuthorizations(context);
String principal = getPrincipal(context);
AuthenticationToken token = getAuthenticationToken(context);
// the BatchScanner optimization is rejected for offline, isolated and local-iterator scans
boolean batchScan = InputConfigurator.isBatchScan(CLASS, context.getConfiguration());
boolean supportBatchScan = !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
if (batchScan && !supportBatchScan)
throw new IllegalArgumentException("BatchScanner optimization not available for offline scan, isolated, or local iterators");
boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
if (batchScan && !autoAdjust)
throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
// overlapping ranges are only merged when ranges are auto-adjusted
List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
if (ranges.isEmpty()) {
// no ranges configured: fall back to a single default-constructed Range
ranges = new ArrayList<>(1);
ranges.add(new Range());
}
// get the metadata information for these ranges
// outer key: location string (the part before ':' is resolved to a host name below); inner map: tablet extent -> ranges binned to it
Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
TabletLocator tl;
try {
if (tableConfig.isOfflineScan()) {
binnedRanges = binOfflineTable(context, tableId, ranges);
// a null result means binning has to be retried
while (binnedRanges == null) {
// Some tablets were still online, try again
// sleep randomly between 100 and 200 ms
sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
binnedRanges = binOfflineTable(context, tableId, ranges);
}
} else {
tl = InputConfigurator.getTabletLocator(CLASS, context.getConfiguration(), tableId);
// it's possible that the cache could contain complete, but old information about a table's tablets... so clear it
tl.invalidateCache();
ClientContext clientContext = new ClientContext(getInstance(context), new Credentials(getPrincipal(context), getAuthenticationToken(context)), getClientConfiguration(context));
// keep retrying for as long as binRanges returns a non-empty result
while (!tl.binRanges(clientContext, ranges, binnedRanges).isEmpty()) {
if (!DeprecationUtil.isMockInstance(instance)) {
// stop retrying when the table has been deleted or taken offline in the meantime
String tableIdStr = tableId.canonicalID();
if (!Tables.exists(instance, tableId))
throw new TableDeletedException(tableIdStr);
if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
throw new TableOfflineException(instance, tableIdStr);
}
// discard the partial result before the next attempt
binnedRanges.clear();
log.warn("Unable to locate bins for specified ranges. Retrying.");
// sleep randomly between 100 and 200 ms
sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
tl.invalidateCache();
}
}
} catch (Exception e) {
// any failure while binning, including the table deleted/offline exceptions thrown above, is reported as an IOException
throw new IOException(e);
}
// all of this code will either add one range per location, or split ranges and add range-location splits
// Map from Range to Array of Locations, we only use this if we don't split
HashMap<Range, ArrayList<String>> splitsToAdd = null;
if (!autoAdjust)
splitsToAdd = new HashMap<>();
// remembers the canonical host name already resolved for an ip, so each ip is resolved at most once per table
HashMap<String, String> hostNameCache = new HashMap<>();
for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
// the bin key is split on the first ':' and only the leading part is used
String ip = tserverBin.getKey().split(":", 2)[0];
String location = hostNameCache.get(ip);
if (location == null) {
InetAddress inetAddress = InetAddress.getByName(ip);
location = inetAddress.getCanonicalHostName();
hostNameCache.put(ip, location);
}
for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
// data range of the extent, used to clip the requested ranges to it
Range ke = extentRanges.getKey().toDataRange();
if (batchScan) {
// group ranges by tablet to be read by a BatchScanner
ArrayList<Range> clippedRanges = new ArrayList<>();
for (Range r : extentRanges.getValue()) clippedRanges.add(ke.clip(r));
BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges, new String[] { location });
SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
splits.add(split);
} else {
// not grouping by tablet
for (Range r : extentRanges.getValue()) {
if (autoAdjust) {
// divide ranges into smaller ranges, based on the tablets
RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), ke.clip(r), new String[] { location });
SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
split.setOffline(tableConfig.isOfflineScan());
split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
splits.add(split);
} else {
// don't divide ranges: just collect every location the unclipped range was binned to
ArrayList<String> locations = splitsToAdd.get(r);
if (locations == null)
locations = new ArrayList<>(1);
locations.add(location);
splitsToAdd.put(r, locations);
}
}
}
}
}
// when ranges are not divided, emit one split per original range, carrying all of its collected locations
if (!autoAdjust)
for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), entry.getKey(), entry.getValue().toArray(new String[0]));
SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
split.setOffline(tableConfig.isOfflineScan());
split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
splits.add(split);
}
}
return splits;
}
use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.
the class TableDiskUsage method getDiskUsage.
/**
 * Computes disk usage for the given tables, grouped by the sets of tables that share files.
 *
 * <p>
 * The metadata table is scanned once per requested table for its data file entries. Each file is linked to the table, and tables or base directories
 * referenced by those entries are remembered. The remembered directories are then globbed on the file system to obtain file lengths, and the result of
 * {@code calculateUsage()} is translated from table ids to table names.
 *
 * @param tableIds
 *          ids of the tables to report on
 * @param fs
 *          volume manager used to glob table directories and read file lengths
 * @param conn
 *          connector used to scan the metadata table and to resolve table ids to names
 * @return a sorted map from a set of table names to the size computed for that set; requested tables without any data file entry in the metadata table
 *         are grouped together under a single entry with size {@code 0L}
 * @throws IOException
 *           if a file system lookup fails
 */
public static Map<TreeSet<String>, Long> getDiskUsage(Set<Table.ID> tableIds, VolumeManager fs, Connector conn) throws IOException {
  TableDiskUsage tdu = new TableDiskUsage();
  // Add each tableID
  for (Table.ID tableId : tableIds) {
    tdu.addTable(tableId);
  }
  HashSet<Table.ID> tablesReferenced = new HashSet<>(tableIds);
  HashSet<Table.ID> emptyTableIds = new HashSet<>();
  HashSet<String> nameSpacesReferenced = new HashSet<>();
  // For each table ID
  for (Table.ID tableId : tableIds) {
    // try-with-resources closes the metadata scanner for every table, also when processing an entry fails
    try (Scanner mdScanner = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) {
      mdScanner.fetchColumnFamily(DataFileColumnFamily.NAME);
      mdScanner.setRange(new KeyExtent(tableId, null, null).toMetadataRange());
      // track emptiness during the single pass instead of opening a second iterator just to call hasNext()
      boolean sawFile = false;
      // Read each file referenced by that table
      for (Entry<Key, Value> entry : mdScanner) {
        sawFile = true;
        String file = entry.getKey().getColumnQualifier().toString();
        String[] parts = file.split("/");
        // the filename
        String uniqueName = parts[parts.length - 1];
        if (file.contains(":") || file.startsWith("../")) {
          // third path element from the end is treated as the id of the table owning the file
          String ref = parts[parts.length - 3];
          // Track any tables which are referenced externally by the current table
          if (!ref.equals(tableId.canonicalID())) {
            tablesReferenced.add(Table.ID.of(ref));
          }
          if (file.contains(":") && parts.length > 3) {
            // everything in front of <table>/<dir>/<file> is remembered as a base directory to glob later
            List<String> base = Arrays.asList(Arrays.copyOf(parts, parts.length - 3));
            nameSpacesReferenced.add(Joiner.on("/").join(base));
          }
        }
        // add this file to this table
        tdu.linkFileAndTable(tableId, uniqueName);
      }
      if (!sawFile) {
        emptyTableIds.add(tableId);
      }
    } catch (TableNotFoundException e) {
      // only createScanner can raise this checked exception; keep reporting it unchecked as before
      throw new RuntimeException(e);
    }
  }
  // Each table seen (provided by user, or reference by table the user provided)
  for (Table.ID tableId : tablesReferenced) {
    for (String tableDir : nameSpacesReferenced) {
      // Find each file and add its size
      FileStatus[] files = fs.globStatus(new Path(tableDir + "/" + tableId + "/*/*"));
      if (files != null) {
        for (FileStatus fileStatus : files) {
          // Assumes that all filenames are unique
          String name = fileStatus.getPath().getName();
          tdu.addFileSize(name, fileStatus.getLen());
        }
      }
    }
  }
  Map<Table.ID, String> reverseTableIdMap = Tables.getIdToNameMap(conn.getInstance());
  // order name sets element by element; when one set is a prefix of the other, the smaller set sorts first
  TreeMap<TreeSet<String>, Long> usage = new TreeMap<>((o1, o2) -> {
    int len1 = o1.size();
    int len2 = o2.size();
    int min = Math.min(len1, len2);
    Iterator<String> iter1 = o1.iterator();
    Iterator<String> iter2 = o2.iterator();
    int count = 0;
    while (count < min) {
      String s1 = iter1.next();
      String s2 = iter2.next();
      int cmp = s1.compareTo(s2);
      if (cmp != 0) {
        return cmp;
      }
      count++;
    }
    return Integer.compare(len1, len2);
  });
  for (Entry<List<Table.ID>, Long> entry : tdu.calculateUsage().entrySet()) {
    TreeSet<String> tableNames = new TreeSet<>();
    // Convert size shared by each table id into size shared by each table name
    for (Table.ID tableId : entry.getKey()) {
      tableNames.add(reverseTableIdMap.get(tableId));
    }
    // Map table names to shared file size
    usage.put(tableNames, entry.getValue());
  }
  if (!emptyTableIds.isEmpty()) {
    TreeSet<String> emptyTables = new TreeSet<>();
    for (Table.ID tableId : emptyTableIds) {
      emptyTables.add(reverseTableIdMap.get(tableId));
    }
    usage.put(emptyTables, 0L);
  }
  return usage;
}
use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.
the class TabletIterator method hasNext.
/**
 * Reports whether another tablet's metadata entries are available, reading and validating them first when none are buffered.
 *
 * <p>
 * A freshly read tablet is accepted only if its decoded prev end row lines up with the end row of the previously accepted tablet. On a mismatch the
 * scanner is reset and the read is retried after a short sleep.
 *
 * @return {@code true} if the buffered tablet entries are non-empty
 * @throws RuntimeException
 *           if the last key read for a tablet is not a prev-row column
 */
@Override
public boolean hasNext() {
  while (currentTabletKeys == null) {
    currentTabletKeys = scanToPrevEndRow();
    if (currentTabletKeys.size() == 0) {
      // nothing was read; fall through to the final size check
      break;
    }

    final Key metadataKey = currentTabletKeys.lastKey();
    final Value encodedPrevEndRow = currentTabletKeys.get(metadataKey);
    if (!TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.hasColumns(metadataKey)) {
      log.debug("{}", currentTabletKeys);
      throw new RuntimeException("Unexpected key " + metadataKey);
    }
    final Text prevEndRow = KeyExtent.decodePrevEndRow(encodedPrevEndRow);

    Text lastEndRow = null;
    if (lastTablet != null) {
      final KeyExtent previousExtent = new KeyExtent(lastTablet, (Text) null);
      lastEndRow = previousExtent.getEndRow();

      // do table transition sanity check
      final Table.ID previousTable = previousExtent.getTableId();
      final Table.ID thisTable = new KeyExtent(metadataKey.getRow(), (Text) null).getTableId();
      final boolean bothEndsOpen = prevEndRow == null && lastEndRow == null;
      if (!previousTable.equals(thisTable) && !bothEndsOpen) {
        log.info("Metadata inconsistency on table transition : {} {} {} {}", previousTable, thisTable, prevEndRow, lastEndRow);
        currentTabletKeys = null;
        resetScanner();
        sleepUninterruptibly(250, TimeUnit.MILLISECONDS);
        continue;
      }
    }

    // both null, or both non-null and equal
    final boolean endRowsMatch = (prevEndRow == null) ? (lastEndRow == null) : (lastEndRow != null && prevEndRow.equals(lastEndRow));
    if (!endRowsMatch) {
      log.info("Metadata inconsistency : {} != {} metadataKey = {}", prevEndRow, lastEndRow, metadataKey);
      currentTabletKeys = null;
      resetScanner();
      sleepUninterruptibly(250, TimeUnit.MILLISECONDS);
      continue;
    }

    // this tablet is good, so set it as the last tablet
    lastTablet = metadataKey.getRow();
  }
  return currentTabletKeys.size() > 0;
}
use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.
the class VerifyTabletAssignments method checkTable.
/**
 * Looks up the tablet locations of a table, groups the tablets by hosting server and hands each group to {@code checkTabletServer} on a thread pool.
 * While any extents are reported as failed, the method calls itself again restricted to those extents.
 *
 * @param context
 *          client context used to resolve the table id and read tablet locations
 * @param opts
 *          options; {@code opts.verbose} enables printing the location of every tablet
 * @param tableName
 *          name of the table to check
 * @param check
 *          extents to re-check, or {@code null} to check every located tablet of the table
 * @throws TableNotFoundException
 *           if {@code tableName} has no entry in the name-to-id map
 * @throws InterruptedException
 *           if interrupted while waiting for the thread pool to terminate
 */
private static void checkTable(final ClientContext context, final Opts opts, String tableName, HashSet<KeyExtent> check) throws AccumuloException, AccumuloSecurityException, TableNotFoundException, InterruptedException {
  if (check == null) {
    System.out.println("Checking table " + tableName);
  } else {
    System.out.println("Checking table " + tableName + " again, failures " + check.size());
  }
  TreeMap<KeyExtent, String> tabletLocations = new TreeMap<>();
  Table.ID tableId = Tables.getNameToIdMap(context.getInstance()).get(tableName);
  if (tableId == null) {
    // report an unknown table through the declared exception instead of handing a null id to the metadata servicer
    throw new TableNotFoundException(null, tableName, null);
  }
  MetadataServicer.forTableId(context, tableId).getTabletLocations(tabletLocations);
  // NOTE(review): this plain HashSet is written from the pool threads below (addAll in the catch block, and it is
  // also passed to checkTabletServer) - confirm whether access needs synchronization.
  final HashSet<KeyExtent> failures = new HashSet<>();
  Map<HostAndPort, List<KeyExtent>> extentsPerServer = new TreeMap<>();
  for (Entry<KeyExtent, String> entry : tabletLocations.entrySet()) {
    KeyExtent keyExtent = entry.getKey();
    String loc = entry.getValue();
    if (loc == null) {
      System.out.println(" Tablet " + keyExtent + " has no location");
      continue;
    }
    if (opts.verbose) {
      System.out.println(" Tablet " + keyExtent + " is located at " + loc);
    }
    final HostAndPort parsedLoc = HostAndPort.fromString(loc);
    List<KeyExtent> extentList = extentsPerServer.get(parsedLoc);
    if (extentList == null) {
      // the server is registered even when none of its tablets end up selected below
      extentList = new ArrayList<>();
      extentsPerServer.put(parsedLoc, extentList);
    }
    if (check == null || check.contains(keyExtent)) {
      extentList.add(keyExtent);
    }
  }
  ExecutorService tp = Executors.newFixedThreadPool(20);
  for (final Entry<HostAndPort, List<KeyExtent>> entry : extentsPerServer.entrySet()) {
    Runnable r = new Runnable() {
      @Override
      public void run() {
        try {
          checkTabletServer(context, entry, failures);
        } catch (Exception e) {
          // a failing server marks all of its extents as failed so they are re-checked
          log.error("Failure on tablet server '" + entry.getKey() + "'.", e);
          failures.addAll(entry.getValue());
        }
      }
    };
    tp.execute(r);
  }
  tp.shutdown();
  // wait, in one-hour slices, until every submitted task has finished
  while (!tp.awaitTermination(1, TimeUnit.HOURS)) {
  }
  if (failures.size() > 0) {
    checkTable(context, opts, tableName, failures);
  }
}
use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.
the class FileArchiveIT method testDeletedTableIsArchived.
/**
 * Creates a table, writes and compacts one entry, deletes the table and then checks that the table's data file was removed from its original
 * location and exists under the file archive directory.
 */
@Test
public void testDeletedTableIsArchived() throws Exception {
  final Connector conn = getConnector();
  final String tableName = getUniqueNames(1)[0];
  conn.tableOperations().create(tableName);

  // Assert on the raw id string before wrapping it: previously the assertion ran on the result of Table.ID.of(...),
  // i.e. only after a possibly missing id had already been passed into the factory.
  final String tableIdStr = conn.tableOperations().tableIdMap().get(tableName);
  Assert.assertNotNull("Could not get table ID", tableIdStr);
  final Table.ID tableId = Table.ID.of(tableIdStr);

  BatchWriter bw = conn.createBatchWriter(tableName, new BatchWriterConfig());
  Mutation m = new Mutation("row");
  m.put("", "", "value");
  bw.addMutation(m);
  bw.close();

  // Compact memory to disk
  conn.tableOperations().compact(tableName, null, null, true, true);

  try (Scanner s = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) {
    s.setRange(MetadataSchema.TabletsSection.getRange(tableId));
    s.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME);

    // exactly one data file entry is expected for the table
    Entry<Key, Value> entry = Iterables.getOnlyElement(s);
    final String file = entry.getKey().getColumnQualifier().toString();
    final Path p = new Path(file);

    conn.tableOperations().delete(tableName);
    log.info("File for table: {}", file);

    FileSystem fs = getCluster().getFileSystem();
    // poll once per second until the file disappears from its original location
    int i = 0;
    while (fs.exists(p)) {
      i++;
      Thread.sleep(1000);
      if (0 == i % 10) {
        log.info("Waited {} iterations, file still exists", i);
      }
    }
    log.info("File was removed");

    String filePath = p.toUri().getPath().substring(getCluster().getConfig().getAccumuloDir().toString().length());
    log.info("File relative to accumulo dir: {}", filePath);

    Path fileArchiveDir = new Path(getCluster().getConfig().getAccumuloDir().toString(), ServerConstants.FILE_ARCHIVE_DIR);
    Assert.assertTrue("File archive directory didn't exist", fs.exists(fileArchiveDir));

    // Remove the leading '/' to make sure Path treats the 2nd arg as a child.
    Path archivedFile = new Path(fileArchiveDir, filePath.substring(1));
    Assert.assertTrue("File doesn't exists in archive directory: " + archivedFile, fs.exists(archivedFile));
  }
}
Aggregations