Example 21 with Table

Use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.

Source: class AbstractInputFormat, method getSplits.

/**
 * Gets the splits of the tables that have been set on the job by reading the metadata table for the specified ranges.
 *
 * @return the splits from the tables based on the ranges.
 * @throws java.io.IOException
 *           if a table set on the job doesn't exist or an error occurs initializing the tablet locator
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    Level logLevel = getLogLevel(context);
    log.setLevel(logLevel);
    validateOptions(context);
    Random random = new Random();
    LinkedList<InputSplit> splits = new LinkedList<>();
    Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(context);
    for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
        String tableName = tableConfigEntry.getKey();
        InputTableConfig tableConfig = tableConfigEntry.getValue();
        Instance instance = getInstance(context);
        Table.ID tableId;
        // resolve table name to id once, and use id from this point forward
        if (DeprecationUtil.isMockInstance(instance)) {
            tableId = Table.ID.of("");
        } else {
            try {
                tableId = Tables.getTableId(instance, tableName);
            } catch (TableNotFoundException e) {
                throw new IOException(e);
            }
        }
        Authorizations auths = getScanAuthorizations(context);
        String principal = getPrincipal(context);
        AuthenticationToken token = getAuthenticationToken(context);
        boolean batchScan = InputConfigurator.isBatchScan(CLASS, context.getConfiguration());
        boolean supportBatchScan = !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
        if (batchScan && !supportBatchScan)
            throw new IllegalArgumentException("BatchScanner optimization not available for offline scan, isolated, or local iterators");
        boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
        if (batchScan && !autoAdjust)
            throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
        List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
        if (ranges.isEmpty()) {
            ranges = new ArrayList<>(1);
            ranges.add(new Range());
        }
        // get the metadata information for these ranges
        Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
        TabletLocator tl;
        try {
            if (tableConfig.isOfflineScan()) {
                binnedRanges = binOfflineTable(context, tableId, ranges);
                while (binnedRanges == null) {
                    // Some tablets were still online, try again
                    // sleep randomly between 100 and 200 ms
                    sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                    binnedRanges = binOfflineTable(context, tableId, ranges);
                }
            } else {
                tl = InputConfigurator.getTabletLocator(CLASS, context.getConfiguration(), tableId);
                // it's possible that the cache could contain complete but stale information about a table's tablets, so clear it
                tl.invalidateCache();
                ClientContext clientContext = new ClientContext(getInstance(context), new Credentials(getPrincipal(context), getAuthenticationToken(context)), getClientConfiguration(context));
                while (!tl.binRanges(clientContext, ranges, binnedRanges).isEmpty()) {
                    if (!DeprecationUtil.isMockInstance(instance)) {
                        String tableIdStr = tableId.canonicalID();
                        if (!Tables.exists(instance, tableId))
                            throw new TableDeletedException(tableIdStr);
                        if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
                            throw new TableOfflineException(instance, tableIdStr);
                    }
                    binnedRanges.clear();
                    log.warn("Unable to locate bins for specified ranges. Retrying.");
                    // sleep randomly between 100 and 200 ms
                    sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                    tl.invalidateCache();
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
        // this code either clips ranges per tablet and adds one split per clipped range (when auto-adjusting),
        // or accumulates every location for each original range and adds one split per range afterwards
        // Map from Range to list of locations; only used when we don't auto-adjust (i.e. don't split) ranges
        HashMap<Range, ArrayList<String>> splitsToAdd = null;
        if (!autoAdjust)
            splitsToAdd = new HashMap<>();
        HashMap<String, String> hostNameCache = new HashMap<>();
        for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
            String ip = tserverBin.getKey().split(":", 2)[0];
            String location = hostNameCache.get(ip);
            if (location == null) {
                InetAddress inetAddress = InetAddress.getByName(ip);
                location = inetAddress.getCanonicalHostName();
                hostNameCache.put(ip, location);
            }
            for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
                Range ke = extentRanges.getKey().toDataRange();
                if (batchScan) {
                    // group ranges by tablet to be read by a BatchScanner
                    ArrayList<Range> clippedRanges = new ArrayList<>();
                    for (Range r : extentRanges.getValue()) clippedRanges.add(ke.clip(r));
                    BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges, new String[] { location });
                    SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
                    splits.add(split);
                } else {
                    // not grouping by tablet
                    for (Range r : extentRanges.getValue()) {
                        if (autoAdjust) {
                            // divide ranges into smaller ranges, based on the tablets
                            RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), ke.clip(r), new String[] { location });
                            SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
                            split.setOffline(tableConfig.isOfflineScan());
                            split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                            split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                            splits.add(split);
                        } else {
                            // don't divide ranges
                            ArrayList<String> locations = splitsToAdd.get(r);
                            if (locations == null)
                                locations = new ArrayList<>(1);
                            locations.add(location);
                            splitsToAdd.put(r, locations);
                        }
                    }
                }
            }
        }
        if (!autoAdjust)
            for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
                RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), entry.getKey(), entry.getValue().toArray(new String[0]));
                SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
                split.setOffline(tableConfig.isOfflineScan());
                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                splits.add(split);
            }
    }
    return splits;
}
Also used : AuthenticationToken(org.apache.accumulo.core.client.security.tokens.AuthenticationToken) TableOfflineException(org.apache.accumulo.core.client.TableOfflineException) Instance(org.apache.accumulo.core.client.Instance) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) BatchInputSplit(org.apache.accumulo.core.client.mapreduce.impl.BatchInputSplit) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) TableDeletedException(org.apache.accumulo.core.client.TableDeletedException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Random(java.util.Random) List(java.util.List) LinkedList(java.util.LinkedList) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Authorizations(org.apache.accumulo.core.security.Authorizations) Table(org.apache.accumulo.core.client.impl.Table) ClientContext(org.apache.accumulo.core.client.impl.ClientContext) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) TabletLocator(org.apache.accumulo.core.client.impl.TabletLocator) Level(org.apache.log4j.Level) Map(java.util.Map) InetAddress(java.net.InetAddress) Credentials(org.apache.accumulo.core.client.impl.Credentials)
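For context, a minimal sketch of the job-side configuration that drives this getSplits call. It assumes the standard AccumuloInputFormat setters from the same mapreduce package; the instance name, ZooKeeper hosts, credentials, table name, and range are illustrative placeholders, not values from the source above.

import java.util.Collections;

import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.mapreduce.Job;

public class SplitsJobSetup {

    public static void configure(Job job) throws Exception {
        // placeholder instance and ZooKeeper settings
        AccumuloInputFormat.setZooKeeperInstance(job,
            ClientConfiguration.loadDefault().withInstance("myInstance").withZkHosts("zk1:2181"));
        AccumuloInputFormat.setConnectorInfo(job, "user", new PasswordToken("secret"));
        AccumuloInputFormat.setInputTableName(job, "mytable");
        AccumuloInputFormat.setScanAuthorizations(job, Authorizations.EMPTY);
        AccumuloInputFormat.setRanges(job, Collections.singleton(new Range("a", "z")));
        // batch scanning requires auto-adjusted ranges and no offline, isolated, or
        // local-iterator scan, matching the IllegalArgumentException checks in getSplits
        AccumuloInputFormat.setAutoAdjustRanges(job, true);
        AccumuloInputFormat.setBatchScan(job, true);
    }
}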

Example 22 with Table

Use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.

Source: class TableDiskUsage, method getDiskUsage.

public static Map<TreeSet<String>, Long> getDiskUsage(Set<Table.ID> tableIds, VolumeManager fs, Connector conn) throws IOException {
    TableDiskUsage tdu = new TableDiskUsage();
    // Add each tableID
    for (Table.ID tableId : tableIds) tdu.addTable(tableId);
    HashSet<Table.ID> tablesReferenced = new HashSet<>(tableIds);
    HashSet<Table.ID> emptyTableIds = new HashSet<>();
    HashSet<String> nameSpacesReferenced = new HashSet<>();
    // For each table ID
    for (Table.ID tableId : tableIds) {
        Scanner mdScanner;
        try {
            mdScanner = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
        } catch (TableNotFoundException e) {
            throw new RuntimeException(e);
        }
        mdScanner.fetchColumnFamily(DataFileColumnFamily.NAME);
        mdScanner.setRange(new KeyExtent(tableId, null, null).toMetadataRange());
        if (!mdScanner.iterator().hasNext()) {
            emptyTableIds.add(tableId);
        }
        // Read each file referenced by that table
        for (Entry<Key, Value> entry : mdScanner) {
            String file = entry.getKey().getColumnQualifier().toString();
            String[] parts = file.split("/");
            // the filename
            String uniqueName = parts[parts.length - 1];
            if (file.contains(":") || file.startsWith("../")) {
                String ref = parts[parts.length - 3];
                // Track any tables which are referenced externally by the current table
                if (!ref.equals(tableId.canonicalID())) {
                    tablesReferenced.add(Table.ID.of(ref));
                }
                if (file.contains(":") && parts.length > 3) {
                    List<String> base = Arrays.asList(Arrays.copyOf(parts, parts.length - 3));
                    nameSpacesReferenced.add(Joiner.on("/").join(base));
                }
            }
            // add this file to this table
            tdu.linkFileAndTable(tableId, uniqueName);
        }
    }
    // Each table seen (provided by the user, or referenced by a table the user provided)
    for (Table.ID tableId : tablesReferenced) {
        for (String tableDir : nameSpacesReferenced) {
            // Find each file and add its size
            FileStatus[] files = fs.globStatus(new Path(tableDir + "/" + tableId + "/*/*"));
            if (files != null) {
                for (FileStatus fileStatus : files) {
                    // Assumes that all filenames are unique
                    String name = fileStatus.getPath().getName();
                    tdu.addFileSize(name, fileStatus.getLen());
                }
            }
        }
    }
    Map<Table.ID, String> reverseTableIdMap = Tables.getIdToNameMap(conn.getInstance());
    TreeMap<TreeSet<String>, Long> usage = new TreeMap<>((o1, o2) -> {
        int len1 = o1.size();
        int len2 = o2.size();
        int min = Math.min(len1, len2);
        Iterator<String> iter1 = o1.iterator();
        Iterator<String> iter2 = o2.iterator();
        int count = 0;
        while (count < min) {
            String s1 = iter1.next();
            String s2 = iter2.next();
            int cmp = s1.compareTo(s2);
            if (cmp != 0)
                return cmp;
            count++;
        }
        return len1 - len2;
    });
    for (Entry<List<Table.ID>, Long> entry : tdu.calculateUsage().entrySet()) {
        TreeSet<String> tableNames = new TreeSet<>();
        // Convert size shared by each table id into size shared by each table name
        for (Table.ID tableId : entry.getKey()) tableNames.add(reverseTableIdMap.get(tableId));
        // Map table names to shared file size
        usage.put(tableNames, entry.getValue());
    }
    if (!emptyTableIds.isEmpty()) {
        TreeSet<String> emptyTables = new TreeSet<>();
        for (Table.ID tableId : emptyTableIds) {
            emptyTables.add(reverseTableIdMap.get(tableId));
        }
        usage.put(emptyTables, 0L);
    }
    return usage;
}
Also used : Scanner(org.apache.accumulo.core.client.Scanner) FileStatus(org.apache.hadoop.fs.FileStatus) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) Table(org.apache.accumulo.core.client.impl.Table) TreeMap(java.util.TreeMap) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key)
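A hedged sketch of invoking getDiskUsage. Only the signature shown above is taken from the source; obtaining a Connector and a server-side VolumeManager is deployment-specific, so both arrive as assumed parameters, and the table id "2" is purely illustrative.

import java.util.Collections;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.impl.Table;
import org.apache.accumulo.server.fs.VolumeManager;
import org.apache.accumulo.server.util.TableDiskUsage;

public class DiskUsageReport {

    static void report(Connector conn, VolumeManager fs) throws Exception {
        // "2" is an illustrative table id, not one from the example
        Set<Table.ID> tableIds = Collections.singleton(Table.ID.of("2"));
        Map<TreeSet<String>, Long> usage = TableDiskUsage.getDiskUsage(tableIds, fs, conn);
        for (Entry<TreeSet<String>, Long> e : usage.entrySet())
            System.out.println(e.getKey() + " : " + e.getValue() + " bytes shared");
    }
}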

Example 23 with Table

Use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.

Source: class TabletIterator, method hasNext.

@Override
public boolean hasNext() {
    while (currentTabletKeys == null) {
        currentTabletKeys = scanToPrevEndRow();
        if (currentTabletKeys.size() == 0) {
            break;
        }
        Key prevEndRowKey = currentTabletKeys.lastKey();
        Value prevEndRowValue = currentTabletKeys.get(prevEndRowKey);
        if (!TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.hasColumns(prevEndRowKey)) {
            log.debug("{}", currentTabletKeys);
            throw new RuntimeException("Unexpected key " + prevEndRowKey);
        }
        Text per = KeyExtent.decodePrevEndRow(prevEndRowValue);
        Text lastEndRow;
        if (lastTablet == null) {
            lastEndRow = null;
        } else {
            lastEndRow = new KeyExtent(lastTablet, (Text) null).getEndRow();
            // do table transition sanity check
            Table.ID lastTable = new KeyExtent(lastTablet, (Text) null).getTableId();
            Table.ID currentTable = new KeyExtent(prevEndRowKey.getRow(), (Text) null).getTableId();
            if (!lastTable.equals(currentTable) && (per != null || lastEndRow != null)) {
                log.info("Metadata inconsistency on table transition : {} {} {} {}", lastTable, currentTable, per, lastEndRow);
                currentTabletKeys = null;
                resetScanner();
                sleepUninterruptibly(250, TimeUnit.MILLISECONDS);
                continue;
            }
        }
        boolean perEqual = (per == null && lastEndRow == null) || (per != null && lastEndRow != null && per.equals(lastEndRow));
        if (!perEqual) {
            log.info("Metadata inconsistency : {} != {} metadataKey = {}", per, lastEndRow, prevEndRowKey);
            currentTabletKeys = null;
            resetScanner();
            sleepUninterruptibly(250, TimeUnit.MILLISECONDS);
            continue;
        }
        // this tablet is good, so set it as the last tablet
        lastTablet = prevEndRowKey.getRow();
    }
    return currentTabletKeys.size() > 0;
}
Also used : Table(org.apache.accumulo.core.client.impl.Table) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) Value(org.apache.accumulo.core.data.Value) Text(org.apache.hadoop.io.Text) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)
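A minimal consumption sketch for the iterator above, assuming (as in the Accumulo server code) that TabletIterator implements Iterator<Map<Key, Value>> and yields the metadata columns of one validated tablet per call; constructing the iterator from a metadata Scanner is omitted here.

import java.util.Map;

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.server.util.TabletIterator;

public class TabletWalk {

    static void walk(TabletIterator iter) {
        while (iter.hasNext()) {
            // each map holds the metadata entries for a single tablet that has already
            // passed the prev-end-row consistency checks shown in hasNext above
            Map<Key, Value> tabletKeys = iter.next();
            System.out.println("tablet with " + tabletKeys.size() + " metadata entries");
        }
    }
}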

Example 24 with Table

Use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.

Source: class VerifyTabletAssignments, method checkTable.

private static void checkTable(final ClientContext context, final Opts opts, String tableName, HashSet<KeyExtent> check) throws AccumuloException, AccumuloSecurityException, TableNotFoundException, InterruptedException {
    if (check == null)
        System.out.println("Checking table " + tableName);
    else
        System.out.println("Checking table " + tableName + " again, failures " + check.size());
    TreeMap<KeyExtent, String> tabletLocations = new TreeMap<>();
    Table.ID tableId = Tables.getNameToIdMap(context.getInstance()).get(tableName);
    MetadataServicer.forTableId(context, tableId).getTabletLocations(tabletLocations);
    final HashSet<KeyExtent> failures = new HashSet<>();
    Map<HostAndPort, List<KeyExtent>> extentsPerServer = new TreeMap<>();
    for (Entry<KeyExtent, String> entry : tabletLocations.entrySet()) {
        KeyExtent keyExtent = entry.getKey();
        String loc = entry.getValue();
        if (loc == null)
            System.out.println(" Tablet " + keyExtent + " has no location");
        else if (opts.verbose)
            System.out.println(" Tablet " + keyExtent + " is located at " + loc);
        if (loc != null) {
            final HostAndPort parsedLoc = HostAndPort.fromString(loc);
            List<KeyExtent> extentList = extentsPerServer.get(parsedLoc);
            if (extentList == null) {
                extentList = new ArrayList<>();
                extentsPerServer.put(parsedLoc, extentList);
            }
            if (check == null || check.contains(keyExtent))
                extentList.add(keyExtent);
        }
    }
    ExecutorService tp = Executors.newFixedThreadPool(20);
    for (final Entry<HostAndPort, List<KeyExtent>> entry : extentsPerServer.entrySet()) {
        Runnable r = new Runnable() {

            @Override
            public void run() {
                try {
                    checkTabletServer(context, entry, failures);
                } catch (Exception e) {
                    log.error("Failure on tablet server '" + entry.getKey() + "'.", e);
                    failures.addAll(entry.getValue());
                }
            }
        };
        tp.execute(r);
    }
    tp.shutdown();
    while (!tp.awaitTermination(1, TimeUnit.HOURS)) {
    }
    if (failures.size() > 0)
        checkTable(context, opts, tableName, failures);
}
Also used : Table(org.apache.accumulo.core.client.impl.Table) TreeMap(java.util.TreeMap) TKeyExtent(org.apache.accumulo.core.data.thrift.TKeyExtent) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) NoSuchScanIDException(org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException) TException(org.apache.thrift.TException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) HostAndPort(org.apache.accumulo.core.util.HostAndPort) ExecutorService(java.util.concurrent.ExecutorService) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet)
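The per-server fan-out in checkTable is a standard ExecutorService idiom: submit one task per server, stop accepting new work, then block until everything finishes. A self-contained sketch of the same pattern, with the tablet-server work abstracted into plain Runnables:

import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class FanOut {

    public static void runAll(List<Runnable> tasks) throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(20);
        for (Runnable task : tasks)
            pool.execute(task);
        // shutdown() rejects new tasks but lets queued ones run to completion
        pool.shutdown();
        while (!pool.awaitTermination(1, TimeUnit.HOURS)) {
            // keep waiting, as checkTable does, until every server check finishes
        }
    }
}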

Example 25 with Table

Use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.

Source: class FileArchiveIT, method testDeletedTableIsArchived.

@Test
public void testDeletedTableIsArchived() throws Exception {
    final Connector conn = getConnector();
    final String tableName = getUniqueNames(1)[0];
    conn.tableOperations().create(tableName);
    final Table.ID tableId = Table.ID.of(conn.tableOperations().tableIdMap().get(tableName));
    Assert.assertNotNull("Could not get table ID", tableId);
    BatchWriter bw = conn.createBatchWriter(tableName, new BatchWriterConfig());
    Mutation m = new Mutation("row");
    m.put("", "", "value");
    bw.addMutation(m);
    bw.close();
    // Compact memory to disk
    conn.tableOperations().compact(tableName, null, null, true, true);
    try (Scanner s = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) {
        s.setRange(MetadataSchema.TabletsSection.getRange(tableId));
        s.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME);
        Entry<Key, Value> entry = Iterables.getOnlyElement(s);
        final String file = entry.getKey().getColumnQualifier().toString();
        final Path p = new Path(file);
        conn.tableOperations().delete(tableName);
        log.info("File for table: {}", file);
        FileSystem fs = getCluster().getFileSystem();
        int i = 0;
        while (fs.exists(p)) {
            i++;
            Thread.sleep(1000);
            if (0 == i % 10) {
                log.info("Waited {} iterations, file still exists", i);
            }
        }
        log.info("File was removed");
        String filePath = p.toUri().getPath().substring(getCluster().getConfig().getAccumuloDir().toString().length());
        log.info("File relative to accumulo dir: {}", filePath);
        Path fileArchiveDir = new Path(getCluster().getConfig().getAccumuloDir().toString(), ServerConstants.FILE_ARCHIVE_DIR);
        Assert.assertTrue("File archive directory didn't exist", fs.exists(fileArchiveDir));
        // Remove the leading '/' to make sure Path treats the 2nd arg as a child.
        Path archivedFile = new Path(fileArchiveDir, filePath.substring(1));
        Assert.assertTrue("File doesn't exists in archive directory: " + archivedFile, fs.exists(archivedFile));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Connector(org.apache.accumulo.core.client.Connector) Scanner(org.apache.accumulo.core.client.Scanner) Table(org.apache.accumulo.core.client.impl.Table) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)
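The test polls in an unbounded loop until the file disappears; a new test might prefer to bound the wait. A hypothetical helper using only the Hadoop FileSystem API already imported by the example:

// hypothetical helper, not part of FileArchiveIT; gives up after maxSeconds
private static boolean waitForDeletion(FileSystem fs, Path p, int maxSeconds) throws Exception {
    for (int i = 0; i < maxSeconds; i++) {
        if (!fs.exists(p))
            return true;
        Thread.sleep(1000);
    }
    return false; // file still present after the timeout
}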

Aggregations

Table (org.apache.accumulo.core.client.impl.Table): 55 usages
MetadataTable (org.apache.accumulo.core.metadata.MetadataTable): 34 usages
Value (org.apache.accumulo.core.data.Value): 27 usages
Key (org.apache.accumulo.core.data.Key): 25 usages
Text (org.apache.hadoop.io.Text): 25 usages
Scanner (org.apache.accumulo.core.client.Scanner): 21 usages
KeyExtent (org.apache.accumulo.core.data.impl.KeyExtent): 21 usages
Test (org.junit.Test): 21 usages
Connector (org.apache.accumulo.core.client.Connector): 19 usages
Mutation (org.apache.accumulo.core.data.Mutation): 18 usages
ReplicationTable (org.apache.accumulo.core.replication.ReplicationTable): 18 usages
BatchWriter (org.apache.accumulo.core.client.BatchWriter): 17 usages
TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException): 17 usages
AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException): 15 usages
ArrayList (java.util.ArrayList): 14 usages
AccumuloException (org.apache.accumulo.core.client.AccumuloException): 14 usages
Path (org.apache.hadoop.fs.Path): 14 usages
HashSet (java.util.HashSet): 11 usages
BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig): 11 usages
HashMap (java.util.HashMap): 9 usages