Search in sources :

Example 6 with TabletLocator

use of org.apache.accumulo.core.clientImpl.TabletLocator in project accumulo by apache.

The getSplits method of the AccumuloRecordReader class.

/**
 * Builds the input splits for every table configured on the job, using tablet location data
 * looked up for each table's configured ranges.
 *
 * <p>
 * Depending on the table configuration, a split is produced per tablet (batch scan), per range
 * clipped to a tablet (auto-adjusted ranges), or per configured range together with all of the
 * locations it was binned to (no auto-adjust).
 *
 * @param job
 *          the job configuration the input tables were set on
 * @param callingClass
 *          the class passed to the configuration lookups for this job
 * @return the splits computed for all configured tables
 * @throws IOException
 *           if a table name cannot be resolved, binning the ranges fails, or a location's host
 *           cannot be looked up
 */
public static InputSplit[] getSplits(JobConf job, Class<?> callingClass) throws IOException {
    validateOptions(job, callingClass);
    LinkedList<InputSplit> result = new LinkedList<>();
    Map<String, InputTableConfig> configsByTable = InputConfigurator.getInputTableConfigs(callingClass, job);
    try (AccumuloClient client = createClient(job, callingClass);
        var context = ((ClientContext) client)) {
        for (Map.Entry<String, InputTableConfig> configEntry : configsByTable.entrySet()) {
            String tableName = configEntry.getKey();
            InputTableConfig tableConfig = configEntry.getValue();

            // Resolve the table name to its id once; everything below works with the id.
            TableId tableId;
            try {
                tableId = context.getTableId(tableName);
            } catch (TableNotFoundException e) {
                throw new IOException(e);
            }

            // Reject option combinations the batch scan path cannot honour.
            boolean batchScan = InputConfigurator.isBatchScan(callingClass, job);
            boolean supportBatchScan = !tableConfig.isOfflineScan() && !tableConfig.shouldUseIsolatedScanners() && !tableConfig.shouldUseLocalIterators();
            if (batchScan && !supportBatchScan) {
                throw new IllegalArgumentException("BatchScanner optimization not available for offline" + " scan, isolated, or local iterators");
            }
            boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
            if (batchScan && !autoAdjust) {
                throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
            }

            List<Range> ranges;
            if (autoAdjust) {
                ranges = Range.mergeOverlapping(tableConfig.getRanges());
            } else {
                ranges = tableConfig.getRanges();
            }
            if (ranges.isEmpty()) {
                // Nothing configured: fall back to a single default-constructed Range.
                ranges = new ArrayList<>(1);
                ranges.add(new Range());
            }

            // Bin the ranges by location and tablet.
            Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
            try {
                if (tableConfig.isOfflineScan()) {
                    // A null result means some tablets were still online; wait a random
                    // 100-200 ms and try again.
                    while ((binnedRanges = binOfflineTable(job, tableId, ranges, callingClass)) == null) {
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                    }
                } else {
                    TabletLocator locator = InputConfigurator.getTabletLocator(callingClass, job, tableId);
                    // The cache may hold complete but stale tablet information, so drop it first.
                    locator.invalidateCache();
                    while (!locator.binRanges(context, ranges, binnedRanges).isEmpty()) {
                        context.requireNotDeleted(tableId);
                        context.requireNotOffline(tableId, tableName);
                        binnedRanges.clear();
                        log.warn("Unable to locate bins for specified ranges. Retrying.");
                        // Wait a random 100-200 ms before the next attempt.
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        locator.invalidateCache();
                    }
                }
            } catch (TableOfflineException | TableNotFoundException | AccumuloException | AccumuloSecurityException e) {
                throw new IOException(e);
            }

            // Only populated when ranges are not auto-adjusted: range -> every location it was binned to.
            HashMap<Range, ArrayList<String>> locationsByRange = autoAdjust ? null : new HashMap<>();
            HashMap<String, String> hostNameCache = new HashMap<>();
            for (Map.Entry<String, Map<KeyExtent, List<Range>>> serverEntry : binnedRanges.entrySet()) {
                // The key's part before the first ':' is looked up as a host, once per distinct value.
                String ip = serverEntry.getKey().split(":", 2)[0];
                String location = hostNameCache.get(ip);
                if (location == null) {
                    location = InetAddress.getByName(ip).getCanonicalHostName();
                    hostNameCache.put(ip, location);
                }
                for (Map.Entry<KeyExtent, List<Range>> tabletEntry : serverEntry.getValue().entrySet()) {
                    Range tabletRange = tabletEntry.getKey().toDataRange();
                    List<Range> tabletRanges = tabletEntry.getValue();
                    if (batchScan) {
                        // One split per tablet, holding all of its ranges clipped to the tablet.
                        ArrayList<Range> clipped = new ArrayList<>();
                        for (Range r : tabletRanges) {
                            clipped.add(tabletRange.clip(r));
                        }
                        BatchInputSplit split = new BatchInputSplit(tableName, tableId, clipped, new String[] { location });
                        SplitUtils.updateSplit(split, tableConfig);
                        result.add(split);
                    } else if (autoAdjust) {
                        // One split per range, clipped to the tablet it falls in.
                        for (Range r : tabletRanges) {
                            RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), tabletRange.clip(r), new String[] { location });
                            SplitUtils.updateSplit(split, tableConfig);
                            split.setOffline(tableConfig.isOfflineScan());
                            split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                            split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                            result.add(split);
                        }
                    } else {
                        // Ranges stay whole; just remember each location they were binned to.
                        for (Range r : tabletRanges) {
                            locationsByRange.computeIfAbsent(r, unused -> new ArrayList<>(1)).add(location);
                        }
                    }
                }
            }

            if (!autoAdjust) {
                // Emit one split per undivided range, carrying all collected locations.
                for (Map.Entry<Range, ArrayList<String>> rangeEntry : locationsByRange.entrySet()) {
                    RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), rangeEntry.getKey(), rangeEntry.getValue().toArray(new String[0]));
                    SplitUtils.updateSplit(split, tableConfig);
                    split.setOffline(tableConfig.isOfflineScan());
                    split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                    split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                    result.add(split);
                }
            }
        }
    }
    return result.toArray(new InputSplit[0]);
}
Also used : AccumuloClient(org.apache.accumulo.core.client.AccumuloClient) TableId(org.apache.accumulo.core.data.TableId) TableOfflineException(org.apache.accumulo.core.client.TableOfflineException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) InputSplit(org.apache.hadoop.mapred.InputSplit) AccumuloException(org.apache.accumulo.core.client.AccumuloException) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) LinkedList(java.util.LinkedList) TabletLocator(org.apache.accumulo.core.clientImpl.TabletLocator) InputTableConfig(org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig) HashMap(java.util.HashMap) Map(java.util.Map) InetAddress(java.net.InetAddress)

Example 7 with TabletLocator

use of org.apache.accumulo.core.clientImpl.TabletLocator in project accumulo by apache.

The getSplits method of the AbstractInputFormat class.

/**
 * Builds the input splits for every table configured on the job, using tablet location data
 * looked up for each table's configured ranges.
 *
 * <p>
 * Depending on the table configuration, a split is produced per tablet (batch scan), per range
 * clipped to a tablet (auto-adjusted ranges), or per configured range together with all of the
 * locations it was binned to (no auto-adjust).
 *
 * @param job
 *          the job configuration the input tables were set on
 * @param numSplits
 *          not consulted by this implementation
 * @return the splits from the tables based on the ranges.
 * @throws java.io.IOException
 *           if a table set on the job doesn't exist or an error occurs initializing the tablet
 *           locator
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Level logLevel = getLogLevel(job);
    log.setLevel(logLevel);
    validateOptions(job);
    LinkedList<InputSplit> result = new LinkedList<>();
    Map<String, org.apache.accumulo.core.client.mapreduce.InputTableConfig> configsByTable = getInputTableConfigs(job);
    for (Map.Entry<String, org.apache.accumulo.core.client.mapreduce.InputTableConfig> configEntry : configsByTable.entrySet()) {
        String tableName = configEntry.getKey();
        org.apache.accumulo.core.client.mapreduce.InputTableConfig tableConfig = configEntry.getValue();

        // NOTE(review): a client is obtained on every iteration and is not closed in this
        // method -- confirm whether that is intended.
        ClientContext client;
        try {
            client = InputConfigurator.client(CLASS, job);
        } catch (AccumuloException | AccumuloSecurityException e) {
            throw new IOException(e);
        }

        // Resolve the table name to its id once; everything below works with the id.
        TableId tableId;
        try {
            tableId = client.getTableId(tableName);
        } catch (TableNotFoundException e) {
            throw new IOException(e);
        }

        // Reject option combinations the batch scan path cannot honour.
        boolean batchScan = InputConfigurator.isBatchScan(CLASS, job);
        boolean supportBatchScan = !tableConfig.isOfflineScan() && !tableConfig.shouldUseIsolatedScanners() && !tableConfig.shouldUseLocalIterators();
        if (batchScan && !supportBatchScan) {
            throw new IllegalArgumentException("BatchScanner optimization not available for offline" + " scan, isolated, or local iterators");
        }
        boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
        if (batchScan && !autoAdjust) {
            throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
        }

        List<Range> ranges;
        if (autoAdjust) {
            ranges = Range.mergeOverlapping(tableConfig.getRanges());
        } else {
            ranges = tableConfig.getRanges();
        }
        if (ranges.isEmpty()) {
            // Nothing configured: fall back to a single default-constructed Range.
            ranges = new ArrayList<>(1);
            ranges.add(new Range());
        }

        // Bin the ranges by location and tablet.
        Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
        try {
            if (tableConfig.isOfflineScan()) {
                // A null result means some tablets were still online; wait a random
                // 100-200 ms and try again.
                while ((binnedRanges = binOfflineTable(job, tableId, ranges)) == null) {
                    sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                }
            } else {
                TabletLocator locator = TabletLocator.getLocator(client, tableId);
                // The cache may hold complete but stale tablet information, so drop it first.
                locator.invalidateCache();
                while (!locator.binRanges(client, ranges, binnedRanges).isEmpty()) {
                    client.requireNotDeleted(tableId);
                    client.requireNotOffline(tableId, tableName);
                    binnedRanges.clear();
                    log.warn("Unable to locate bins for specified ranges. Retrying.");
                    // Wait a random 100-200 ms before the next attempt.
                    sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                    locator.invalidateCache();
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }

        // From here on, either every range is recorded with each of its locations, or ranges
        // are clipped to tablets and emitted as range/location splits.
        // Only populated when ranges are not auto-adjusted: range -> every location it was binned to.
        HashMap<Range, ArrayList<String>> locationsByRange = autoAdjust ? null : new HashMap<>();
        HashMap<String, String> hostNameCache = new HashMap<>();
        for (Map.Entry<String, Map<KeyExtent, List<Range>>> serverEntry : binnedRanges.entrySet()) {
            // The key's part before the first ':' is looked up as a host, once per distinct value.
            String ip = serverEntry.getKey().split(":", 2)[0];
            String location = hostNameCache.get(ip);
            if (location == null) {
                location = InetAddress.getByName(ip).getCanonicalHostName();
                hostNameCache.put(ip, location);
            }
            for (Map.Entry<KeyExtent, List<Range>> tabletEntry : serverEntry.getValue().entrySet()) {
                Range tabletRange = tabletEntry.getKey().toDataRange();
                List<Range> tabletRanges = tabletEntry.getValue();
                if (batchScan) {
                    // One split per tablet, holding all of its ranges clipped to the tablet.
                    ArrayList<Range> clipped = new ArrayList<>();
                    for (Range r : tabletRanges) {
                        clipped.add(tabletRange.clip(r));
                    }
                    org.apache.accumulo.core.clientImpl.mapred.BatchInputSplit split = new org.apache.accumulo.core.clientImpl.mapred.BatchInputSplit(tableName, tableId, clipped, new String[] { location });
                    org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig, logLevel);
                    result.add(split);
                } else if (autoAdjust) {
                    // One split per range, clipped to the tablet it falls in.
                    for (Range r : tabletRanges) {
                        RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), tabletRange.clip(r), new String[] { location });
                        org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig, logLevel);
                        split.setOffline(tableConfig.isOfflineScan());
                        split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                        split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                        result.add(split);
                    }
                } else {
                    // Ranges stay whole; just remember each location they were binned to.
                    for (Range r : tabletRanges) {
                        locationsByRange.computeIfAbsent(r, unused -> new ArrayList<>(1)).add(location);
                    }
                }
            }
        }

        if (!autoAdjust) {
            // Emit one split per undivided range, carrying all collected locations.
            for (Map.Entry<Range, ArrayList<String>> rangeEntry : locationsByRange.entrySet()) {
                RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), rangeEntry.getKey(), rangeEntry.getValue().toArray(new String[0]));
                org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig, logLevel);
                split.setOffline(tableConfig.isOfflineScan());
                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                result.add(split);
            }
        }
    }
    return result.toArray(new InputSplit[0]);
}
Also used : TableId(org.apache.accumulo.core.data.TableId) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) InputSplit(org.apache.hadoop.mapred.InputSplit) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) LinkedList(java.util.LinkedList) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) TabletLocator(org.apache.accumulo.core.clientImpl.TabletLocator) Level(org.apache.log4j.Level) Map(java.util.Map) HashMap(java.util.HashMap) InetAddress(java.net.InetAddress)

Example 8 with TabletLocator

use of org.apache.accumulo.core.clientImpl.TabletLocator in project accumulo by apache.

The importFiles method of the BulkImporter class.

/**
 * Assigns the given files to the tablets they overlap, retrying failed assignments with an
 * increasing back-off until every file is either assigned or abandoned.
 *
 * <p>
 * Files for which no overlapping tablet is found, and files whose failure count exceeds
 * {@code Property.TSERV_BULK_RETRY}, are collected as complete failures and handed to
 * {@code processFailures}.
 *
 * @param files
 *          the files to import; each entry is wrapped in a {@code Path}, and duplicates collapse
 *          into one entry
 * @return the statistics gathered while assigning the files
 * @throws RuntimeException
 *           if the thread is interrupted while waiting for the overlap lookups to finish; the
 *           thread's interrupt flag is restored before the exception is thrown
 */
public AssignmentStats importFiles(List<String> files) {
    int numThreads = context.getConfiguration().getCount(Property.TSERV_BULK_PROCESS_THREADS);
    int numAssignThreads = context.getConfiguration().getCount(Property.TSERV_BULK_ASSIGNMENT_THREADS);
    timer = new StopWatch<>(Timers.class);
    timer.start(Timers.TOTAL);
    final VolumeManager fs = context.getVolumeManager();
    Set<Path> paths = new HashSet<>();
    for (String file : files) {
        paths.add(new Path(file));
    }
    AssignmentStats assignmentStats = new AssignmentStats(paths.size());
    // Both maps are written from the lookup tasks below, hence the synchronized wrappers.
    final Map<Path, List<KeyExtent>> completeFailures = Collections.synchronizedSortedMap(new TreeMap<>());
    final TabletLocator locator = TabletLocator.getLocator(context, TableId.of(tableId));
    final Map<Path, List<TabletLocation>> assignments = Collections.synchronizedSortedMap(new TreeMap<>());

    // Phase 1: find, in parallel, the tablets each file overlaps.
    timer.start(Timers.EXAMINE_MAP_FILES);
    ExecutorService threadPool = ThreadPools.createFixedThreadPool(numThreads, "findOverlapping", false);
    for (Path mapFile : paths) {
        Runnable getAssignments = () -> {
            List<TabletLocation> tabletsToAssignMapFileTo = Collections.emptyList();
            try {
                tabletsToAssignMapFileTo = findOverlappingTablets(context, fs, locator, mapFile);
            } catch (Exception ex) {
                // Best effort: a failed lookup leaves the list empty, which marks the file as a
                // complete failure below.
                log.warn("Unable to find tablets that overlap file {}", mapFile, ex);
            }
            log.debug("Map file {} found to overlap {} tablets", mapFile, tabletsToAssignMapFileTo.size());
            if (tabletsToAssignMapFileTo.isEmpty()) {
                List<KeyExtent> empty = Collections.emptyList();
                completeFailures.put(mapFile, empty);
            } else {
                assignments.put(mapFile, tabletsToAssignMapFileTo);
            }
        };
        threadPool.submit(getAssignments);
    }
    threadPool.shutdown();
    while (!threadPool.isTerminated()) {
        try {
            threadPool.awaitTermination(60, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still see the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }
    timer.stop(Timers.EXAMINE_MAP_FILES);

    // Phase 2: first assignment attempt.
    assignmentStats.attemptingAssignments(assignments);
    Map<Path, List<KeyExtent>> assignmentFailures = assignMapFiles(fs, assignments, paths, numAssignThreads, numThreads);
    assignmentStats.assignmentsFailed(assignmentFailures);
    Map<Path, Integer> failureCount = new TreeMap<>();
    for (Path failedFile : assignmentFailures.keySet()) {
        failureCount.put(failedFile, 1);
    }

    // Phase 3: retry failed extents; the sleep doubles each round and is capped at one minute.
    long sleepTime = 2_000;
    while (!assignmentFailures.isEmpty()) {
        sleepTime = Math.min(sleepTime * 2, MINUTES.toMillis(1));
        locator.invalidateCache();
        // assumption about assignment failures is that it caused by a split
        // happening or a missing location
        //
        // for splits we need to find children key extents that cover the
        // same key range and are contiguous (no holes, no overlap)
        timer.start(Timers.SLEEP);
        sleepUninterruptibly(sleepTime, TimeUnit.MILLISECONDS);
        timer.stop(Timers.SLEEP);
        log.debug("Trying to assign {} map files that previously failed on some key extents", assignmentFailures.size());
        assignments.clear();

        // Re-resolve each failed extent; extents that resolve are removed from the failure list,
        // extents whose lookup throws stay in it for the next round.
        for (Entry<Path, List<KeyExtent>> entry : assignmentFailures.entrySet()) {
            Iterator<KeyExtent> keListIter = entry.getValue().iterator();
            List<TabletLocation> tabletsToAssignMapFileTo = new ArrayList<>();
            while (keListIter.hasNext()) {
                KeyExtent ke = keListIter.next();
                timer.start(Timers.QUERY_METADATA);
                try {
                    tabletsToAssignMapFileTo.addAll(findOverlappingTablets(context, fs, locator, entry.getKey(), ke));
                    keListIter.remove();
                } catch (Exception ex) {
                    log.warn("Exception finding overlapping tablets, will retry tablet {}", ke, ex);
                }
                timer.stop(Timers.QUERY_METADATA);
            }
            if (!tabletsToAssignMapFileTo.isEmpty()) {
                assignments.put(entry.getKey(), tabletsToAssignMapFileTo);
            }
        }

        assignmentStats.attemptingAssignments(assignments);
        Map<Path, List<KeyExtent>> assignmentFailures2 = assignMapFiles(fs, assignments, paths, numAssignThreads, numThreads);
        assignmentStats.assignmentsFailed(assignmentFailures2);

        // merge assignmentFailures2 into assignmentFailures and bump the per-file failure count
        for (Entry<Path, List<KeyExtent>> entry : assignmentFailures2.entrySet()) {
            assignmentFailures.get(entry.getKey()).addAll(entry.getValue());
            failureCount.merge(entry.getKey(), 1, Integer::sum);
        }

        // remove map files that have no more key extents to assign
        assignmentFailures.values().removeIf(List::isEmpty);

        // Give up on files that failed more often than allowed; the limit is read once per round.
        int retries = context.getConfiguration().getCount(Property.TSERV_BULK_RETRY);
        for (Entry<Path, Integer> entry : failureCount.entrySet()) {
            List<KeyExtent> stillFailing = assignmentFailures.get(entry.getKey());
            if (entry.getValue() > retries && stillFailing != null) {
                log.error("Map file {} failed more than {} times, giving up.", entry.getKey(), retries);
                completeFailures.put(entry.getKey(), stillFailing);
                assignmentFailures.remove(entry.getKey());
            }
        }
    }

    assignmentStats.assignmentsAbandoned(completeFailures);
    Set<Path> failedFailures = processFailures(completeFailures);
    assignmentStats.unrecoveredMapFiles(failedFailures);
    timer.stop(Timers.TOTAL);
    printReport(paths);
    return assignmentStats;
}
Also used : VolumeManager(org.apache.accumulo.server.fs.VolumeManager) ArrayList(java.util.ArrayList) TKeyExtent(org.apache.accumulo.core.dataImpl.thrift.TKeyExtent) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) Entry(java.util.Map.Entry) TabletLocation(org.apache.accumulo.core.clientImpl.TabletLocator.TabletLocation) List(java.util.List) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) TabletClientService(org.apache.accumulo.core.tabletserver.thrift.TabletClientService) ClientService(org.apache.accumulo.core.clientImpl.thrift.ClientService) TreeMap(java.util.TreeMap) ThriftSecurityException(org.apache.accumulo.core.clientImpl.thrift.ThriftSecurityException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) TabletLocator(org.apache.accumulo.core.clientImpl.TabletLocator) ExecutorService(java.util.concurrent.ExecutorService)

Aggregations

TabletLocator (org.apache.accumulo.core.clientImpl.TabletLocator)8 ArrayList (java.util.ArrayList)7 IOException (java.io.IOException)6 List (java.util.List)6 AccumuloException (org.apache.accumulo.core.client.AccumuloException)6 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)6 KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent)6 HashMap (java.util.HashMap)5 Map (java.util.Map)5 Range (org.apache.accumulo.core.data.Range)5 InetAddress (java.net.InetAddress)4 LinkedList (java.util.LinkedList)4 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)4 TableId (org.apache.accumulo.core.data.TableId)4 ClientContext (org.apache.accumulo.core.clientImpl.ClientContext)3 HashSet (java.util.HashSet)2 Entry (java.util.Map.Entry)2 TreeMap (java.util.TreeMap)2 AccumuloClient (org.apache.accumulo.core.client.AccumuloClient)2 TableOfflineException (org.apache.accumulo.core.client.TableOfflineException)2