
Example 1 with ScannerImpl

use of org.apache.accumulo.core.clientImpl.ScannerImpl in project accumulo by apache.

the class ManagerMetadataUtil method fixSplit.

private static KeyExtent fixSplit(ServerContext context, TableId tableId, Text metadataEntry, Text metadataPrevEndRow, Text oper, double splitRatio, ServiceLock lock) throws AccumuloException {
    if (metadataPrevEndRow == null)
        // a tablet that is being split should always have a prev end row; if it is missing, something is wrong
        throw new AccumuloException("Split tablet does not have prev end row, something is amiss, extent = " + metadataEntry);
    // check to see if the prev tablet exists in the metadata table
    Key prevRowKey = new Key(new Text(TabletsSection.encodeRow(tableId, metadataPrevEndRow)));
    try (ScannerImpl scanner2 = new ScannerImpl(context, MetadataTable.ID, Authorizations.EMPTY)) {
        scanner2.setRange(new Range(prevRowKey, prevRowKey.followingKey(PartialKey.ROW)));
        if (scanner2.iterator().hasNext()) {
            log.info("Finishing incomplete split {} {}", metadataEntry, metadataPrevEndRow);
            List<StoredTabletFile> highDatafilesToRemove = new ArrayList<>();
            SortedMap<StoredTabletFile, DataFileValue> origDatafileSizes = new TreeMap<>();
            SortedMap<StoredTabletFile, DataFileValue> highDatafileSizes = new TreeMap<>();
            SortedMap<StoredTabletFile, DataFileValue> lowDatafileSizes = new TreeMap<>();
            Key rowKey = new Key(metadataEntry);
            try (Scanner scanner3 = new ScannerImpl(context, MetadataTable.ID, Authorizations.EMPTY)) {
                scanner3.fetchColumnFamily(DataFileColumnFamily.NAME);
                scanner3.setRange(new Range(rowKey, rowKey.followingKey(PartialKey.ROW)));
                for (Entry<Key, Value> entry : scanner3) {
                    if (entry.getKey().compareColumnFamily(DataFileColumnFamily.NAME) == 0) {
                        StoredTabletFile stf = new StoredTabletFile(entry.getKey().getColumnQualifierData().toString());
                        origDatafileSizes.put(stf, new DataFileValue(entry.getValue().get()));
                    }
                }
            }
            MetadataTableUtil.splitDatafiles(metadataPrevEndRow, splitRatio, new HashMap<>(), origDatafileSizes, lowDatafileSizes, highDatafileSizes, highDatafilesToRemove);
            MetadataTableUtil.finishSplit(metadataEntry, highDatafileSizes, highDatafilesToRemove, context, lock);
            return KeyExtent.fromMetaRow(rowKey.getRow(), metadataPrevEndRow);
        } else {
            log.info("Rolling back incomplete split {} {}", metadataEntry, metadataPrevEndRow);
            MetadataTableUtil.rollBackSplit(metadataEntry, oper, context, lock);
            return KeyExtent.fromMetaRow(metadataEntry, oper);
        }
    }
}
Also used : AccumuloException(org.apache.accumulo.core.client.AccumuloException) Scanner(org.apache.accumulo.core.client.Scanner) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) TreeMap(java.util.TreeMap) ScannerImpl(org.apache.accumulo.core.clientImpl.ScannerImpl) Value(org.apache.accumulo.core.data.Value) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)
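
The row-bounded range in fixSplit (a Key for the row plus followingKey(PartialKey.ROW) as the exclusive end) is the standard idiom for scanning exactly one metadata row. Below is a minimal sketch of that idiom; metadataRowExists is a hypothetical helper, and ScannerImpl is internal API, so public-API code would go through client.createScanner() instead.

import org.apache.accumulo.core.clientImpl.ClientContext;
import org.apache.accumulo.core.clientImpl.ScannerImpl;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.PartialKey;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.metadata.MetadataTable;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;

// Hypothetical helper: report whether a metadata row has at least one entry.
static boolean metadataRowExists(ClientContext context, Text metadataRow) {
    Key rowKey = new Key(metadataRow);
    try (ScannerImpl scanner = new ScannerImpl(context, MetadataTable.ID, Authorizations.EMPTY)) {
        // restrict the scan to [rowKey, first key of the following row)
        scanner.setRange(new Range(rowKey, rowKey.followingKey(PartialKey.ROW)));
        return scanner.iterator().hasNext();
    }
}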

Example 2 with ScannerImpl

use of org.apache.accumulo.core.clientImpl.ScannerImpl in project accumulo by apache.

the class AccumuloRecordReader method initialize.

/**
 * Initialize a scanner over the given input split using this task attempt configuration.
 */
public void initialize(InputSplit inSplit, JobConf job) throws IOException {
    baseSplit = (org.apache.accumulo.hadoopImpl.mapreduce.RangeInputSplit) inSplit;
    log.debug("Initializing input split: " + baseSplit);
    client = createClient(job, CLASS);
    ClientContext context = (ClientContext) client;
    Authorizations authorizations = InputConfigurator.getScanAuthorizations(CLASS, job);
    String classLoaderContext = InputConfigurator.getClassLoaderContext(CLASS, job);
    String table = baseSplit.getTableName();
    // in case the table name changed, the previous name can still be used to look up the
    // configuration, but the scanner will use the table id resolved at job setup time
    InputTableConfig tableConfig = InputConfigurator.getInputTableConfig(CLASS, job, baseSplit.getTableName());
    log.debug("Created client with user: " + context.whoami());
    log.debug("Creating scanner for table: " + table);
    log.debug("Authorizations are: " + authorizations);
    if (baseSplit instanceof BatchInputSplit) {
        BatchScanner scanner;
        BatchInputSplit multiRangeSplit = (BatchInputSplit) baseSplit;
        try {
            // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit
            // will not span tablets
            int scanThreads = 1;
            scanner = context.createBatchScanner(baseSplit.getTableName(), authorizations, scanThreads);
            setupIterators(job, scanner, baseSplit);
            if (classLoaderContext != null) {
                scanner.setClassLoaderContext(classLoaderContext);
            }
        } catch (TableNotFoundException e) {
            throw new IOException(e);
        }
        scanner.setRanges(multiRangeSplit.getRanges());
        scannerBase = scanner;
    } else if (baseSplit instanceof RangeInputSplit) {
        split = (RangeInputSplit) baseSplit;
        Boolean isOffline = baseSplit.isOffline();
        if (isOffline == null) {
            isOffline = tableConfig.isOfflineScan();
        }
        Boolean isIsolated = baseSplit.isIsolatedScan();
        if (isIsolated == null) {
            isIsolated = tableConfig.shouldUseIsolatedScanners();
        }
        Boolean usesLocalIterators = baseSplit.usesLocalIterators();
        if (usesLocalIterators == null) {
            usesLocalIterators = tableConfig.shouldUseLocalIterators();
        }
        Scanner scanner;
        try {
            if (isOffline) {
                scanner = new OfflineScanner(context, TableId.of(baseSplit.getTableId()), authorizations);
            } else {
                scanner = new ScannerImpl(context, TableId.of(baseSplit.getTableId()), authorizations);
            }
            if (isIsolated) {
                log.info("Creating isolated scanner");
                scanner = new IsolatedScanner(scanner);
            }
            if (usesLocalIterators) {
                log.info("Using local iterators");
                scanner = new ClientSideIteratorScanner(scanner);
            }
            setupIterators(job, scanner, baseSplit);
        } catch (RuntimeException e) {
            throw new IOException(e);
        }
        scanner.setRange(baseSplit.getRange());
        scannerBase = scanner;
    } else {
        throw new IllegalArgumentException("Can not initialize from " + baseSplit.getClass());
    }
    Collection<IteratorSetting.Column> columns = baseSplit.getFetchedColumns();
    if (columns == null) {
        columns = tableConfig.getFetchedColumns();
    }
    // setup a scanner within the bounds of this split
    for (Pair<Text, Text> c : columns) {
        if (c.getSecond() != null) {
            log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
            scannerBase.fetchColumn(c.getFirst(), c.getSecond());
        } else {
            log.debug("Fetching column family " + c.getFirst());
            scannerBase.fetchColumnFamily(c.getFirst());
        }
    }
    SamplerConfiguration samplerConfig = baseSplit.getSamplerConfiguration();
    if (samplerConfig == null) {
        samplerConfig = tableConfig.getSamplerConfiguration();
    }
    if (samplerConfig != null) {
        scannerBase.setSamplerConfiguration(samplerConfig);
    }
    Map<String, String> executionHints = baseSplit.getExecutionHints();
    if (executionHints == null || executionHints.isEmpty()) {
        executionHints = tableConfig.getExecutionHints();
    }
    if (executionHints != null) {
        scannerBase.setExecutionHints(executionHints);
    }
    scannerIterator = scannerBase.iterator();
    numKeysRead = 0;
}
Also used : BatchScanner(org.apache.accumulo.core.client.BatchScanner) OfflineScanner(org.apache.accumulo.core.clientImpl.OfflineScanner) ClientSideIteratorScanner(org.apache.accumulo.core.client.ClientSideIteratorScanner) IsolatedScanner(org.apache.accumulo.core.client.IsolatedScanner) Scanner(org.apache.accumulo.core.client.Scanner) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Authorizations(org.apache.accumulo.core.security.Authorizations) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) ScannerImpl(org.apache.accumulo.core.clientImpl.ScannerImpl) InputTableConfig(org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig)
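
The non-batch branch above builds its scanner by decoration: a base Scanner (ScannerImpl or OfflineScanner) is optionally wrapped in IsolatedScanner and ClientSideIteratorScanner, each adding one behavior behind the same interface. A minimal public-API sketch of the same chain follows; decorate is a hypothetical helper.

import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.ClientSideIteratorScanner;
import org.apache.accumulo.core.client.IsolatedScanner;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.security.Authorizations;

// Hypothetical helper: apply the optional scanner decorators used above.
static Scanner decorate(AccumuloClient client, String table, boolean isolated,
        boolean localIterators) throws TableNotFoundException {
    Scanner scanner = client.createScanner(table, Authorizations.EMPTY);
    if (isolated) {
        // serve each row from a consistent tablet snapshot
        scanner = new IsolatedScanner(scanner);
    }
    if (localIterators) {
        // run configured iterators in the client JVM rather than on tablet servers
        scanner = new ClientSideIteratorScanner(scanner);
    }
    return scanner;
}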

Example 3 with ScannerImpl

use of org.apache.accumulo.core.clientImpl.ScannerImpl in project accumulo by apache.

the class AccumuloRecordReader method initialize.

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
    split = (RangeInputSplit) inSplit;
    log.debug("Initializing input split: " + split);
    Configuration conf = attempt.getConfiguration();
    client = createClient(attempt, this.CLASS);
    ClientContext context = (ClientContext) client;
    Authorizations authorizations = InputConfigurator.getScanAuthorizations(CLASS, conf);
    String classLoaderContext = InputConfigurator.getClassLoaderContext(CLASS, conf);
    String table = split.getTableName();
    // in case the table name changed, the previous name can still be used to look up the
    // configuration, but the scanner will use the table id resolved at job setup time
    InputTableConfig tableConfig = InputConfigurator.getInputTableConfig(CLASS, conf, split.getTableName());
    log.debug("Creating client with user: " + client.whoami());
    log.debug("Creating scanner for table: " + table);
    log.debug("Authorizations are: " + authorizations);
    if (split instanceof BatchInputSplit) {
        BatchInputSplit batchSplit = (BatchInputSplit) split;
        BatchScanner scanner;
        try {
            // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit
            // will not span tablets
            int scanThreads = 1;
            scanner = context.createBatchScanner(split.getTableName(), authorizations, scanThreads);
            setupIterators(attempt, scanner, split);
            if (classLoaderContext != null) {
                scanner.setClassLoaderContext(classLoaderContext);
            }
        } catch (TableNotFoundException e) {
            throw new IOException(e);
        }
        scanner.setRanges(batchSplit.getRanges());
        scannerBase = scanner;
    } else {
        Scanner scanner;
        Boolean isOffline = split.isOffline();
        if (isOffline == null) {
            isOffline = tableConfig.isOfflineScan();
        }
        Boolean isIsolated = split.isIsolatedScan();
        if (isIsolated == null) {
            isIsolated = tableConfig.shouldUseIsolatedScanners();
        }
        Boolean usesLocalIterators = split.usesLocalIterators();
        if (usesLocalIterators == null) {
            usesLocalIterators = tableConfig.shouldUseLocalIterators();
        }
        try {
            if (isOffline) {
                scanner = new OfflineScanner(context, TableId.of(split.getTableId()), authorizations);
            } else {
                // Not using public API to create scanner so that we can use table ID
                // Table ID is used in case of renames during M/R job
                scanner = new ScannerImpl(context, TableId.of(split.getTableId()), authorizations);
            }
            if (isIsolated) {
                log.info("Creating isolated scanner");
                scanner = new IsolatedScanner(scanner);
            }
            if (usesLocalIterators) {
                log.info("Using local iterators");
                scanner = new ClientSideIteratorScanner(scanner);
            }
            setupIterators(attempt, scanner, split);
        } catch (RuntimeException e) {
            throw new IOException(e);
        }
        scanner.setRange(split.getRange());
        scannerBase = scanner;
    }
    Collection<IteratorSetting.Column> columns = split.getFetchedColumns();
    if (columns == null) {
        columns = tableConfig.getFetchedColumns();
    }
    // setup a scanner within the bounds of this split
    for (Pair<Text, Text> c : columns) {
        if (c.getSecond() != null) {
            log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
            scannerBase.fetchColumn(c.getFirst(), c.getSecond());
        } else {
            log.debug("Fetching column family " + c.getFirst());
            scannerBase.fetchColumnFamily(c.getFirst());
        }
    }
    SamplerConfiguration samplerConfig = split.getSamplerConfiguration();
    if (samplerConfig == null) {
        samplerConfig = tableConfig.getSamplerConfiguration();
    }
    if (samplerConfig != null) {
        scannerBase.setSamplerConfiguration(samplerConfig);
    }
    Map<String, String> executionHints = split.getExecutionHints();
    if (executionHints == null || executionHints.isEmpty()) {
        executionHints = tableConfig.getExecutionHints();
    }
    if (executionHints != null) {
        scannerBase.setExecutionHints(executionHints);
    }
    scannerIterator = scannerBase.iterator();
    numKeysRead = 0;
}
Also used : ClientSideIteratorScanner(org.apache.accumulo.core.client.ClientSideIteratorScanner) BatchScanner(org.apache.accumulo.core.client.BatchScanner) OfflineScanner(org.apache.accumulo.core.clientImpl.OfflineScanner) IsolatedScanner(org.apache.accumulo.core.client.IsolatedScanner) Scanner(org.apache.accumulo.core.client.Scanner) Authorizations(org.apache.accumulo.core.security.Authorizations) Configuration(org.apache.hadoop.conf.Configuration) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) ScannerImpl(org.apache.accumulo.core.clientImpl.ScannerImpl)
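
The three Boolean lookups in this method repeat one pattern: a per-split override wins when it is set, otherwise the job-level InputTableConfig supplies the default. The same fallback as a generic sketch; overrideOrDefault is a hypothetical helper.

import java.util.function.Supplier;

// Hypothetical helper: prefer the split-level value when present,
// otherwise fall back to the table-level default.
static <T> T overrideOrDefault(T splitValue, Supplier<T> tableDefault) {
    return splitValue != null ? splitValue : tableDefault.get();
}

// usage: boolean isOffline = overrideOrDefault(split.isOffline(), tableConfig::isOfflineScan);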

Example 4 with ScannerImpl

use of org.apache.accumulo.core.clientImpl.ScannerImpl in project accumulo by apache.

the class MetadataTableUtil method deleteTable.

public static void deleteTable(TableId tableId, boolean insertDeletes, ServerContext context, ServiceLock lock) throws AccumuloException {
    try (Scanner ms = new ScannerImpl(context, MetadataTable.ID, Authorizations.EMPTY);
        BatchWriter bw = new BatchWriterImpl(context, MetadataTable.ID, new BatchWriterConfig().setMaxMemory(1000000).setMaxLatency(120000L, TimeUnit.MILLISECONDS).setMaxWriteThreads(2))) {
        // scan metadata for our table and delete everything we find
        Mutation m = null;
        Ample ample = context.getAmple();
        ms.setRange(new KeyExtent(tableId, null, null).toMetaRange());
        // insert deletes before deleting data from metadata... this makes the code fault tolerant
        if (insertDeletes) {
            ms.fetchColumnFamily(DataFileColumnFamily.NAME);
            ServerColumnFamily.DIRECTORY_COLUMN.fetch(ms);
            for (Entry<Key, Value> cell : ms) {
                Key key = cell.getKey();
                if (key.getColumnFamily().equals(DataFileColumnFamily.NAME)) {
                    String ref = TabletFileUtil.validate(key.getColumnQualifierData().toString());
                    bw.addMutation(ample.createDeleteMutation(ref));
                }
                if (ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) {
                    String uri = GcVolumeUtil.getDeleteTabletOnAllVolumesUri(tableId, cell.getValue().toString());
                    bw.addMutation(ample.createDeleteMutation(uri));
                }
            }
            bw.flush();
            ms.clearColumns();
        }
        for (Entry<Key, Value> cell : ms) {
            Key key = cell.getKey();
            if (m == null) {
                m = new Mutation(key.getRow());
                if (lock != null)
                    putLockID(context, lock, m);
            }
            if (key.getRow().compareTo(m.getRow(), 0, m.getRow().length) != 0) {
                bw.addMutation(m);
                m = new Mutation(key.getRow());
                if (lock != null)
                    putLockID(context, lock, m);
            }
            m.putDelete(key.getColumnFamily(), key.getColumnQualifier());
        }
        if (m != null)
            bw.addMutation(m);
    }
}
Also used : IsolatedScanner(org.apache.accumulo.core.client.IsolatedScanner) Scanner(org.apache.accumulo.core.client.Scanner) ScannerImpl(org.apache.accumulo.core.clientImpl.ScannerImpl) BatchWriterImpl(org.apache.accumulo.core.clientImpl.BatchWriterImpl) Value(org.apache.accumulo.core.data.Value) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) Ample(org.apache.accumulo.core.metadata.schema.Ample) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) Key(org.apache.accumulo.core.data.Key)
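
deleteTable reaches for internal ScannerImpl and BatchWriterImpl so it can address the metadata table by ID, but its row-batching of deletes (start a new Mutation whenever the row changes, flush the previous one) works identically through the public API. A minimal sketch against a table name; deleteAllEntries is a hypothetical helper.

import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;

// Hypothetical helper: delete every entry a scan returns, batching deletes by row.
static void deleteAllEntries(AccumuloClient client, String table)
        throws TableNotFoundException, MutationsRejectedException {
    BatchWriterConfig cfg = new BatchWriterConfig().setMaxMemory(1_000_000)
            .setMaxLatency(120_000L, TimeUnit.MILLISECONDS).setMaxWriteThreads(2);
    try (Scanner scanner = client.createScanner(table, Authorizations.EMPTY);
            BatchWriter bw = client.createBatchWriter(table, cfg)) {
        Mutation m = null;
        for (Entry<Key, Value> cell : scanner) {
            Key key = cell.getKey();
            // start a new mutation whenever the row changes, as in deleteTable above
            if (m == null || !new Text(m.getRow()).equals(key.getRow())) {
                if (m != null) {
                    bw.addMutation(m);
                }
                m = new Mutation(key.getRow());
            }
            m.putDelete(key.getColumnFamily(), key.getColumnQualifier());
        }
        if (m != null) {
            bw.addMutation(m);
        }
    }
}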

Example 5 with ScannerImpl

use of org.apache.accumulo.core.clientImpl.ScannerImpl in project accumulo by apache.

the class SplitRecoveryIT method ensureTabletHasNoUnexpectedMetadataEntries.

private void ensureTabletHasNoUnexpectedMetadataEntries(ServerContext context, KeyExtent extent, SortedMap<StoredTabletFile, DataFileValue> expectedMapFiles) throws Exception {
    try (Scanner scanner = new ScannerImpl(context, MetadataTable.ID, Authorizations.EMPTY)) {
        scanner.setRange(extent.toMetaRange());
        HashSet<ColumnFQ> expectedColumns = new HashSet<>();
        expectedColumns.add(ServerColumnFamily.DIRECTORY_COLUMN);
        expectedColumns.add(TabletColumnFamily.PREV_ROW_COLUMN);
        expectedColumns.add(ServerColumnFamily.TIME_COLUMN);
        expectedColumns.add(ServerColumnFamily.LOCK_COLUMN);
        HashSet<Text> expectedColumnFamilies = new HashSet<>();
        expectedColumnFamilies.add(DataFileColumnFamily.NAME);
        expectedColumnFamilies.add(FutureLocationColumnFamily.NAME);
        expectedColumnFamilies.add(CurrentLocationColumnFamily.NAME);
        expectedColumnFamilies.add(LastLocationColumnFamily.NAME);
        expectedColumnFamilies.add(BulkFileColumnFamily.NAME);
        Iterator<Entry<Key, Value>> iter = scanner.iterator();
        boolean sawPer = false;
        while (iter.hasNext()) {
            Entry<Key, Value> entry = iter.next();
            Key key = entry.getKey();
            if (!key.getRow().equals(extent.toMetaRow())) {
                throw new Exception("Tablet " + extent + " contained unexpected " + MetadataTable.NAME + " entry " + key);
            }
            if (TabletColumnFamily.PREV_ROW_COLUMN.hasColumns(key)) {
                sawPer = true;
                if (!KeyExtent.fromMetaPrevRow(entry).equals(extent)) {
                    throw new Exception("Unexpected prev end row " + entry);
                }
            }
            if (expectedColumnFamilies.contains(key.getColumnFamily())) {
                continue;
            }
            if (expectedColumns.remove(new ColumnFQ(key))) {
                continue;
            }
            throw new Exception("Tablet " + extent + " contained unexpected " + MetadataTable.NAME + " entry " + key);
        }
        if (!expectedColumns.isEmpty()) {
            throw new Exception("Not all expected columns seen " + extent + " " + expectedColumns);
        }
        assertTrue(sawPer);
        SortedMap<StoredTabletFile, DataFileValue> fixedMapFiles = MetadataTableUtil.getFileAndLogEntries(context, extent).getSecond();
        verifySame(expectedMapFiles, fixedMapFiles);
    }
}
Also used : Scanner(org.apache.accumulo.core.client.Scanner) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Text(org.apache.hadoop.io.Text) ColumnFQ(org.apache.accumulo.core.util.ColumnFQ) ScannerImpl(org.apache.accumulo.core.clientImpl.ScannerImpl) Entry(java.util.Map.Entry) Value(org.apache.accumulo.core.data.Value) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)
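
The test tracks expected metadata columns as ColumnFQ values, which pair a column family with a qualifier so each expected column is a single set element that can be removed as it is seen. A minimal sketch of the matching idiom; sawAllExpected is a hypothetical helper.

import java.util.HashSet;
import java.util.Set;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.util.ColumnFQ;

// Hypothetical check: remove each seen column from the expected set; anything
// left afterwards was never observed in the scan.
static boolean sawAllExpected(Iterable<Key> seenKeys, Set<ColumnFQ> expected) {
    Set<ColumnFQ> remaining = new HashSet<>(expected);
    for (Key key : seenKeys) {
        remaining.remove(new ColumnFQ(key));
    }
    return remaining.isEmpty();
}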

Aggregations

Scanner (org.apache.accumulo.core.client.Scanner): 5
ScannerImpl (org.apache.accumulo.core.clientImpl.ScannerImpl): 5
Text (org.apache.hadoop.io.Text): 4
IsolatedScanner (org.apache.accumulo.core.client.IsolatedScanner): 3
Key (org.apache.accumulo.core.data.Key): 3
Value (org.apache.accumulo.core.data.Value): 3
DataFileValue (org.apache.accumulo.core.metadata.schema.DataFileValue): 3
IOException (java.io.IOException): 2
BatchScanner (org.apache.accumulo.core.client.BatchScanner): 2
ClientSideIteratorScanner (org.apache.accumulo.core.client.ClientSideIteratorScanner): 2
TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException): 2
SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration): 2
ClientContext (org.apache.accumulo.core.clientImpl.ClientContext): 2
OfflineScanner (org.apache.accumulo.core.clientImpl.OfflineScanner): 2
StoredTabletFile (org.apache.accumulo.core.metadata.StoredTabletFile): 2
Authorizations (org.apache.accumulo.core.security.Authorizations): 2
ArrayList (java.util.ArrayList): 1
HashSet (java.util.HashSet): 1
Entry (java.util.Map.Entry): 1
TreeMap (java.util.TreeMap): 1