use of org.apache.accumulo.core.client.SampleNotPresentException in project accumulo-examples by apache.
the class SampleExample method main.
public static void main(String[] args) throws Exception {
Opts opts = new Opts();
BatchWriterOpts bwOpts = new BatchWriterOpts();
opts.parseArgs(RandomBatchWriter.class.getName(), args, bwOpts);
Connector conn = opts.getConnector();
if (!conn.tableOperations().exists(opts.getTableName())) {
conn.tableOperations().create(opts.getTableName());
} else {
System.out.println("Table exists, not doing anything.");
return;
}
// write some data
BatchWriter bw = conn.createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
bw.addMutation(createMutation("9225", "abcde", "file://foo.txt"));
bw.addMutation(createMutation("8934", "accumulo scales", "file://accumulo_notes.txt"));
bw.addMutation(createMutation("2317", "milk, eggs, bread, parmigiano-reggiano", "file://groceries/9/txt"));
bw.addMutation(createMutation("3900", "EC2 ate my homework", "file://final_project.txt"));
bw.flush();
SamplerConfiguration sc1 = new SamplerConfiguration(RowSampler.class.getName());
sc1.setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "3"));
conn.tableOperations().setSamplerConfiguration(opts.getTableName(), sc1);
Scanner scanner = conn.createScanner(opts.getTableName(), Authorizations.EMPTY);
System.out.println("Scanning all data :");
print(scanner);
System.out.println();
System.out.println("Scanning with sampler configuration. Data was written before sampler was set on table, scan should fail.");
scanner.setSamplerConfiguration(sc1);
try {
print(scanner);
} catch (SampleNotPresentException e) {
System.out.println(" Saw sample not present exception as expected.");
}
System.out.println();
// compact table to recreate sample data
conn.tableOperations().compact(opts.getTableName(), new CompactionConfig().setCompactionStrategy(NO_SAMPLE_STRATEGY));
System.out.println("Scanning after compaction (compaction should have created sample data) : ");
print(scanner);
System.out.println();
// update a document in the sample data
bw.addMutation(createMutation("2317", "milk, eggs, bread, parmigiano-reggiano, butter", "file://groceries/9/txt"));
bw.close();
System.out.println("Scanning sample after updating content for docId 2317 (should see content change in sample data) : ");
print(scanner);
System.out.println();
// change tables sampling configuration...
SamplerConfiguration sc2 = new SamplerConfiguration(RowSampler.class.getName());
sc2.setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "2"));
conn.tableOperations().setSamplerConfiguration(opts.getTableName(), sc2);
// compact table to recreate sample data using new configuration
conn.tableOperations().compact(opts.getTableName(), new CompactionConfig().setCompactionStrategy(NO_SAMPLE_STRATEGY));
System.out.println("Scanning with old sampler configuration. Sample data was created using new configuration with a compaction. Scan should fail.");
try {
// try scanning with old sampler configuration
print(scanner);
} catch (SampleNotPresentException e) {
System.out.println(" Saw sample not present exception as expected ");
}
System.out.println();
// update expected sampler configuration on scanner
scanner.setSamplerConfiguration(sc2);
System.out.println("Scanning with new sampler configuration : ");
print(scanner);
System.out.println();
}
use of org.apache.accumulo.core.client.SampleNotPresentException in project accumulo by apache.
the class LookupTask method run.
@Override
public void run() {
MultiScanSession session = (MultiScanSession) server.getSession(scanID);
String oldThreadName = Thread.currentThread().getName();
try {
if (isCancelled() || session == null)
return;
TableConfiguration acuTableConf = server.getTableConfiguration(session.threadPoolExtent);
long maxResultsSize = acuTableConf.getAsBytes(Property.TABLE_SCAN_MAXMEM);
runState.set(ScanRunState.RUNNING);
Thread.currentThread().setName("Client: " + session.client + " User: " + session.getUser() + " Start: " + session.startTime + " Table: ");
long bytesAdded = 0;
long maxScanTime = 4000;
long startTime = System.currentTimeMillis();
List<KVEntry> results = new ArrayList<>();
Map<KeyExtent, List<Range>> failures = new HashMap<>();
List<KeyExtent> fullScans = new ArrayList<>();
KeyExtent partScan = null;
Key partNextKey = null;
boolean partNextKeyInclusive = false;
Iterator<Entry<KeyExtent, List<Range>>> iter = session.queries.entrySet().iterator();
// check the time so that the read ahead thread is not monopolized
while (iter.hasNext() && bytesAdded < maxResultsSize && (System.currentTimeMillis() - startTime) < maxScanTime) {
Entry<KeyExtent, List<Range>> entry = iter.next();
iter.remove();
// check that tablet server is serving requested tablet
Tablet tablet = server.getOnlineTablet(entry.getKey());
if (tablet == null) {
failures.put(entry.getKey(), entry.getValue());
continue;
}
Thread.currentThread().setName("Client: " + session.client + " User: " + session.getUser() + " Start: " + session.startTime + " Tablet: " + entry.getKey().toString());
LookupResult lookupResult;
try {
// canceled
if (isCancelled())
interruptFlag.set(true);
lookupResult = tablet.lookup(entry.getValue(), session.columnSet, session.auths, results, maxResultsSize - bytesAdded, session.ssiList, session.ssio, interruptFlag, session.samplerConfig, session.batchTimeOut, session.context);
// if the tablet was closed it it possible that the
// interrupt flag was set.... do not want it set for
// the next
// lookup
interruptFlag.set(false);
} catch (IOException e) {
log.warn("lookup failed for tablet " + entry.getKey(), e);
throw new RuntimeException(e);
}
bytesAdded += lookupResult.bytesAdded;
if (lookupResult.unfinishedRanges.size() > 0) {
if (lookupResult.closed) {
failures.put(entry.getKey(), lookupResult.unfinishedRanges);
} else {
session.queries.put(entry.getKey(), lookupResult.unfinishedRanges);
partScan = entry.getKey();
partNextKey = lookupResult.unfinishedRanges.get(0).getStartKey();
partNextKeyInclusive = lookupResult.unfinishedRanges.get(0).isStartKeyInclusive();
}
} else {
fullScans.add(entry.getKey());
}
}
long finishTime = System.currentTimeMillis();
session.totalLookupTime += (finishTime - startTime);
session.numEntries += results.size();
// convert everything to thrift before adding result
List<TKeyValue> retResults = new ArrayList<>();
for (KVEntry entry : results) retResults.add(new TKeyValue(entry.getKey().toThrift(), ByteBuffer.wrap(entry.getValue().get())));
Map<TKeyExtent, List<TRange>> retFailures = Translator.translate(failures, Translators.KET, new Translator.ListTranslator<>(Translators.RT));
List<TKeyExtent> retFullScans = Translator.translate(fullScans, Translators.KET);
TKeyExtent retPartScan = null;
TKey retPartNextKey = null;
if (partScan != null) {
retPartScan = partScan.toThrift();
retPartNextKey = partNextKey.toThrift();
}
// add results to queue
addResult(new MultiScanResult(retResults, retFailures, retFullScans, retPartScan, retPartNextKey, partNextKeyInclusive, session.queries.size() != 0));
} catch (IterationInterruptedException iie) {
if (!isCancelled()) {
log.warn("Iteration interrupted, when scan not cancelled", iie);
addResult(iie);
}
} catch (SampleNotPresentException e) {
addResult(e);
} catch (Throwable e) {
log.warn("exception while doing multi-scan ", e);
addResult(e);
} finally {
Thread.currentThread().setName(oldThreadName);
runState.set(ScanRunState.FINISHED);
}
}
use of org.apache.accumulo.core.client.SampleNotPresentException in project accumulo by apache.
the class NextBatchTask method run.
@Override
public void run() {
final ScanSession scanSession = (ScanSession) server.getSession(scanID);
String oldThreadName = Thread.currentThread().getName();
try {
if (isCancelled() || scanSession == null)
return;
runState.set(ScanRunState.RUNNING);
Thread.currentThread().setName("User: " + scanSession.getUser() + " Start: " + scanSession.startTime + " Client: " + scanSession.client + " Tablet: " + scanSession.extent);
Tablet tablet = server.getOnlineTablet(scanSession.extent);
if (tablet == null) {
addResult(new org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException(scanSession.extent.toThrift()));
return;
}
long t1 = System.currentTimeMillis();
ScanBatch batch = scanSession.scanner.read();
long t2 = System.currentTimeMillis();
scanSession.nbTimes.addStat(t2 - t1);
// there should only be one thing on the queue at a time, so
// it should be ok to call add()
// instead of put()... if add() fails because queue is at
// capacity it means there is code
// problem somewhere
addResult(batch);
} catch (TabletClosedException e) {
addResult(new org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException(scanSession.extent.toThrift()));
} catch (IterationInterruptedException iie) {
if (!isCancelled()) {
log.warn("Iteration interrupted, when scan not cancelled", iie);
addResult(iie);
}
} catch (TooManyFilesException | SampleNotPresentException e) {
addResult(e);
} catch (OutOfMemoryError ome) {
Halt.halt("Ran out of memory scanning " + scanSession.extent + " for " + scanSession.client, 1);
addResult(ome);
} catch (Throwable e) {
log.warn("exception while scanning tablet " + (scanSession == null ? "(unknown)" : scanSession.extent), e);
addResult(e);
} finally {
runState.set(ScanRunState.FINISHED);
Thread.currentThread().setName(oldThreadName);
}
}
use of org.apache.accumulo.core.client.SampleNotPresentException in project accumulo by apache.
the class ScanCommand method setupSampling.
protected void setupSampling(final String tableName, final CommandLine cl, final Shell shellState, ScannerBase scanner) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
if (getUseSample(cl)) {
SamplerConfiguration samplerConfig = shellState.getConnector().tableOperations().getSamplerConfiguration(tableName);
if (samplerConfig == null) {
throw new SampleNotPresentException("Table " + tableName + " does not have sampling configured");
}
Shell.log.debug("Using sampling configuration : " + samplerConfig);
scanner.setSamplerConfiguration(samplerConfig);
}
}
use of org.apache.accumulo.core.client.SampleNotPresentException in project accumulo by apache.
the class ThriftScanner method scan.
public static List<KeyValue> scan(ClientContext context, ScanState scanState, int timeOut) throws ScanTimedOutException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
TabletLocation loc = null;
Instance instance = context.getInstance();
long startTime = System.currentTimeMillis();
String lastError = null;
String error = null;
int tooManyFilesCount = 0;
long sleepMillis = 100;
final long maxSleepTime = context.getConfiguration().getTimeInMillis(Property.GENERAL_MAX_SCANNER_RETRY_PERIOD);
List<KeyValue> results = null;
Span span = Trace.start("scan");
try {
while (results == null && !scanState.finished) {
if (Thread.currentThread().isInterrupted()) {
throw new AccumuloException("Thread interrupted");
}
if ((System.currentTimeMillis() - startTime) / 1000.0 > timeOut)
throw new ScanTimedOutException();
while (loc == null) {
long currentTime = System.currentTimeMillis();
if ((currentTime - startTime) / 1000.0 > timeOut)
throw new ScanTimedOutException();
Span locateSpan = Trace.start("scan:locateTablet");
try {
loc = TabletLocator.getLocator(context, scanState.tableId).locateTablet(context, scanState.startRow, scanState.skipStartRow, false);
if (loc == null) {
if (!Tables.exists(instance, scanState.tableId))
throw new TableDeletedException(scanState.tableId.canonicalID());
else if (Tables.getTableState(instance, scanState.tableId) == TableState.OFFLINE)
throw new TableOfflineException(instance, scanState.tableId.canonicalID());
error = "Failed to locate tablet for table : " + scanState.tableId + " row : " + scanState.startRow;
if (!error.equals(lastError))
log.debug("{}", error);
else if (log.isTraceEnabled())
log.trace("{}", error);
lastError = error;
sleepMillis = pause(sleepMillis, maxSleepTime);
} else {
// when a tablet splits we do want to continue scanning the low child
// of the split if we are already passed it
Range dataRange = loc.tablet_extent.toDataRange();
if (scanState.range.getStartKey() != null && dataRange.afterEndKey(scanState.range.getStartKey())) {
// go to the next tablet
scanState.startRow = loc.tablet_extent.getEndRow();
scanState.skipStartRow = true;
loc = null;
} else if (scanState.range.getEndKey() != null && dataRange.beforeStartKey(scanState.range.getEndKey())) {
// should not happen
throw new RuntimeException("Unexpected tablet, extent : " + loc.tablet_extent + " range : " + scanState.range + " startRow : " + scanState.startRow);
}
}
} catch (AccumuloServerException e) {
log.debug("Scan failed, server side exception : {}", e.getMessage());
throw e;
} catch (AccumuloException e) {
error = "exception from tablet loc " + e.getMessage();
if (!error.equals(lastError))
log.debug("{}", error);
else if (log.isTraceEnabled())
log.trace("{}", error);
lastError = error;
sleepMillis = pause(sleepMillis, maxSleepTime);
} finally {
locateSpan.stop();
}
}
Span scanLocation = Trace.start("scan:location");
scanLocation.data("tserver", loc.tablet_location);
try {
results = scan(loc, scanState, context);
} catch (AccumuloSecurityException e) {
Tables.clearCache(instance);
if (!Tables.exists(instance, scanState.tableId))
throw new TableDeletedException(scanState.tableId.canonicalID());
e.setTableInfo(Tables.getPrintableTableInfoFromId(instance, scanState.tableId));
throw e;
} catch (TApplicationException tae) {
throw new AccumuloServerException(loc.tablet_location, tae);
} catch (TSampleNotPresentException tsnpe) {
String message = "Table " + Tables.getPrintableTableInfoFromId(instance, scanState.tableId) + " does not have sampling configured or built";
throw new SampleNotPresentException(message, tsnpe);
} catch (NotServingTabletException e) {
error = "Scan failed, not serving tablet " + loc;
if (!error.equals(lastError))
log.debug("{}", error);
else if (log.isTraceEnabled())
log.trace("{}", error);
lastError = error;
TabletLocator.getLocator(context, scanState.tableId).invalidateCache(loc.tablet_extent);
loc = null;
// no need to try the current scan id somewhere else
scanState.scanID = null;
if (scanState.isolated)
throw new IsolationException();
sleepMillis = pause(sleepMillis, maxSleepTime);
} catch (NoSuchScanIDException e) {
error = "Scan failed, no such scan id " + scanState.scanID + " " + loc;
if (!error.equals(lastError))
log.debug("{}", error);
else if (log.isTraceEnabled())
log.trace("{}", error);
lastError = error;
if (scanState.isolated)
throw new IsolationException();
scanState.scanID = null;
} catch (TooManyFilesException e) {
error = "Tablet has too many files " + loc + " retrying...";
if (!error.equals(lastError)) {
log.debug("{}", error);
tooManyFilesCount = 0;
} else {
tooManyFilesCount++;
if (tooManyFilesCount == 300)
log.warn("{}", error);
else if (log.isTraceEnabled())
log.trace("{}", error);
}
lastError = error;
// not sure what state the scan session on the server side is
// in after this occurs, so lets be cautious and start a new
// scan session
scanState.scanID = null;
if (scanState.isolated)
throw new IsolationException();
sleepMillis = pause(sleepMillis, maxSleepTime);
} catch (TException e) {
TabletLocator.getLocator(context, scanState.tableId).invalidateCache(context.getInstance(), loc.tablet_location);
error = "Scan failed, thrift error " + e.getClass().getName() + " " + e.getMessage() + " " + loc;
if (!error.equals(lastError))
log.debug("{}", error);
else if (log.isTraceEnabled())
log.trace("{}", error);
lastError = error;
loc = null;
// do not want to continue using the same scan id, if a timeout occurred could cause a batch to be skipped
// because a thread on the server side may still be processing the timed out continue scan
scanState.scanID = null;
if (scanState.isolated)
throw new IsolationException();
sleepMillis = pause(sleepMillis, maxSleepTime);
} finally {
scanLocation.stop();
}
}
if (results != null && results.size() == 0 && scanState.finished) {
results = null;
}
return results;
} catch (InterruptedException ex) {
throw new AccumuloException(ex);
} finally {
span.stop();
}
}
Aggregations