use of org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException in project accumulo by apache.
the class TableOperationsImpl method addSplits.
/**
 * Requests a split at every key in {@code partitionKeys}, handling the keys one at a time in
 * iteration order.
 *
 * <p>
 * For each key the current tablet location is looked up and the server at that location is asked
 * to split the tablet. The request is retried (with a 100 ms pause before every attempt after the
 * first) until it succeeds or a non-retryable error is raised.
 *
 * @param tableName
 *          table name, used only in exception and log messages
 * @param partitionKeys
 *          the split points to add
 * @param tableId
 *          id of the table being split
 * @throws TableNotFoundException
 *           if the table does not exist when a location lookup returns nothing, or after a security
 *           failure
 * @throws AccumuloSecurityException
 *           if the server rejects the request for security reasons and the table still exists
 * @throws AccumuloServerException
 *           if the server reports a {@code TApplicationException}
 */
private void addSplits(String tableName, SortedSet<Text> partitionKeys, Table.ID tableId)
    throws AccumuloException, AccumuloSecurityException, TableNotFoundException, AccumuloServerException {
  final TabletLocator locator = TabletLocator.getLocator(context, tableId);

  for (Text splitPoint : partitionKeys) {
    long notServingFailures = 0;

    // Keep trying this split point until the request goes through; leaving a catch block
    // without throwing simply falls through to the next attempt.
    for (int tries = 0;; tries++) {
      if (tries > 0) {
        sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
      }

      final TabletLocation location = locator.locateTablet(context, splitPoint, false, false);
      if (location == null) {
        // No location: fail fast when the table is gone or offline, otherwise look again.
        if (!Tables.exists(context.getInstance(), tableId)) {
          throw new TableNotFoundException(tableId.canonicalID(), tableName, null);
        }
        if (Tables.getTableState(context.getInstance(), tableId) == TableState.OFFLINE) {
          throw new TableOfflineException(context.getInstance(), tableId.canonicalID());
        }
        continue;
      }

      final HostAndPort server = HostAndPort.fromString(location.tablet_location);
      try {
        final TabletClientService.Client client = ThriftUtil.getTServerClient(server, context);
        try {
          OpTimer splitTimer = null;
          if (log.isTraceEnabled()) {
            log.trace("tid={} Splitting tablet {} on {} at {}", Thread.currentThread().getId(), location.tablet_extent, server, splitPoint);
            splitTimer = new OpTimer().start();
          }

          client.splitTablet(Tracer.traceInfo(), context.rpcCreds(), location.tablet_extent.toThrift(), TextUtil.getByteBuffer(splitPoint));

          // The extent was just split, so the cached entry for it is stale.
          locator.invalidateCache(location.tablet_extent);

          if (splitTimer != null) {
            splitTimer.stop();
            log.trace("Split tablet in {}", String.format("%.3f secs", splitTimer.scale(TimeUnit.SECONDS)));
          }
        } finally {
          ThriftUtil.returnClient(client);
        }

        // Split request completed; move on to the next split point.
        break;
      } catch (TApplicationException tae) {
        throw new AccumuloServerException(server.toString(), tae);
      } catch (TTransportException e) {
        // Transport problem: forget everything cached for this server and retry.
        locator.invalidateCache(context.getInstance(), location.tablet_location);
      } catch (ThriftSecurityException e) {
        Tables.clearCache(context.getInstance());
        if (!Tables.exists(context.getInstance(), tableId)) {
          throw new TableNotFoundException(tableId.canonicalID(), tableName, null);
        }
        throw new AccumuloSecurityException(e.user, e.code, e);
      } catch (NotServingTabletException e) {
        // Do not spin silently: warn on the 5th failure and on every 50th one.
        notServingFailures++;
        if (notServingFailures == 5 || notServingFailures % 50 == 0) {
          log.warn("Having difficulty locating hosting tabletserver for split {} on table {}. Seen {} failures.", splitPoint, tableName, notServingFailures);
        }
        locator.invalidateCache(location.tablet_extent);
      } catch (TException e) {
        locator.invalidateCache(context.getInstance(), location.tablet_location);
      }
    }
  }
}
use of org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException in project accumulo by apache.
the class Writer method update.
/**
 * Sends a single mutation to the tablet server currently hosting its row, retrying until the
 * update is accepted or a constraint violation is reported.
 *
 * <p>
 * Whenever no tablet location is found, or the server call fails with a retryable error, the
 * method waits 500 ms and starts over with a fresh location lookup.
 *
 * @param m
 *          the mutation to apply; must be non-null and contain at least one update
 * @throws IllegalArgumentException
 *           if {@code m} is null or empty
 * @throws ConstraintViolationException
 *           rethrown as received from the server call
 */
public void update(Mutation m) throws AccumuloException, AccumuloSecurityException, ConstraintViolationException, TableNotFoundException {
  checkArgument(m != null, "m is null");
  if (m.size() == 0) {
    throw new IllegalArgumentException("Can not add empty mutations");
  }

  for (;;) {
    final TabletLocation target = TabletLocator.getLocator(context, tableId).locateTablet(context, new Text(m.getRow()), false, true);

    if (target == null) {
      log.trace("No tablet location found for row {}", new String(m.getRow(), UTF_8));
    } else {
      final HostAndPort server = HostAndPort.fromString(target.tablet_location);
      try {
        updateServer(context, m, target.tablet_extent, server);
        return;
      } catch (NotServingTabletException e) {
        log.trace("Not serving tablet, server = {}", server);
        TabletLocator.getLocator(context, tableId).invalidateCache(target.tablet_extent);
      } catch (ConstraintViolationException cve) {
        log.error("error sending update to {}", server, cve);
        // Invalidating here is probably unnecessary, but it is harmless.
        TabletLocator.getLocator(context, tableId).invalidateCache(target.tablet_extent);
        throw cve;
      } catch (TException e) {
        log.error("error sending update to {}", server, e);
        TabletLocator.getLocator(context, tableId).invalidateCache(target.tablet_extent);
      }
    }

    // Reached only when this attempt did not succeed: back off before trying again.
    sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
  }
}
use of org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException in project accumulo by apache.
the class TabletGroupWatcher method sendSplitRequest.
/**
 * Asks the tablet server hosting {@code tls} to split its tablet at the end points of a pending
 * delete range, when those end points fall strictly inside the tablet.
 *
 * <p>
 * Nothing is done unless the merge is in the {@code SPLITTING} state, the operation is a delete,
 * and the tablet is {@code HOSTED}. Failures while contacting the server are logged and
 * otherwise ignored.
 *
 * @param info
 *          the merge/delete operation being processed
 * @param state
 *          current state of the tablet
 * @param tls
 *          location information for the tablet
 */
private void sendSplitRequest(MergeInfo info, TabletState state, TabletLocationState tls) {
  // Only an in-progress split phase of a delete (merges don't split) on a hosted tablet qualifies.
  if (!info.getState().equals(MergeState.SPLITTING) || !info.isDelete() || !state.equals(TabletState.HOSTED)) {
    return;
  }

  // The tablet must cover part of the delete range.
  final KeyExtent range = info.getExtent();
  if (!tls.extent.overlaps(range)) {
    return;
  }

  final Text[] endPoints = new Text[] {range.getPrevEndRow(), range.getEndRow()};
  for (Text splitPoint : endPoints) {
    // Skip end points that are absent, outside this tablet, or already a tablet boundary.
    final boolean needsSplit = splitPoint != null && tls.extent.contains(splitPoint) && !splitPoint.equals(tls.extent.getEndRow())
        && !splitPoint.equals(tls.extent.getPrevEndRow());
    if (!needsSplit) {
      continue;
    }

    try {
      final TServerConnection conn = this.master.tserverSet.getConnection(tls.current);
      if (conn == null) {
        Master.log.warn("Not connected to server {}", tls.current);
      } else {
        Master.log.info("Asking {} to split {} at {}", tls.current, tls.extent, splitPoint);
        conn.splitTablet(this.master.masterLock, tls.extent, splitPoint);
      }
    } catch (NotServingTabletException e) {
      Master.log.debug("Error asking tablet server to split a tablet: ", e);
    } catch (Exception e) {
      Master.log.warn("Error asking tablet server to split a tablet: ", e);
    }
  }
}
use of org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException in project accumulo by apache.
the class MetadataLocationObtainer method lookupTablet.
/**
 * Reads tablet location entries for the rows {@code row} through {@code stopRow} from the tablet
 * described by {@code src}.
 *
 * <p>
 * If the first batch decodes to exactly one entry and the server reports more data, one further
 * batch is fetched starting just after that entry.
 *
 * @param context
 *          client context used for the server calls
 * @param src
 *          extent and server location of the tablet to read from
 * @param row
 *          first row to look up (inclusive)
 * @param stopRow
 *          last row to look up
 * @param parent
 *          locator whose cache is invalidated when the lookup fails
 * @return the location entries found, or {@code null} when the lookup failed with a
 *         {@code NotServingTabletException} or a non-server-side {@code AccumuloException}
 * @throws AccumuloServerException
 *           rethrown unchanged when the server reports a server-side error
 */
@Override
public TabletLocations lookupTablet(ClientContext context, TabletLocation src, Text row, Text stopRow, TabletLocator parent)
    throws AccumuloSecurityException, AccumuloException {
  try {
    OpTimer lookupTimer = null;
    if (log.isTraceEnabled()) {
      log.trace("tid={} Looking up in {} row={} extent={} tserver={}", Thread.currentThread().getId(), src.tablet_extent.getTableId(), TextUtil.truncate(row), src.tablet_extent, src.tablet_location);
      lookupTimer = new OpTimer().start();
    }

    // Read whole rows only, so a partially written mutation is never observed. The code that
    // extracts locations sanity-checks that there is a single location, and a partial mutation
    // could make it look as though there were several.
    List<IterInfo> iterators = new ArrayList<>();
    iterators.add(new IterInfo(10000, WholeRowIterator.class.getName(), "WRI"));
    Map<String,Map<String,String>> iteratorOptions = Collections.emptyMap();

    TreeMap<Key,Value> rawBatch = new TreeMap<>();
    TreeMap<Key,Value> decoded = new TreeMap<>();

    Range firstRange = new Range(row, true, stopRow, true);
    boolean hasMore = ThriftScanner.getBatchFromServer(context, firstRange, src.tablet_extent, src.tablet_location, rawBatch, locCols, iterators, iteratorOptions, Constants.SCAN_BATCH_SIZE, Authorizations.EMPTY, false, 0L, null);
    decodeRows(rawBatch, decoded);

    if (hasMore && decoded.size() == 1) {
      // Only one entry so far: fetch one more batch, resuming right after that entry.
      Key resumeKey = decoded.lastKey().followingKey(PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME);
      Key endKey = new Key(stopRow).followingKey(PartialKey.ROW);
      Range nextRange = new Range(resumeKey, true, endKey, false);
      rawBatch.clear();
      hasMore = ThriftScanner.getBatchFromServer(context, nextRange, src.tablet_extent, src.tablet_location, rawBatch, locCols, iterators, iteratorOptions, Constants.SCAN_BATCH_SIZE, Authorizations.EMPTY, false, 0L, null);
      decodeRows(rawBatch, decoded);
    }

    if (lookupTimer != null) {
      lookupTimer.stop();
      log.trace("tid={} Got {} results from {} in {}", Thread.currentThread().getId(), decoded.size(), src.tablet_extent, String.format("%.3f secs", lookupTimer.scale(TimeUnit.SECONDS)));
    }

    return MetadataLocationObtainer.getMetadataLocationEntries(decoded);
  } catch (AccumuloServerException ase) {
    if (log.isTraceEnabled()) {
      log.trace("{} lookup failed, {} server side exception", src.tablet_extent.getTableId(), src.tablet_location);
    }
    throw ase;
  } catch (NotServingTabletException e) {
    if (log.isTraceEnabled()) {
      log.trace("{} lookup failed, {} not serving {}", src.tablet_extent.getTableId(), src.tablet_location, src.tablet_extent);
    }
    // The server no longer hosts this extent: drop just that cache entry.
    parent.invalidateCache(src.tablet_extent);
    return null;
  } catch (AccumuloException e) {
    if (log.isTraceEnabled()) {
      log.trace("{} lookup failed", src.tablet_extent.getTableId(), e);
    }
    // Any other failure: drop everything cached for that server.
    parent.invalidateCache(context.getInstance(), src.tablet_location);
    return null;
  }
}
use of org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException in project accumulo by apache.
the class ThriftScanner method scan.
/**
 * Fetches the next batch of key/values for the scan described by {@code scanState}.
 *
 * <p>
 * The method loops until a batch is obtained or {@code scanState.finished} is set. Each pass
 * first locates the tablet for the current start row (retrying with a pause while no location is
 * available) and then issues the scan against that tablet's server. Recoverable failures reset
 * the scan id and/or the cached location and retry; repeated identical errors are logged at
 * debug level once and at trace level afterwards.
 *
 * @param context
 *          client context used to reach the instance and its configuration
 * @param scanState
 *          mutable state of the scan; its start row, scan id and related fields are updated here
 * @param timeOut
 *          maximum time to keep trying, in seconds, measured from the start of this call
 * @return the batch that was read, or {@code null} when the scan is finished and there is nothing
 *         to return
 * @throws ScanTimedOutException
 *           if {@code timeOut} seconds elapse before a batch is obtained
 * @throws AccumuloException
 *           if the calling thread is interrupted, or a server-side error is reported
 * @throws AccumuloSecurityException
 *           if the scan is rejected for security reasons and the table still exists
 */
public static List<KeyValue> scan(ClientContext context, ScanState scanState, int timeOut)
    throws ScanTimedOutException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
  TabletLocation loc = null;
  Instance instance = context.getInstance();
  long startTime = System.currentTimeMillis();
  String lastError = null;
  String error = null;
  int tooManyFilesCount = 0;
  long sleepMillis = 100;
  final long maxSleepTime = context.getConfiguration().getTimeInMillis(Property.GENERAL_MAX_SCANNER_RETRY_PERIOD);

  List<KeyValue> results = null;

  Span span = Trace.start("scan");
  try {
    while (results == null && !scanState.finished) {
      if (Thread.currentThread().isInterrupted()) {
        throw new AccumuloException("Thread interrupted");
      }

      if ((System.currentTimeMillis() - startTime) / 1000.0 > timeOut)
        throw new ScanTimedOutException();

      // Phase 1: find a tablet location to scan, retrying until one is available.
      while (loc == null) {
        long currentTime = System.currentTimeMillis();
        if ((currentTime - startTime) / 1000.0 > timeOut)
          throw new ScanTimedOutException();

        Span locateSpan = Trace.start("scan:locateTablet");
        try {
          loc = TabletLocator.getLocator(context, scanState.tableId).locateTablet(context, scanState.startRow, scanState.skipStartRow, false);

          if (loc == null) {
            if (!Tables.exists(instance, scanState.tableId))
              throw new TableDeletedException(scanState.tableId.canonicalID());
            else if (Tables.getTableState(instance, scanState.tableId) == TableState.OFFLINE)
              throw new TableOfflineException(instance, scanState.tableId.canonicalID());

            error = "Failed to locate tablet for table : " + scanState.tableId + " row : " + scanState.startRow;
            // Log a new error at debug once; repeats of the same error only at trace.
            if (!error.equals(lastError))
              log.debug("{}", error);
            else if (log.isTraceEnabled())
              log.trace("{}", error);

            lastError = error;
            sleepMillis = pause(sleepMillis, maxSleepTime);
          } else {
            // when a tablet splits we do not want to continue scanning the low child
            // of the split if we are already past it
            Range dataRange = loc.tablet_extent.toDataRange();

            if (scanState.range.getStartKey() != null && dataRange.afterEndKey(scanState.range.getStartKey())) {
              // go to the next tablet
              scanState.startRow = loc.tablet_extent.getEndRow();
              scanState.skipStartRow = true;
              loc = null;
            } else if (scanState.range.getEndKey() != null && dataRange.beforeStartKey(scanState.range.getEndKey())) {
              // should not happen
              throw new RuntimeException("Unexpected tablet, extent : " + loc.tablet_extent + " range : " + scanState.range + " startRow : " + scanState.startRow);
            }
          }
        } catch (AccumuloServerException e) {
          log.debug("Scan failed, server side exception : {}", e.getMessage());
          throw e;
        } catch (AccumuloException e) {
          error = "exception from tablet loc " + e.getMessage();
          if (!error.equals(lastError))
            log.debug("{}", error);
          else if (log.isTraceEnabled())
            log.trace("{}", error);

          lastError = error;
          sleepMillis = pause(sleepMillis, maxSleepTime);
        } finally {
          locateSpan.stop();
        }
      }

      // Phase 2: scan the located tablet.
      Span scanLocation = Trace.start("scan:location");
      scanLocation.data("tserver", loc.tablet_location);
      try {
        results = scan(loc, scanState, context);
      } catch (AccumuloSecurityException e) {
        Tables.clearCache(instance);
        if (!Tables.exists(instance, scanState.tableId))
          throw new TableDeletedException(scanState.tableId.canonicalID());
        e.setTableInfo(Tables.getPrintableTableInfoFromId(instance, scanState.tableId));
        throw e;
      } catch (TApplicationException tae) {
        throw new AccumuloServerException(loc.tablet_location, tae);
      } catch (TSampleNotPresentException tsnpe) {
        String message = "Table " + Tables.getPrintableTableInfoFromId(instance, scanState.tableId) + " does not have sampling configured or built";
        throw new SampleNotPresentException(message, tsnpe);
      } catch (NotServingTabletException e) {
        error = "Scan failed, not serving tablet " + loc;
        if (!error.equals(lastError))
          log.debug("{}", error);
        else if (log.isTraceEnabled())
          log.trace("{}", error);
        lastError = error;

        // The cached extent is stale; drop it and force a fresh lookup on the next pass.
        TabletLocator.getLocator(context, scanState.tableId).invalidateCache(loc.tablet_extent);
        loc = null;

        // no need to try the current scan id somewhere else
        scanState.scanID = null;

        if (scanState.isolated)
          throw new IsolationException();

        sleepMillis = pause(sleepMillis, maxSleepTime);
      } catch (NoSuchScanIDException e) {
        error = "Scan failed, no such scan id " + scanState.scanID + " " + loc;
        if (!error.equals(lastError))
          log.debug("{}", error);
        else if (log.isTraceEnabled())
          log.trace("{}", error);
        lastError = error;

        if (scanState.isolated)
          throw new IsolationException();

        // Retry against the same location with a new scan session; no pause on this path.
        scanState.scanID = null;
      } catch (TooManyFilesException e) {
        error = "Tablet has too many files " + loc + " retrying...";
        if (!error.equals(lastError)) {
          log.debug("{}", error);
          tooManyFilesCount = 0;
        } else {
          tooManyFilesCount++;
          // Escalate to a warning once the same error has repeated 300 times.
          if (tooManyFilesCount == 300)
            log.warn("{}", error);
          else if (log.isTraceEnabled())
            log.trace("{}", error);
        }
        lastError = error;

        // not sure what state the scan session on the server side is
        // in after this occurs, so lets be cautious and start a new
        // scan session
        scanState.scanID = null;

        if (scanState.isolated)
          throw new IsolationException();

        sleepMillis = pause(sleepMillis, maxSleepTime);
      } catch (TException e) {
        TabletLocator.getLocator(context, scanState.tableId).invalidateCache(context.getInstance(), loc.tablet_location);
        error = "Scan failed, thrift error " + e.getClass().getName() + " " + e.getMessage() + " " + loc;
        if (!error.equals(lastError))
          log.debug("{}", error);
        else if (log.isTraceEnabled())
          log.trace("{}", error);
        lastError = error;
        loc = null;

        // do not want to continue using the same scan id, if a timeout occurred could cause a batch to be skipped
        // because a thread on the server side may still be processing the timed out continue scan
        scanState.scanID = null;

        if (scanState.isolated)
          throw new IsolationException();

        sleepMillis = pause(sleepMillis, maxSleepTime);
      } finally {
        scanLocation.stop();
      }
    }

    // A finished scan that produced an empty batch is reported as "no results".
    if (results != null && results.size() == 0 && scanState.finished) {
      results = null;
    }

    return results;
  } catch (InterruptedException ex) {
    // Restore the interrupt status before translating the exception, so callers (and any
    // later isInterrupted() check) can still observe that the thread was interrupted.
    Thread.currentThread().interrupt();
    throw new AccumuloException(ex);
  } finally {
    span.stop();
  }
}
Aggregations