Use of org.apache.accumulo.core.client.mock.MockConnector in project Gaffer by gchq.
In the class IngestUtils, the method createSplitsFile:
/**
* Get the existing splits from a table in Accumulo and write a splits file.
* The number of splits is returned.
*
* @param conn - An existing connection to an Accumulo instance
* @param table - The table name
* @param fs - The FileSystem in which to create the splits file
* @param splitsFile - A Path for the output splits file
* @param maxSplits - The maximum number of splits
* @return The number of splits in the table
* @throws IOException for any IO issues writing the splits file. Other Accumulo exceptions are caught and wrapped in an IOException.
*/
public static int createSplitsFile(final Connector conn, final String table, final FileSystem fs, final Path splitsFile, final int maxSplits) throws IOException {
    LOGGER.info("Creating splits file in location {} from table {} with maximum splits {}", splitsFile, table, maxSplits);
    // Get the splits from the table
    Collection<Text> splits;
    try {
        splits = conn.tableOperations().listSplits(table, maxSplits);
    } catch (TableNotFoundException | AccumuloSecurityException | AccumuloException e) {
        throw new IOException(e.getMessage(), e);
    }
    // This should have returned at most maxSplits splits, but this is not implemented properly in MockInstance.
    if (splits.size() > maxSplits) {
        if (conn instanceof MockConnector) {
            LOGGER.info("Manually reducing the number of splits to {} due to MockInstance not implementing listSplits(table, maxSplits) properly", maxSplits);
        } else {
            LOGGER.info("Manually reducing the number of splits to {} (number of splits was {})", maxSplits, splits.size());
        }
        final Collection<Text> filteredSplits = new TreeSet<>();
        final int outputEveryNth = splits.size() / maxSplits;
        LOGGER.info("Outputting every {}-th split from {} total", outputEveryNth, splits.size());
        int i = 0;
        for (final Text text : splits) {
            if (i % outputEveryNth == 0) {
                filteredSplits.add(text);
            }
            i++;
            if (filteredSplits.size() >= maxSplits) {
                break;
            }
        }
        splits = filteredSplits;
    }
    LOGGER.info("Found {} splits from table {}", splits.size(), table);
    try (final PrintStream out = new PrintStream(new BufferedOutputStream(fs.create(splitsFile, true)), false, CommonConstants.UTF_8)) {
        // Write the splits to file
        if (splits.isEmpty()) {
            out.close();
            return 0;
        }
        for (final Text split : splits) {
            out.println(new String(Base64.encodeBase64(split.getBytes()), CommonConstants.UTF_8));
        }
    }
    return splits.size();
}
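For context, a minimal sketch of how this method might be exercised against an in-memory instance, which is exactly what makes conn a MockConnector in the check above. The instance name, user, table name, and output path are illustrative values, not from the source, and IngestUtils is assumed to be on the classpath:

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateSplitsFileExample {
    public static void main(final String[] args) throws Exception {
        // MockInstance.getConnector returns a MockConnector, so the instanceof branch above is taken.
        final Connector conn = new MockInstance("example").getConnector("user", new PasswordToken(""));
        conn.tableOperations().create("table1");
        final FileSystem fs = FileSystem.getLocal(new Configuration());
        // A freshly created table has no splits, so this writes an empty file and returns 0.
        final int numSplits = IngestUtils.createSplitsFile(conn, "table1", fs, new Path("target/splits"), 10);
        System.out.println("Number of splits written: " + numSplits);
    }
}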
Use of org.apache.accumulo.core.client.mock.MockConnector in project Gora by Apache.
In the class AccumuloStore, the method getPartitions:
@Override
public List<PartitionQuery<K, T>> getPartitions(Query<K, T> query) throws IOException {
    try {
        // Use a stub locator for the in-memory MockInstance; otherwise look up the real tablet locator.
        TabletLocator tl;
        if (conn instanceof MockConnector)
            tl = new MockTabletLocator();
        else
            tl = TabletLocator.getLocator(new ClientContext(conn.getInstance(), credentials, AccumuloConfiguration.getTableConfiguration(conn, Tables.getTableId(conn.getInstance(), mapping.tableName))), new Text(Tables.getTableId(conn.getInstance(), mapping.tableName)));
        Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
        tl.invalidateCache();
        // binRanges returns the ranges it failed to bin; retry until everything is binned,
        // checking on each pass that the table still exists and is online.
        while (tl.binRanges(new ClientContext(conn.getInstance(), credentials, AccumuloConfiguration.getTableConfiguration(conn, Tables.getTableId(conn.getInstance(), mapping.tableName))), Collections.singletonList(createRange(query)), binnedRanges).size() > 0) {
            // TODO log?
            if (!Tables.exists(conn.getInstance(), Tables.getTableId(conn.getInstance(), mapping.tableName)))
                throw new TableDeletedException(Tables.getTableId(conn.getInstance(), mapping.tableName));
            else if (Tables.getTableState(conn.getInstance(), Tables.getTableId(conn.getInstance(), mapping.tableName)) == TableState.OFFLINE)
                throw new TableOfflineException(conn.getInstance(), Tables.getTableId(conn.getInstance(), mapping.tableName));
            UtilWaitThread.sleep(100);
            tl.invalidateCache();
        }
        List<PartitionQuery<K, T>> ret = new ArrayList<>();
        Text startRow = null;
        Text endRow = null;
        if (query.getStartKey() != null)
            startRow = new Text(toBytes(query.getStartKey()));
        if (query.getEndKey() != null)
            endRow = new Text(toBytes(query.getEndKey()));
        // Hadoop expects hostnames, but Accumulo keeps track of IPs, so convert (and cache the lookups).
        HashMap<String, String> hostNameCache = new HashMap<>();
        for (Entry<String, Map<KeyExtent, List<Range>>> entry : binnedRanges.entrySet()) {
            String ip = entry.getKey().split(":", 2)[0];
            String location = hostNameCache.get(ip);
            if (location == null) {
                InetAddress inetAddress = InetAddress.getByName(ip);
                location = inetAddress.getHostName();
                hostNameCache.put(ip, location);
            }
            // Build one PartitionQuery per tablet, clipping its key range to the query's bounds.
            Map<KeyExtent, List<Range>> tablets = entry.getValue();
            for (KeyExtent ke : tablets.keySet()) {
                K startKey = null;
                if (startRow == null || !ke.contains(startRow)) {
                    if (ke.getPrevEndRow() != null) {
                        startKey = followingKey(encoder, getKeyClass(), getBytes(ke.getPrevEndRow()));
                    }
                } else {
                    startKey = fromBytes(getKeyClass(), getBytes(startRow));
                }
                K endKey = null;
                if (endRow == null || !ke.contains(endRow)) {
                    if (ke.getEndRow() != null)
                        endKey = lastPossibleKey(encoder, getKeyClass(), getBytes(ke.getEndRow()));
                } else {
                    endKey = fromBytes(getKeyClass(), getBytes(endRow));
                }
                PartitionQueryImpl<K, T> pqi = new PartitionQueryImpl<>(query, startKey, endKey, location);
                pqi.setConf(getConf());
                ret.add(pqi);
            }
        }
        return ret;
    } catch (TableNotFoundException | AccumuloException | AccumuloSecurityException e) {
        throw new IOException(e);
    }
}
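For context, a minimal sketch of how a caller might consume the partitions this method returns, running one scan per tablet. The process handler is hypothetical, and the store and query are assumed to be set up elsewhere:

import java.util.Arrays;

import org.apache.gora.persistency.Persistent;
import org.apache.gora.query.PartitionQuery;
import org.apache.gora.query.Query;
import org.apache.gora.query.Result;
import org.apache.gora.store.DataStore;

public class PartitionScanSketch {
    static <K, T extends Persistent> void scanPartitions(final DataStore<K, T> store, final Query<K, T> query) throws Exception {
        for (final PartitionQuery<K, T> partition : store.getPartitions(query)) {
            // Each partition covers one tablet's key range and carries the tablet
            // server's hostname, which schedulers can use for data locality.
            System.out.println("Scanning tablet hosted at " + Arrays.toString(partition.getLocations()));
            final Result<K, T> result = store.execute(partition);
            try {
                while (result.next()) {
                    process(result.getKey(), result.get());
                }
            } finally {
                result.close();
            }
        }
    }

    // Hypothetical handler, not part of the source.
    static <K, T> void process(final K key, final T value) {
        // application-specific handling of each key/value pair
    }
}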