Use of org.apache.hudi.common.util.RateLimiter in project hudi by apache.
The class SparkHoodieHBaseIndex, method updateLocationFunction().
private Function2<Integer, Iterator<WriteStatus>, Iterator<WriteStatus>> updateLocationFunction() {
  return (partition, statusIterator) -> {
    List<WriteStatus> writeStatusList = new ArrayList<>();
    // Grab the global HBase connection
    synchronized (SparkHoodieHBaseIndex.class) {
      if (hbaseConnection == null || hbaseConnection.isClosed()) {
        hbaseConnection = getHBaseConnection();
      }
    }
    final long startTimeForPutsTask = DateTime.now().getMillis();
    LOG.info("startTimeForPutsTask for this task: " + startTimeForPutsTask);
    try (BufferedMutator mutator = hbaseConnection.getBufferedMutator(TableName.valueOf(tableName))) {
      final RateLimiter limiter = RateLimiter.create(multiPutBatchSize, TimeUnit.SECONDS);
      while (statusIterator.hasNext()) {
        WriteStatus writeStatus = statusIterator.next();
        List<Mutation> mutations = new ArrayList<>();
        try {
          long numOfInserts = writeStatus.getStat().getNumInserts();
          LOG.info("Num of inserts in this WriteStatus: " + numOfInserts);
          LOG.info("Total inserts in this job: " + this.totalNumInserts);
          LOG.info("multiPutBatchSize for this job: " + this.multiPutBatchSize);
          // Any calls beyond `multiPutBatchSize` within a second will be rate limited
          for (HoodieRecord rec : writeStatus.getWrittenRecords()) {
            if (!writeStatus.isErrored(rec.getKey())) {
              Option<HoodieRecordLocation> loc = rec.getNewLocation();
              if (loc.isPresent()) {
                if (rec.getCurrentLocation() != null) {
                  // This is an update, no need to update index
                  continue;
                }
                Put put = new Put(Bytes.toBytes(rec.getRecordKey()));
                put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN, Bytes.toBytes(loc.get().getInstantTime()));
                put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN, Bytes.toBytes(loc.get().getFileId()));
                put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN, Bytes.toBytes(rec.getPartitionPath()));
                mutations.add(put);
              } else {
                // Delete existing index entry for a deleted record
                Delete delete = new Delete(Bytes.toBytes(rec.getRecordKey()));
                mutations.add(delete);
              }
            }
            if (mutations.size() < multiPutBatchSize) {
              continue;
            }
            doMutations(mutator, mutations, limiter);
          }
          // process remaining puts and deletes, if any
          doMutations(mutator, mutations, limiter);
        } catch (Exception e) {
          Exception we = new Exception("Error updating index for " + writeStatus, e);
          LOG.error(we);
          writeStatus.setGlobalError(we);
        }
        writeStatusList.add(writeStatus);
      }
      final long endPutsTime = DateTime.now().getMillis();
      LOG.info("hbase puts task time for this task: " + (endPutsTime - startTimeForPutsTask));
    } catch (IOException e) {
      throw new HoodieIndexException("Failed to Update Index locations because of exception with HBase Client", e);
    }
    return writeStatusList.iterator();
  };
}
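
Both snippets hand the accumulated puts and deletes to a doMutations helper that is not shown on this page. A minimal sketch of what such a helper could look like, reusing the imports of the surrounding class and assuming Hudi's RateLimiter exposes a tryAcquire(int) method that waits until the requested permits fit within the per-second budget:

// Sketch only: the real helper lives in SparkHoodieHBaseIndex and may differ in detail.
private void doMutations(BufferedMutator mutator, List<Mutation> mutations, RateLimiter limiter) throws IOException {
  if (mutations.isEmpty()) {
    return;
  }
  // Claim one permit per mutation so that no more than multiPutBatchSize
  // operations are issued against HBase per second (assumes RateLimiter#tryAcquire(int)).
  limiter.tryAcquire(mutations.size());
  mutator.mutate(mutations);
  mutator.flush();
  mutations.clear();
}

Because the caller resets the mutation list after each flush, every call to doMutations accounts for at most one batch of multiPutBatchSize operations against the limiter.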
Use of org.apache.hudi.common.util.RateLimiter in project hudi by apache.
The class SparkHoodieHBaseIndex, method rollbackCommit().
@Override
public boolean rollbackCommit(String instantTime) {
  int multiGetBatchSize = config.getHbaseIndexGetBatchSize();
  boolean rollbackSync = config.getHBaseIndexRollbackSync();
  if (!config.getHBaseIndexRollbackSync()) {
    // Default Rollback in HbaseIndex is managed via method {@link #checkIfValidCommit()}
    return true;
  }
  synchronized (SparkHoodieHBaseIndex.class) {
    if (hbaseConnection == null || hbaseConnection.isClosed()) {
      hbaseConnection = getHBaseConnection();
    }
  }
  try (HTable hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName));
      BufferedMutator mutator = hbaseConnection.getBufferedMutator(TableName.valueOf(tableName))) {
    final RateLimiter limiter = RateLimiter.create(multiPutBatchSize, TimeUnit.SECONDS);
    Long rollbackTime = HoodieActiveTimeline.parseDateFromInstantTime(instantTime).getTime();
    Long currentTime = new Date().getTime();
    Scan scan = new Scan();
    scan.addFamily(SYSTEM_COLUMN_FAMILY);
    scan.setTimeRange(rollbackTime, currentTime);
    ResultScanner scanner = hTable.getScanner(scan);
    Iterator<Result> scannerIterator = scanner.iterator();
    List<Get> statements = new ArrayList<>();
    List<Result> currentVersionResults = new ArrayList<Result>();
    List<Mutation> mutations = new ArrayList<>();
    while (scannerIterator.hasNext()) {
      Result result = scannerIterator.next();
      currentVersionResults.add(result);
      statements.add(generateStatement(Bytes.toString(result.getRow()), 0L, rollbackTime - 1));
      if (scannerIterator.hasNext() && statements.size() < multiGetBatchSize) {
        continue;
      }
      Result[] lastVersionResults = hTable.get(statements);
      for (int i = 0; i < lastVersionResults.length; i++) {
        Result lastVersionResult = lastVersionResults[i];
        if (null == lastVersionResult.getRow() && rollbackSync) {
          Result currentVersionResult = currentVersionResults.get(i);
          Delete delete = new Delete(currentVersionResult.getRow());
          mutations.add(delete);
        }
        if (null != lastVersionResult.getRow()) {
          String oldPath = new String(lastVersionResult.getValue(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));
          String nowPath = new String(currentVersionResults.get(i).getValue(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));
          if (!oldPath.equals(nowPath) || rollbackSync) {
            Put put = new Put(lastVersionResult.getRow());
            put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN, lastVersionResult.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN));
            put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN, lastVersionResult.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));
            put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN, lastVersionResult.getValue(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));
            mutations.add(put);
          }
        }
      }
      doMutations(mutator, mutations, limiter);
      currentVersionResults.clear();
      statements.clear();
      mutations.clear();
    }
  } catch (Exception e) {
    LOG.error("hbase index roll back failed", e);
    return false;
  }
  return true;
}
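
The limiter used in both methods is Hudi's own org.apache.hudi.common.util.RateLimiter, not Guava's, created with a permit budget per time unit as seen in the snippets. A small standalone sketch of the throttling pattern, assuming the create(int, TimeUnit) factory shown above and a blocking tryAcquire(int); BATCH_SIZE and the printed message are illustrative only:

import java.util.concurrent.TimeUnit;
import org.apache.hudi.common.util.RateLimiter;

public class RateLimiterSketch {
  private static final int BATCH_SIZE = 100;

  public static void main(String[] args) {
    // Allow at most BATCH_SIZE permits per second.
    RateLimiter limiter = RateLimiter.create(BATCH_SIZE, TimeUnit.SECONDS);
    for (int batch = 0; batch < 5; batch++) {
      // Each iteration consumes a full second's budget, so batches are
      // spaced roughly one second apart (assumes tryAcquire waits for permits).
      limiter.tryAcquire(BATCH_SIZE);
      System.out.println("sent batch " + batch);
    }
  }
}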