Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
In class PartitionedFileSetDataset, method fixPartitions.
/**
* This method can bring a partitioned file set in sync with explore. It scans the partition table and adds
* every partition to explore. It will start multiple transactions, processing a batch of partitions in each
* transaction. Optionally, it can disable and re-enable explore first, that is, drop and recreate the Hive table.
* @param transactional the Transactional for executing transactions
* @param datasetName the name of the dataset to fix
* @param doDisable whether to disable and re-enable explore first
* @param partitionsPerTx how many partitions to process per transaction
* @param verbose whether to log verbosely. If true, this will log a message for every partition; otherwise it
* will only log a report of how many partitions were added / could not be added.
*/
@Beta
@SuppressWarnings("unused")
public static void fixPartitions(Transactional transactional, final String datasetName,
                                 boolean doDisable, final int partitionsPerTx, final boolean verbose) {

  if (doDisable) {
    try {
      transactional.execute(new TxRunnable() {
        @Override
        public void run(io.cdap.cdap.api.data.DatasetContext context) throws Exception {
          PartitionedFileSetDataset pfs = context.getDataset(datasetName);
          pfs.disableExplore();
          // truncating = true, because this is like truncating
          pfs.enableExplore(true);
        }
      });
    } catch (TransactionFailureException e) {
      throw new DataSetException("Unable to disable and enable Explore", e.getCause());
    } catch (RuntimeException e) {
      if (e.getCause() instanceof TransactionFailureException) {
        throw new DataSetException("Unable to disable and enable Explore", e.getCause().getCause());
      }
      throw e;
    }
  }

  final AtomicReference<PartitionKey> startKey = new AtomicReference<>();
  final AtomicLong errorCount = new AtomicLong(0L);
  final AtomicLong successCount = new AtomicLong(0L);
  do {
    try {
      transactional.execute(new TxRunnable() {
        @Override
        public void run(io.cdap.cdap.api.data.DatasetContext context) throws Exception {
          final PartitionedFileSetDataset pfs = context.getDataset(datasetName);
          // compute start row for the scan, reset remembered start key to null
          byte[] startRow = startKey.get() == null ? null : generateRowKey(startKey.get(), pfs.getPartitioning());
          startKey.set(null);
          PartitionConsumer consumer = new PartitionConsumer() {
            int count = 0;

            @Override
            public void consume(PartitionKey key, String path, @Nullable PartitionMetadata metadata) {
              if (count >= partitionsPerTx) {
                // reached the limit: remember this key as the start for the next round
                startKey.set(key);
                return;
              }
              try {
                pfs.addPartitionToExplore(key, path);
                successCount.incrementAndGet();
                if (verbose) {
                  LOG.info("Added partition {} with path {}", key, path);
                }
              } catch (DataSetException e) {
                errorCount.incrementAndGet();
                if (verbose) {
                  LOG.warn(e.getMessage(), e);
                }
              }
              count++;
            }
          };
          pfs.getPartitions(null, consumer, false, startRow, null, partitionsPerTx + 1);
        }
      });
    } catch (TransactionConflictException e) {
      throw new DataSetException("Transaction conflict while reading partitions. This should never happen. "
                                   + "Make sure that no other programs are using this dataset at the same time.");
    } catch (TransactionFailureException e) {
      throw new DataSetException("Transaction failure: " + e.getMessage(), e.getCause());
    } catch (RuntimeException e) {
      // this looks like duplication but is needed in case this is run from a worker: see CDAP-6837
      if (e.getCause() instanceof TransactionConflictException) {
        throw new DataSetException("Transaction conflict while reading partitions. This should never happen. "
                                     + "Make sure that no other programs are using this dataset at the same time.");
      } else if (e.getCause() instanceof TransactionFailureException) {
        throw new DataSetException("Transaction failure: " + e.getMessage(), e.getCause().getCause());
      } else {
        throw e;
      }
    }
    // if startKey is null, then we consumed less than the limit in this round -> done
  } while (startKey.get() != null);

  LOG.info("Added {} partitions, failed to add {} partitions.", successCount.get(), errorCount.get());
}
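A minimal sketch of how this repair utility might be invoked. The wrapper method, the dataset name "events", and the batch size of 100 are illustrative assumptions, not part of the snippet above:

// hypothetical helper: re-registers all partitions of the "events" dataset with Explore,
// dropping and recreating the Hive table first, 100 partitions per transaction, no verbose logging
public static void repairEventsDataset(Transactional transactional) {
  PartitionedFileSetDataset.fixPartitions(transactional, "events", true, 100, false);
}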
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
In class PartitionedFileSetDataset, method postTxCommit.
@Override
public void postTxCommit() {
  // simply delete the quarantine directory for this transaction
  try {
    Location quarantine = getQuarantineLocation();
    if (quarantine.exists()) {
      boolean deleteSuccess = quarantine.delete(true);
      if (!deleteSuccess) {
        throw new DataSetException(String.format("Error deleting quarantine location %s.", quarantine));
      }
    }
  } catch (IOException e) {
    throw new DataSetException(String.format("Error deleting quarantine location for tx %s.", tx.getWritePointer()), e);
  }
  this.tx = null;
  super.postTxCommit();
}
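For context, postTxCommit() is a Tephra TransactionAware callback that runs only after a successful commit. The sketch below is an assumption about typical client-side usage (the helper method and variable names are illustrative), not code from the class itself:

// hypothetical driver: txClient and pfs are provided by the caller
static void runInTransaction(org.apache.tephra.TransactionSystemClient txClient,
                             PartitionedFileSetDataset pfs)
    throws org.apache.tephra.TransactionFailureException {
  org.apache.tephra.TransactionContext txContext = new org.apache.tephra.TransactionContext(txClient, pfs);
  txContext.start();
  // ... dataset operations for this transaction go here ...
  // on a successful commit, finish() eventually invokes postTxCommit(), which removes
  // the quarantine directory for this transaction as shown above
  txContext.finish();
}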
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
In class PartitionedFileSetDataset, method addPartition.
public void addPartition(PartitionKey key, String path, Map<String, String> metadata,
                         boolean filesCreated, boolean allowAppend) {
  byte[] rowKey = generateRowKey(key, partitioning);
  Row row = partitionsTable.get(rowKey);
  boolean appending = !row.isEmpty();
  if (appending && !allowAppend) {
    throw new PartitionAlreadyExistsException(getName(), key);
  }
  if (appending) {
    // this can happen if the user originally created the partition with a custom relative path
    String existingPath = Bytes.toString(row.get(RELATIVE_PATH));
    if (!path.equals(existingPath)) {
      throw new DataSetException(String.format("Attempting to append to Dataset '%s', to partition '%s' with a "
                                                 + "different path. Original path: '%s'. New path: '%s'",
                                               getName(), key.toString(), existingPath, path));
    }
  }
  LOG.debug("{} partition with key {} and path {} to dataset {}",
            appending ? "Appending to" : "Creating", key, path, getName());
  AddPartitionOperation operation = new AddPartitionOperation(key, path, filesCreated);
  operationsInThisTx.add(operation);
  Put put = new Put(rowKey);
  byte[] nowInMillis = Bytes.toBytes(System.currentTimeMillis());
  if (!appending) {
    put.add(RELATIVE_PATH, Bytes.toBytes(path));
    put.add(CREATION_TIME_COL, nowInMillis);
  }
  put.add(LAST_MODIFICATION_TIME_COL, nowInMillis);
  // we allow updates, because an update will only happen if it is an append
  addMetadataToPut(row, metadata, put, true);
  // index each row by its transaction's write pointer
  put.add(WRITE_PTR_COL, tx.getWritePointer());
  partitionsTable.put(put);
  if (!appending) {
    addPartitionToExplore(key, path);
    operation.setExplorePartitionCreated();
  }
}
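For comparison, this is the kind of call a user typically makes through the public PartitionedFileSet API; a minimal sketch assuming a dataset named "results" with a single string partitioning field (the dataset name, field name, and relative path are illustrative assumptions):

// hypothetical helper; dataset name, field name, and relative path are illustrative
void addResultPartition(io.cdap.cdap.api.data.DatasetContext context) {
  io.cdap.cdap.api.dataset.lib.PartitionedFileSet results = context.getDataset("results");
  io.cdap.cdap.api.dataset.lib.PartitionKey key = io.cdap.cdap.api.dataset.lib.PartitionKey.builder()
    .addStringField("league", "nfl")
    .build();
  // for a new key this creates the partition row and registers the partition with Explore;
  // adding the same key again without append semantics is rejected (see the allowAppend check above)
  results.addPartition(key, "league=nfl");
}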
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
In class BufferingTable, method internalIncrementAndGet.
@ReadWrite
protected Row internalIncrementAndGet(byte[] row, byte[][] columns, long[] amounts) {
  // Logic:
  // * fetch the current values
  // * update the values
  // * update the in-memory store
  // * return the updated values as the result
  // NOTE: there is a more efficient way to do this, but for now we prefer a simple implementation
  //       over premature optimization
  Map<byte[], byte[]> rowMap;
  try {
    rowMap = getRowMap(row, columns);
    reportRead(1);
  } catch (Exception e) {
    LOG.debug("incrementAndGet failed for table: " + getTransactionAwareName()
                + ", row: " + Bytes.toStringBinary(row), e);
    throw new DataSetException("incrementAndGet failed", e);
  }

  byte[][] updatedValues = new byte[columns.length][];
  NavigableMap<byte[], byte[]> result = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
  for (int i = 0; i < columns.length; i++) {
    byte[] column = columns[i];
    byte[] val = rowMap.get(column);
    // convert the stored value to a long
    long longVal;
    if (val == null) {
      longVal = 0L;
    } else {
      if (val.length != Bytes.SIZEOF_LONG) {
        throw new NumberFormatException("Attempted to increment a value that is not convertible to long,"
                                          + " row: " + Bytes.toStringBinary(row)
                                          + " column: " + Bytes.toStringBinary(column));
      }
      longVal = Bytes.toLong(val);
    }
    longVal += amounts[i];
    updatedValues[i] = Bytes.toBytes(longVal);
    result.put(column, updatedValues[i]);
  }

  putInternal(row, columns, updatedValues);
  reportWrite(1, getSize(row) + getSize(columns) + getSize(amounts));
  return new Result(row, result);
}
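At the public Table API level this read-modify-write is exposed as incrementAndGet; a minimal sketch, where the dataset, row, and column names are illustrative assumptions:

// hypothetical helper; "counters", "page-1", and "views" are illustrative names
long bumpViewCount(io.cdap.cdap.api.data.DatasetContext context) {
  io.cdap.cdap.api.dataset.table.Table counters = context.getDataset("counters");
  // reads the current 8-byte value (0 if absent), adds 1, writes the result back, and returns it;
  // a stored value that is not 8 bytes long triggers the NumberFormatException shown above
  return counters.incrementAndGet(Bytes.toBytes("page-1"), Bytes.toBytes("views"), 1L);
}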
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
In class BufferingTable, method scan.
@ReadOnly
@Override
public Scanner scan(Scan scan) {
  ensureTransactionIsStarted();
  NavigableMap<byte[], NavigableMap<byte[], Update>> bufferMap = scanBuffer(scan);
  try {
    return new BufferingScanner(bufferMap, scanPersisted(scan));
  } catch (Exception e) {
    LOG.debug("scan failed for table: " + getTransactionAwareName() + ", scan: " + scan.toString(), e);
    throw new DataSetException("scan failed", e);
  }
}
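A short usage sketch for the scan path; the dataset name and the key range are illustrative assumptions, and the returned Scanner merges buffered with persisted data as the method above shows:

// hypothetical helper; "events" and the start/stop keys are illustrative
void scanRange(io.cdap.cdap.api.data.DatasetContext context) {
  io.cdap.cdap.api.dataset.table.Table table = context.getDataset("events");
  Scanner scanner = table.scan(new Scan(Bytes.toBytes("2017-01-01"), Bytes.toBytes("2017-02-01")));
  try {
    Row row;
    while ((row = scanner.next()) != null) {
      // row.getRow() is the row key, row.getColumns() the column/value map
      processRow(row);  // hypothetical downstream handler
    }
  } finally {
    scanner.close();
  }
}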