use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.
the class PartitionedFileSetAdmin method truncate.
@Override
public void truncate() throws IOException {
  super.truncate();
  // TODO: fix this; for now, reset explore by disabling and re-enabling it,
  // which drops and recreates the Hive table
  if (FileSetProperties.isExploreEnabled(spec.getProperties())) {
    ExploreFacade exploreFacade = exploreFacadeProvider.get();
    if (exploreFacade != null) {
      DatasetId instanceId = new DatasetId(context.getNamespaceId(), spec.getName());
      try {
        exploreFacade.disableExploreDataset(instanceId, spec);
        exploreFacade.enableExploreDataset(instanceId, spec, true);
      } catch (Exception e) {
        throw new DataSetException(String.format("Unable to reset explore on dataset %s", instanceId), e);
      }
    }
  }
}
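The disable-then-enable pair is what resets the Hive table after the data is gone; the boolean passed to enableExploreDataset signals that the dataset is being truncated, so the table is recreated empty. A minimal, hypothetical caller is sketched below; how the PartitionedFileSetAdmin instance is obtained from the dataset framework is an assumption and not shown in this example.

import java.io.IOException;
import io.cdap.cdap.api.dataset.DataSetException;

// Hedged sketch: 'admin' is assumed to come from the dataset framework.
void truncateWithExploreReset(PartitionedFileSetAdmin admin) {
  try {
    admin.truncate();
  } catch (IOException e) {
    // the underlying file set could not be truncated
    throw new DataSetException("Truncate failed", e);
  }
  // a DataSetException propagating out of truncate() means the data was removed
  // but the Hive table could not be dropped and recreated
}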
use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.
the class PartitionedFileSetDataset method assertNotExists.
// Throws PartitionAlreadyExistsException if the partition key already exists.
// Otherwise, returns the row key corresponding to the PartitionKey.
@ReadOnly
byte[] assertNotExists(PartitionKey key, boolean supportNonTransactional) {
  byte[] rowKey = generateRowKey(key, partitioning);
  if (tx == null && supportNonTransactional) {
    if (LOG.isWarnEnabled()) {
      StringBuilder sb = new StringBuilder();
      for (StackTraceElement stackTraceElement : Thread.currentThread().getStackTrace()) {
        sb.append("\n\tat ").append(stackTraceElement.toString());
      }
      SAMPLING_LOG.warn("Operation should be performed within a transaction. "
          + "This operation may require a transaction in the future. {}", sb);
    }
    // For backwards compatibility (the user might have called PartitionedFileSet#getPartitionOutput
    // outside of a transaction), we can't check partition existence via the partitionsTable.
    // As a fallback, check the file system.
    Location partitionLocation = files.getLocation(getOutputPath(key));
    if (exists(partitionLocation)) {
      throw new DataSetException(String.format("Location %s for partition key %s already exists", partitionLocation, key));
    }
  } else {
    Row row = partitionsTable.get(rowKey);
    if (!row.isEmpty()) {
      throw new PartitionAlreadyExistsException(getName(), key);
    }
  }
  return rowKey;
}
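assertNotExists is the guard behind PartitionedFileSet#getPartitionOutput; the non-transactional branch exists only for legacy callers, and its file-system check is weaker than the partitions-table check. A hedged sketch of the normal, transactional write path, using the public PartitionKey builder (the field names are illustrative):

import io.cdap.cdap.api.dataset.lib.PartitionKey;
import io.cdap.cdap.api.dataset.lib.PartitionOutput;
import io.cdap.cdap.api.dataset.lib.PartitionedFileSet;

void writeNewPartition(PartitionedFileSet pfs) {
  PartitionKey key = PartitionKey.builder()
      .addStringField("league", "nfl")  // illustrative partitioning fields
      .addIntField("season", 2017)
      .build();
  // fails (via assertNotExists) with PartitionAlreadyExistsException if the key is taken
  PartitionOutput output = pfs.getPartitionOutput(key);
  // ... write files under output.getLocation() ...
  output.addPartition();  // registers the partition in the partitions table
}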
use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.
the class PartitionedFileSetDataset method getOutputFormatConfiguration.
@Override
public Map<String, String> getOutputFormatConfiguration() {
  checkNotExternal();
  // copy the output properties of the embedded file set to the output arguments
  Map<String, String> outputArgs = new HashMap<>(files.getOutputFormatConfiguration());
  // we set the file set's output path in the definition's getDataset(), so there is no need to configure it again;
  // here we just validate that an output partition key or dynamic partitioner was specified in the arguments
  PartitionKey outputKey = PartitionedFileSetArguments.getOutputPartitionKey(runtimeArguments, getPartitioning());
  if (outputKey == null) {
    String dynamicPartitionerClassName = PartitionedFileSetArguments.getDynamicPartitioner(runtimeArguments);
    if (dynamicPartitionerClassName == null) {
      throw new DataSetException("Either a partition key or a DynamicPartitioner class must be given as a runtime argument.");
    }
    copyDynamicPartitionerArguments(runtimeArguments, outputArgs);
    outputArgs.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME, files.getOutputFormatClassName());
    outputArgs.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET, getName());
  } else {
    assertNotExists(outputKey, true);
  }
  return ImmutableMap.copyOf(outputArgs);
}
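The DataSetException above fires when neither a partition key nor a dynamic partitioner is present in the runtime arguments. Callers set one of the two through PartitionedFileSetArguments before the run; a short sketch (the field name and the partitioner class are illustrative):

import java.util.HashMap;
import java.util.Map;
import io.cdap.cdap.api.dataset.lib.PartitionKey;
import io.cdap.cdap.api.dataset.lib.PartitionedFileSetArguments;

Map<String, String> args = new HashMap<>();
// option 1: a fixed output partition key, validated by assertNotExists above
PartitionedFileSetArguments.setOutputPartitionKey(
    args, PartitionKey.builder().addStringField("league", "nfl").build());
// option 2 (instead): a DynamicPartitioner that picks a key per record
// PartitionedFileSetArguments.setDynamicPartitioner(args, MyDynamicPartitioner.class);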
use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.
the class PartitionedFileSetDataset method fixPartitions.
/**
 * This method can bring a partitioned file set in sync with explore. It scans the partition table and adds
 * every partition to explore. It will start multiple transactions, processing a batch of partitions in each
 * transaction. Optionally, it can disable and re-enable explore first, that is, drop and recreate the Hive table.
 *
 * @param transactional the Transactional for executing transactions
 * @param datasetName the name of the dataset to fix
 * @param doDisable whether to disable and re-enable explore first
 * @param partitionsPerTx how many partitions to process per transaction
 * @param verbose whether to log verbosely. If true, this will log a message for every partition; otherwise it
 *     will only log a report of how many partitions were added / could not be added.
 */
@Beta
@SuppressWarnings("unused")
public static void fixPartitions(Transactional transactional, final String datasetName, boolean doDisable,
    final int partitionsPerTx, final boolean verbose) {
  if (doDisable) {
    try {
      transactional.execute(new TxRunnable() {
        @Override
        public void run(io.cdap.cdap.api.data.DatasetContext context) throws Exception {
          PartitionedFileSetDataset pfs = context.getDataset(datasetName);
          pfs.disableExplore();
          // truncating = true, because this is like truncating
          pfs.enableExplore(true);
        }
      });
    } catch (TransactionFailureException e) {
      throw new DataSetException("Unable to disable and enable Explore", e.getCause());
    } catch (RuntimeException e) {
      if (e.getCause() instanceof TransactionFailureException) {
        throw new DataSetException("Unable to disable and enable Explore", e.getCause().getCause());
      }
      throw e;
    }
  }
  final AtomicReference<PartitionKey> startKey = new AtomicReference<>();
  final AtomicLong errorCount = new AtomicLong(0L);
  final AtomicLong successCount = new AtomicLong(0L);
  do {
    try {
      transactional.execute(new TxRunnable() {
        @Override
        public void run(io.cdap.cdap.api.data.DatasetContext context) throws Exception {
          final PartitionedFileSetDataset pfs = context.getDataset(datasetName);
          // compute the start row for the scan, then reset the remembered start key to null
          byte[] startRow = startKey.get() == null ? null : generateRowKey(startKey.get(), pfs.getPartitioning());
          startKey.set(null);
          PartitionConsumer consumer = new PartitionConsumer() {
            int count = 0;

            @Override
            public void consume(PartitionKey key, String path, @Nullable PartitionMetadata metadata) {
              if (count >= partitionsPerTx) {
                // reached the limit: remember this key as the start for the next round
                startKey.set(key);
                return;
              }
              try {
                pfs.addPartitionToExplore(key, path);
                successCount.incrementAndGet();
                if (verbose) {
                  LOG.info("Added partition {} with path {}", key, path);
                }
              } catch (DataSetException e) {
                errorCount.incrementAndGet();
                if (verbose) {
                  LOG.warn(e.getMessage(), e);
                }
              }
              count++;
            }
          };
          pfs.getPartitions(null, consumer, false, startRow, null, partitionsPerTx + 1);
        }
      });
    } catch (TransactionConflictException e) {
      throw new DataSetException("Transaction conflict while reading partitions. This should never happen. "
          + "Make sure that no other programs are using this dataset at the same time.");
    } catch (TransactionFailureException e) {
      throw new DataSetException("Transaction failure: " + e.getMessage(), e.getCause());
    } catch (RuntimeException e) {
      // this looks like duplication but is needed in case this is run from a worker: see CDAP-6837
      if (e.getCause() instanceof TransactionConflictException) {
        throw new DataSetException("Transaction conflict while reading partitions. This should never happen. "
            + "Make sure that no other programs are using this dataset at the same time.");
      } else if (e.getCause() instanceof TransactionFailureException) {
        throw new DataSetException("Transaction failure: " + e.getMessage(), e.getCause().getCause());
      } else {
        throw e;
      }
    }
    // if startKey is null, we consumed less than the limit in this round -> done
  } while (startKey.get() != null);
  LOG.info("Added {} partitions, failed to add {} partitions.", successCount.get(), errorCount.get());
}
use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.
the class PartitionedFileSetDataset method dropPartition.
@WriteOnly
@Override
public void dropPartition(PartitionKey key) {
  byte[] rowKey = generateRowKey(key, partitioning);
  PartitionDetail partition = getPartition(key);
  if (partition == null) {
    // silently ignore non-existing partitions
    return;
  }
  // TODO: make DDL operations transactional [CDAP-1393]
  dropPartitionFromExplore(key);
  partitionsTable.delete(rowKey);
  if (!isExternal) {
    Location partitionLocation = partition.getLocation();
    try {
      if (partitionLocation.exists()) {
        Location dstLocation = getQuarantineLocation().append(partition.getRelativePath());
        Location dstParent = Locations.getParent(dstLocation);
        // shouldn't be null, since dstLocation was created by appending to a location, so it must have a parent
        Preconditions.checkNotNull(dstParent);
        // before moving into quarantine, we need to ensure that the parent location exists
        if (!dstParent.exists()) {
          if (!dstParent.mkdirs()) {
            throw new DataSetException(String.format("Failed to create parent directory %s", dstParent));
          }
        }
        partitionLocation.renameTo(dstLocation);
      }
    } catch (IOException ioe) {
      throw new DataSetException(String.format("Failed to move location %s into quarantine", partitionLocation), ioe);
    }
    operationsInThisTx.add(new DropPartitionOperation(key, partition.getRelativePath()));
  }
}
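Note that for non-external datasets the files are moved into a quarantine location rather than deleted outright, and the DropPartitionOperation recorded in operationsInThisTx tracks the move so it can be dealt with when the surrounding transaction completes. A hedged caller sketch, with an illustrative partitioning field:

import io.cdap.cdap.api.dataset.lib.PartitionKey;
import io.cdap.cdap.api.dataset.lib.PartitionedFileSet;

void dropSeason(PartitionedFileSet pfs, int season) {
  PartitionKey key = PartitionKey.builder()
      .addIntField("season", season)  // illustrative field
      .build();
  // non-existing partitions are silently ignored; may throw DataSetException
  // if the files cannot be moved into quarantine
  pfs.dropPartition(key);
}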