Example 51 with DataSetException

use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

the class PartitionedFileSetAdmin method truncate.

@Override
public void truncate() throws IOException {
    super.truncate();
    // explore metadata is not cleared by the truncate above: drop and re-create
    // the Hive table so it no longer lists the deleted partitions
    if (FileSetProperties.isExploreEnabled(spec.getProperties())) {
        ExploreFacade exploreFacade = exploreFacadeProvider.get();
        if (exploreFacade != null) {
            DatasetId instanceId = new DatasetId(context.getNamespaceId(), spec.getName());
            try {
                exploreFacade.disableExploreDataset(instanceId, spec);
                exploreFacade.enableExploreDataset(instanceId, spec, true);
            } catch (Exception e) {
                throw new DataSetException(String.format("Unable to reset explore on dataset %s", instanceId), e);
            }
        }
    }
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) ExploreFacade(io.cdap.cdap.explore.client.ExploreFacade) IOException(java.io.IOException) DatasetId(io.cdap.cdap.proto.id.DatasetId)
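
For context, a minimal sketch of how this truncate path is typically reached from application code: through the Admin interface rather than by instantiating the admin class directly. The custom action and the dataset name "events" are hypothetical, and whether the DataSetException surfaces directly or wrapped depends on how the platform routes the call.

import io.cdap.cdap.api.customaction.AbstractCustomAction;

// Hypothetical custom action; "events" is an assumed dataset name.
public class TruncateEventsAction extends AbstractCustomAction {

    @Override
    public void run() throws Exception {
        // The platform delegates this call to the dataset's admin, which for a
        // partitioned file set ends up in PartitionedFileSetAdmin.truncate() above.
        // A failure to re-enable explore then surfaces as a DataSetException.
        getContext().getAdmin().truncateDataset("events");
    }
}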

Example 52 with DataSetException

use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

the class PartitionedFileSetDataset method assertNotExists.

// Throws PartitionAlreadyExistsException if the partition key already exists (or a plain
// DataSetException when the filesystem is checked outside a transaction).
// Otherwise, returns the row key corresponding to the PartitionKey.
@ReadOnly
byte[] assertNotExists(PartitionKey key, boolean supportNonTransactional) {
    byte[] rowKey = generateRowKey(key, partitioning);
    if (tx == null && supportNonTransactional) {
        if (LOG.isWarnEnabled()) {
            StringBuilder sb = new StringBuilder();
            for (StackTraceElement stackTraceElement : Thread.currentThread().getStackTrace()) {
                sb.append("\n\tat ").append(stackTraceElement.toString());
            }
            SAMPLING_LOG.warn("Operation should be performed within a transaction. " + "This operation may require a transaction in the future. {}", sb);
        }
        // to handle backwards compatibility (the user might have called PartitionedFileSet#getPartitionOutput
        // outside of a transaction), we can't check partition existence via the partitionsTable. As a fallback,
        // check the filesystem.
        Location partitionLocation = files.getLocation(getOutputPath(key));
        if (exists(partitionLocation)) {
            throw new DataSetException(String.format("Location %s for partition key %s already exists: ", partitionLocation, key));
        }
    } else {
        Row row = partitionsTable.get(rowKey);
        if (!row.isEmpty()) {
            throw new PartitionAlreadyExistsException(getName(), key);
        }
    }
    return rowKey;
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) Row(io.cdap.cdap.api.dataset.table.Row) PartitionAlreadyExistsException(io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException) Location(org.apache.twill.filesystem.Location) ReadOnly(io.cdap.cdap.api.annotation.ReadOnly)
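
A minimal caller-side sketch of where this check fires: PartitionedFileSet#getPartitionOutput invokes it before handing out a location to write to. The dataset name "records" and the "date" field are assumptions for illustration; since PartitionAlreadyExistsException extends DataSetException, one catch covers both the transactional and the filesystem path.

import io.cdap.cdap.api.Transactional;
import io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException;
import io.cdap.cdap.api.dataset.lib.PartitionKey;
import io.cdap.cdap.api.dataset.lib.PartitionOutput;
import io.cdap.cdap.api.dataset.lib.PartitionedFileSet;
import org.apache.tephra.TransactionFailureException;

public class CreatePartitionExample {

    // "records" and the "date" partition field are hypothetical.
    void createDailyPartition(Transactional transactional) throws TransactionFailureException {
        transactional.execute(context -> {
            PartitionedFileSet pfs = context.getDataset("records");
            PartitionKey key = PartitionKey.builder().addStringField("date", "2024-01-01").build();
            try {
                // performs the existence check above before returning a writable location
                PartitionOutput output = pfs.getPartitionOutput(key);
                // ... write files under output.getLocation(), then register the partition:
                output.addPartition();
            } catch (PartitionAlreadyExistsException e) {
                // the row key was already present in the partitions table
            }
        });
    }
}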

Example 53 with DataSetException

use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

the class PartitionedFileSetDataset method getOutputFormatConfiguration.

@Override
public Map<String, String> getOutputFormatConfiguration() {
    checkNotExternal();
    // copy the output properties of the embedded file set to the output arguments
    Map<String, String> outputArgs = new HashMap<>(files.getOutputFormatConfiguration());
    // we set the file set's output path in the definition's getDataset(), so there is no need to configure it again.
    // here we just want to validate that an output partition key or dynamic partitioner was specified in the arguments.
    PartitionKey outputKey = PartitionedFileSetArguments.getOutputPartitionKey(runtimeArguments, getPartitioning());
    if (outputKey == null) {
        String dynamicPartitionerClassName = PartitionedFileSetArguments.getDynamicPartitioner(runtimeArguments);
        if (dynamicPartitionerClassName == null) {
            throw new DataSetException("Either a Partition key or a DynamicPartitioner class must be given as a runtime argument.");
        }
        copyDynamicPartitionerArguments(runtimeArguments, outputArgs);
        outputArgs.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME, files.getOutputFormatClassName());
        outputArgs.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET, getName());
    } else {
        assertNotExists(outputKey, true);
    }
    return ImmutableMap.copyOf(outputArgs);
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) HashMap(java.util.HashMap) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey)
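
A sketch of supplying the runtime argument this method validates, from a MapReduce program's initialize(). The program, the dataset name "records", and the "date" field are assumptions; omitting both the output partition key and a DynamicPartitioner is what triggers the DataSetException above.

import java.util.HashMap;
import java.util.Map;
import io.cdap.cdap.api.data.batch.Output;
import io.cdap.cdap.api.dataset.lib.PartitionKey;
import io.cdap.cdap.api.dataset.lib.PartitionedFileSetArguments;
import io.cdap.cdap.api.mapreduce.AbstractMapReduce;

// Hypothetical MapReduce that writes a single output partition of "records".
public class WriteRecordsMapReduce extends AbstractMapReduce {

    @Override
    public void initialize() throws Exception {
        Map<String, String> outputArgs = new HashMap<>();
        // satisfies the "partition key or DynamicPartitioner" check in
        // getOutputFormatConfiguration(); the "date" field name is assumed
        PartitionKey key = PartitionKey.builder().addStringField("date", "2024-01-01").build();
        PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, key);
        getContext().addOutput(Output.ofDataset("records", outputArgs));
    }
}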

Example 54 with DataSetException

use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

the class PartitionedFileSetDataset method fixPartitions.

/**
 * This method can bring a partitioned file set in sync with explore. It scans the partition table and adds
 * every partition to explore. It will start multiple transactions, processing a batch of partitions in each
 * transaction. Optionally, it can disable and re-enable explore first, that is, drop and recreate the Hive table.
 * @param transactional the Transactional for executing transactions
 * @param datasetName the name of the dataset to fix
 * @param doDisable whether to disable and re-enable explore first
 * @param partitionsPerTx how many partitions to process per transaction
 * @param verbose whether to log verbosely. If true, this will log a message for every partition; otherwise it
 *                will only log a report of how many partitions were added / could not be added.
 */
@Beta
@SuppressWarnings("unused")
public static void fixPartitions(Transactional transactional, final String datasetName, boolean doDisable, final int partitionsPerTx, final boolean verbose) {
    if (doDisable) {
        try {
            transactional.execute(new TxRunnable() {

                @Override
                public void run(io.cdap.cdap.api.data.DatasetContext context) throws Exception {
                    PartitionedFileSetDataset pfs = context.getDataset(datasetName);
                    pfs.disableExplore();
                    // truncating = true, because this is like truncating
                    pfs.enableExplore(true);
                }
            });
        } catch (TransactionFailureException e) {
            throw new DataSetException("Unable to disable and enable Explore", e.getCause());
        } catch (RuntimeException e) {
            if (e.getCause() instanceof TransactionFailureException) {
                throw new DataSetException("Unable to disable and enable Explore", e.getCause().getCause());
            }
            throw e;
        }
    }
    final AtomicReference<PartitionKey> startKey = new AtomicReference<>();
    final AtomicLong errorCount = new AtomicLong(0L);
    final AtomicLong successCount = new AtomicLong(0L);
    do {
        try {
            transactional.execute(new TxRunnable() {

                @Override
                public void run(io.cdap.cdap.api.data.DatasetContext context) throws Exception {
                    final PartitionedFileSetDataset pfs = context.getDataset(datasetName);
                    // compute start row for the scan, reset remembered start key to null
                    byte[] startRow = startKey.get() == null ? null : generateRowKey(startKey.get(), pfs.getPartitioning());
                    startKey.set(null);
                    PartitionConsumer consumer = new PartitionConsumer() {

                        int count = 0;

                        @Override
                        public void consume(PartitionKey key, String path, @Nullable PartitionMetadata metadata) {
                            if (count >= partitionsPerTx) {
                                // reached the limit: remember this key as the start for the next round
                                startKey.set(key);
                                return;
                            }
                            try {
                                pfs.addPartitionToExplore(key, path);
                                successCount.incrementAndGet();
                                if (verbose) {
                                    LOG.info("Added partition {} with path {}", key, path);
                                }
                            } catch (DataSetException e) {
                                errorCount.incrementAndGet();
                                if (verbose) {
                                    LOG.warn(e.getMessage(), e);
                                }
                            }
                            count++;
                        }
                    };
                    pfs.getPartitions(null, consumer, false, startRow, null, partitionsPerTx + 1);
                }
            });
        } catch (TransactionConflictException e) {
            throw new DataSetException("Transaction conflict while reading partitions. This should never happen. " + "Make sure that no other programs are using this dataset at the same time.");
        } catch (TransactionFailureException e) {
            throw new DataSetException("Transaction failure: " + e.getMessage(), e.getCause());
        } catch (RuntimeException e) {
            // this looks like duplication but is needed in case this is run from a worker: see CDAP-6837
            if (e.getCause() instanceof TransactionConflictException) {
                throw new DataSetException("Transaction conflict while reading partitions. This should never happen. " + "Make sure that no other programs are using this dataset at the same time.");
            } else if (e.getCause() instanceof TransactionFailureException) {
                throw new DataSetException("Transaction failure: " + e.getMessage(), e.getCause().getCause());
            } else {
                throw e;
            }
        }
        // if startKey is null, then we consumed less than the limit in this round -> done
    } while (startKey.get() != null);
    LOG.info("Added {} partitions, failed to add {} partitions.", successCount.get(), errorCount.get());
}
Also used : PartitionMetadata(io.cdap.cdap.api.dataset.lib.PartitionMetadata) TransactionConflictException(org.apache.tephra.TransactionConflictException) AtomicReference(java.util.concurrent.atomic.AtomicReference) TransactionFailureException(org.apache.tephra.TransactionFailureException) DataSetException(io.cdap.cdap.api.dataset.DataSetException) PartitionNotFoundException(io.cdap.cdap.api.dataset.PartitionNotFoundException) PartitionAlreadyExistsException(io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException) IOException(java.io.IOException) AtomicLong(java.util.concurrent.atomic.AtomicLong) TxRunnable(io.cdap.cdap.api.TxRunnable) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Beta(io.cdap.cdap.api.annotation.Beta)
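
Because fixPartitions is static and only needs a Transactional to drive its batched transactions, it can be invoked from, for example, a worker. A minimal sketch under stated assumptions: the worker and dataset name are hypothetical, the batch size of 100 is arbitrary, WorkerContext is assumed to implement Transactional in the CDAP version in use, and PartitionedFileSetDataset lives in CDAP's internal packages rather than the public cdap-api.

import io.cdap.cdap.api.worker.AbstractWorker;
// internal class, not part of the public cdap-api (assumed available on the classpath)
import io.cdap.cdap.data2.dataset2.lib.partitioned.PartitionedFileSetDataset;

// Hypothetical worker; "records" and the batch size are assumptions.
public class FixPartitionsWorker extends AbstractWorker {

    @Override
    public void run() {
        // doDisable = true also drops and re-creates the Hive table first;
        // verbose = false logs only the final success/failure counts
        PartitionedFileSetDataset.fixPartitions(getContext(), "records", true, 100, false);
    }
}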

Example 55 with DataSetException

use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

the class PartitionedFileSetDataset method dropPartition.

@WriteOnly
@Override
public void dropPartition(PartitionKey key) {
    byte[] rowKey = generateRowKey(key, partitioning);
    PartitionDetail partition = getPartition(key);
    if (partition == null) {
        // silently ignore non-existing partitions
        return;
    }
    // TODO: make DDL operations transactional [CDAP-1393]
    dropPartitionFromExplore(key);
    partitionsTable.delete(rowKey);
    if (!isExternal) {
        Location partitionLocation = partition.getLocation();
        try {
            if (partitionLocation.exists()) {
                Location dstLocation = getQuarantineLocation().append(partition.getRelativePath());
                Location dstParent = Locations.getParent(dstLocation);
                // shouldn't be null, since dstLocation was created by appending to a location, so it must have a parent
                Preconditions.checkNotNull(dstParent);
                // before moving into quarantine, we need to ensure that parent location exists
                if (!dstParent.exists()) {
                    if (!dstParent.mkdirs()) {
                        throw new DataSetException(String.format("Failed to create parent directory %s", dstParent));
                    }
                }
                partitionLocation.renameTo(dstLocation);
            }
        } catch (IOException ioe) {
            throw new DataSetException(String.format("Failed to move location %s into quarantine", partitionLocation), ioe);
        }
        operationsInThisTx.add(new DropPartitionOperation(key, partition.getRelativePath()));
    }
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) IOException(java.io.IOException) PartitionDetail(io.cdap.cdap.api.dataset.lib.PartitionDetail) Location(org.apache.twill.filesystem.Location) WriteOnly(io.cdap.cdap.api.annotation.WriteOnly)
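
A caller-side sketch with hypothetical names ("records", the "date" field). dropPartition must run inside a transaction; it is a no-op for a missing key, and a failed move into the quarantine directory surfaces as the DataSetException above, which aborts the transaction so the delete from the partitions table rolls back.

import io.cdap.cdap.api.Transactional;
import io.cdap.cdap.api.dataset.lib.PartitionKey;
import io.cdap.cdap.api.dataset.lib.PartitionedFileSet;
import org.apache.tephra.TransactionFailureException;

public class DropPartitionExample {

    // "records" and the "date" partition field are hypothetical.
    void dropDailyPartition(Transactional transactional) throws TransactionFailureException {
        transactional.execute(context -> {
            PartitionedFileSet pfs = context.getDataset("records");
            PartitionKey key = PartitionKey.builder().addStringField("date", "2024-01-01").build();
            // no-op if the partition does not exist; throws DataSetException if its
            // files cannot be moved into the quarantine directory
            pfs.dropPartition(key);
        });
    }
}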

Aggregations

DataSetException (io.cdap.cdap.api.dataset.DataSetException) 74
IOException (java.io.IOException) 54
ReadOnly (io.cdap.cdap.api.annotation.ReadOnly) 14
Map (java.util.Map) 12
TransactionFailureException (org.apache.tephra.TransactionFailureException) 12
Location (org.apache.twill.filesystem.Location) 12
PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey) 10
Result (io.cdap.cdap.api.dataset.table.Result) 10
NavigableMap (java.util.NavigableMap) 10
Test (org.junit.Test) 10
PartitionAlreadyExistsException (io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException) 8
TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) 8
Put (org.apache.hadoop.hbase.client.Put) 8
ImmutableMap (com.google.common.collect.ImmutableMap) 6
WriteOnly (io.cdap.cdap.api.annotation.WriteOnly) 6
DatasetManagementException (io.cdap.cdap.api.dataset.DatasetManagementException) 6
PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet) 6
Put (io.cdap.cdap.api.dataset.table.Put) 6
Row (io.cdap.cdap.api.dataset.table.Row) 6
UnauthorizedException (io.cdap.cdap.security.spi.authorization.UnauthorizedException) 6