
Example 41 with DataSetException

Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.

The class PartitionedFileSetDataset, method fixPartitions.

/**
 * This method can bring a partitioned file set in sync with explore. It scans the partition table and adds
 * every partition to explore. It will start multiple transactions, processing a batch of partitions in each
 * transaction. Optionally, it can disable and re-enable explore first, that is, drop and recreate the Hive table.
 * @param transactional the Transactional for executing transactions
 * @param datasetName the name of the dataset to fix
 * @param doDisable whether to disable and re-enable explore first
 * @param partitionsPerTx how many partitions to process per transaction
 * @param verbose whether to log verbosely. If true, this will log a message for every partition; otherwise it
 *                will only log a report of how many partitions were added / could not be added.
 */
@Beta
@SuppressWarnings("unused")
public static void fixPartitions(Transactional transactional, final String datasetName, boolean doDisable, final int partitionsPerTx, final boolean verbose) {
    if (doDisable) {
        try {
            transactional.execute(new TxRunnable() {

                @Override
                public void run(io.cdap.cdap.api.data.DatasetContext context) throws Exception {
                    PartitionedFileSetDataset pfs = context.getDataset(datasetName);
                    pfs.disableExplore();
                    // truncating = true: dropping and recreating the Hive table is effectively a truncate
                    pfs.enableExplore(true);
                }
            });
        } catch (TransactionFailureException e) {
            throw new DataSetException("Unable to disable and enable Explore", e.getCause());
        } catch (RuntimeException e) {
            if (e.getCause() instanceof TransactionFailureException) {
                throw new DataSetException("Unable to disable and enable Explore", e.getCause().getCause());
            }
            throw e;
        }
    }
    final AtomicReference<PartitionKey> startKey = new AtomicReference<>();
    final AtomicLong errorCount = new AtomicLong(0L);
    final AtomicLong successCount = new AtomicLong(0L);
    do {
        try {
            transactional.execute(new TxRunnable() {

                @Override
                public void run(io.cdap.cdap.api.data.DatasetContext context) throws Exception {
                    final PartitionedFileSetDataset pfs = context.getDataset(datasetName);
                    // compute start row for the scan, reset remembered start key to null
                    byte[] startRow = startKey.get() == null ? null : generateRowKey(startKey.get(), pfs.getPartitioning());
                    startKey.set(null);
                    PartitionConsumer consumer = new PartitionConsumer() {

                        int count = 0;

                        @Override
                        public void consume(PartitionKey key, String path, @Nullable PartitionMetadata metadata) {
                            if (count >= partitionsPerTx) {
                                // reached the limit: remember this key as the start for the next round
                                startKey.set(key);
                                return;
                            }
                            try {
                                pfs.addPartitionToExplore(key, path);
                                successCount.incrementAndGet();
                                if (verbose) {
                                    LOG.info("Added partition {} with path {}", key, path);
                                }
                            } catch (DataSetException e) {
                                errorCount.incrementAndGet();
                                if (verbose) {
                                    LOG.warn(e.getMessage(), e);
                                }
                            }
                            count++;
                        }
                    };
                    pfs.getPartitions(null, consumer, false, startRow, null, partitionsPerTx + 1);
                }
            });
        } catch (TransactionConflictException e) {
            throw new DataSetException("Transaction conflict while reading partitions. This should never happen. " + "Make sure that no other programs are using this dataset at the same time.");
        } catch (TransactionFailureException e) {
            throw new DataSetException("Transaction failure: " + e.getMessage(), e.getCause());
        } catch (RuntimeException e) {
            // this looks like duplication but is needed in case this is run from a worker: see CDAP-6837
            if (e.getCause() instanceof TransactionConflictException) {
                throw new DataSetException("Transaction conflict while reading partitions. This should never happen. " + "Make sure that no other programs are using this dataset at the same time.");
            } else if (e.getCause() instanceof TransactionFailureException) {
                throw new DataSetException("Transaction failure: " + e.getMessage(), e.getCause().getCause());
            } else {
                throw e;
            }
        }
    } while (startKey.get() != null); // startKey == null means we consumed fewer than the limit this round -> done
    LOG.info("Added {} partitions, failed to add {} partitions.", successCount.get(), errorCount.get());
}
Also used : PartitionMetadata(io.cdap.cdap.api.dataset.lib.PartitionMetadata) TransactionConflictException(org.apache.tephra.TransactionConflictException) AtomicReference(java.util.concurrent.atomic.AtomicReference) TransactionFailureException(org.apache.tephra.TransactionFailureException) DataSetException(io.cdap.cdap.api.dataset.DataSetException) PartitionNotFoundException(io.cdap.cdap.api.dataset.PartitionNotFoundException) PartitionAlreadyExistsException(io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException) IOException(java.io.IOException) AtomicLong(java.util.concurrent.atomic.AtomicLong) TxRunnable(io.cdap.cdap.api.TxRunnable) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Beta(io.cdap.cdap.api.annotation.Beta)
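For orientation, a minimal usage sketch (not from the CDAP sources): fixPartitions is invoked from code that already has a Transactional, for example a Worker whose context implements Transactional. The dataset name "events" and the numeric arguments below are hypothetical values chosen for illustration.

// Hedged sketch: repair the Explore partitions of a PartitionedFileSet from inside a Worker.
// "events" and the batch size are illustrative, not taken from the CDAP sources.
Transactional transactional = getContext(); // WorkerContext implements Transactional
PartitionedFileSetDataset.fixPartitions(
    transactional,
    "events", // name of the dataset to fix
    true,     // drop and recreate the Hive table first
    100,      // partitions to process per transaction
    false);   // log only a summary instead of every partition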

Example 42 with DataSetException

Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.

The class PartitionedFileSetDataset, method postTxCommit.

@Override
public void postTxCommit() {
    // simply delete the quarantine directory for this transaction
    try {
        Location quarantine = getQuarantineLocation();
        if (quarantine.exists()) {
            boolean deleteSuccess = quarantine.delete(true);
            if (!deleteSuccess) {
                throw new DataSetException(String.format("Error deleting quarantine location %s.", quarantine));
            }
        }
    } catch (IOException e) {
        throw new DataSetException(String.format("Error deleting quarantine location for tx %s.", tx.getWritePointer()), e);
    }
    this.tx = null;
    super.postTxCommit();
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) IOException(java.io.IOException) Location(org.apache.twill.filesystem.Location)
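postTxCommit() is a Tephra TransactionAware callback that the transaction framework invokes only after a successful commit. A rough sketch of that lifecycle, assuming a TransactionSystemClient (txClient) and the dataset instance are at hand; exception handling is elided and the names are illustrative.

// Sketch only: how Tephra drives the callbacks around the dataset.
TransactionContext txContext = new TransactionContext(txClient, pfsDataset);
txContext.start();   // -> startTx(tx) on the dataset
// ... perform dataset operations; in this dataset, dropping a partition moves its
//     files into the quarantine location so a rollback can restore them ...
txContext.finish();  // -> commitTx(), then postTxCommit() on success,
                     //    which deletes the quarantine directory as shown above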

Example 43 with DataSetException

Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.

The class PartitionedFileSetDataset, method addPartition.

public void addPartition(PartitionKey key, String path, Map<String, String> metadata, boolean filesCreated, boolean allowAppend) {
    byte[] rowKey = generateRowKey(key, partitioning);
    Row row = partitionsTable.get(rowKey);
    boolean appending = !row.isEmpty();
    if (appending && !allowAppend) {
        throw new PartitionAlreadyExistsException(getName(), key);
    }
    if (appending) {
        // paths can differ if the user originally created the partition with a custom relative path
        String existingPath = Bytes.toString(row.get(RELATIVE_PATH));
        if (!path.equals(existingPath)) {
            throw new DataSetException(String.format("Attempting to append to Dataset '%s', to partition '%s' with a " + "different path. Original path: '%s'. New path: '%s'", getName(), key.toString(), existingPath, path));
        }
    }
    LOG.debug("{} partition with key {} and path {} to dataset {}", appending ? "Appending to" : "Creating", key, path, getName());
    AddPartitionOperation operation = new AddPartitionOperation(key, path, filesCreated);
    operationsInThisTx.add(operation);
    Put put = new Put(rowKey);
    byte[] nowInMillis = Bytes.toBytes(System.currentTimeMillis());
    if (!appending) {
        put.add(RELATIVE_PATH, Bytes.toBytes(path));
        put.add(CREATION_TIME_COL, nowInMillis);
    }
    put.add(LAST_MODIFICATION_TIME_COL, nowInMillis);
    // we allow updates, because an update will only happen if it's an append
    addMetadataToPut(row, metadata, put, true);
    // index each row by its transaction's write pointer
    put.add(WRITE_PTR_COL, tx.getWritePointer());
    partitionsTable.put(put);
    if (!appending) {
        addPartitionToExplore(key, path);
        operation.setExplorePartitionCreated();
    }
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) Row(io.cdap.cdap.api.dataset.table.Row) PartitionAlreadyExistsException(io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException) Put(io.cdap.cdap.api.dataset.table.Put)
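For comparison, the public PartitionedFileSet API wraps this method. A hedged usage sketch; the dataset name, key fields, and path are illustrative.

// Hypothetical usage: register a partition for files already written under "2024-01-01/00".
PartitionedFileSet pfs = context.getDataset("events"); // "events" is an illustrative name
PartitionKey key = PartitionKey.builder()
    .addStringField("date", "2024-01-01")
    .addIntField("hour", 0)
    .build();
pfs.addPartition(key, "2024-01-01/00"); // relative path under the fileset's base location
// Adding the same key again without append support raises PartitionAlreadyExistsException;
// appending with a different path raises the DataSetException shown above.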

Example 44 with DataSetException

Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.

The class BufferingTable, method internalIncrementAndGet.

@ReadWrite
protected Row internalIncrementAndGet(byte[] row, byte[][] columns, long[] amounts) {
    // Logic:
    // * fetching current values
    // * updating values
    // * updating in-memory store
    // * returning updated values as result
    // NOTE: there is a more efficient way to do this, but for now we keep the implementation simple rather than over-optimizing
    Map<byte[], byte[]> rowMap;
    try {
        rowMap = getRowMap(row, columns);
        reportRead(1);
    } catch (Exception e) {
        LOG.debug("incrementAndGet failed for table: " + getTransactionAwareName() + ", row: " + Bytes.toStringBinary(row), e);
        throw new DataSetException("incrementAndGet failed", e);
    }
    byte[][] updatedValues = new byte[columns.length][];
    NavigableMap<byte[], byte[]> result = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
    for (int i = 0; i < columns.length; i++) {
        byte[] column = columns[i];
        byte[] val = rowMap.get(column);
        // converting to long
        long longVal;
        if (val == null) {
            longVal = 0L;
        } else {
            if (val.length != Bytes.SIZEOF_LONG) {
                throw new NumberFormatException("Attempted to increment a value that is not convertible to long," + " row: " + Bytes.toStringBinary(row) + " column: " + Bytes.toStringBinary(column));
            }
            longVal = Bytes.toLong(val);
        }
        longVal += amounts[i];
        updatedValues[i] = Bytes.toBytes(longVal);
        result.put(column, updatedValues[i]);
    }
    putInternal(row, columns, updatedValues);
    reportWrite(1, getSize(row) + getSize(columns) + getSize(amounts));
    return new Result(row, result);
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) IOException(java.io.IOException) Result(io.cdap.cdap.api.dataset.table.Result) ReadWrite(io.cdap.cdap.api.annotation.ReadWrite)
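This method backs the public Table increment API. A hedged usage sketch; the table name, row, and column are illustrative.

// Hypothetical usage of the public API that internalIncrementAndGet supports.
Table counters = context.getDataset("counters");
// Atomically add 1 to the "visits" column of row "page-1" and read back the new value.
// The stored value must be an 8-byte long, otherwise a NumberFormatException is thrown.
long visits = counters.incrementAndGet(Bytes.toBytes("page-1"), Bytes.toBytes("visits"), 1L);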

Example 45 with DataSetException

Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.

The class BufferingTable, method scan.

@ReadOnly
@Override
public Scanner scan(Scan scan) {
    ensureTransactionIsStarted();
    NavigableMap<byte[], NavigableMap<byte[], Update>> bufferMap = scanBuffer(scan);
    try {
        return new BufferingScanner(bufferMap, scanPersisted(scan));
    } catch (Exception e) {
        LOG.debug("scan failed for table: " + getTransactionAwareName() + ", scan: " + scan.toString(), e);
        throw new DataSetException("scan failed", e);
    }
}
Also used : NavigableMap(java.util.NavigableMap) DataSetException(io.cdap.cdap.api.dataset.DataSetException) IOException(java.io.IOException) ReadOnly(io.cdap.cdap.api.annotation.ReadOnly)
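A hedged usage sketch of the scan path; the row keys and dataset name are illustrative. Uncommitted writes buffered in the current transaction are merged with persisted rows by the BufferingScanner.

// Hypothetical usage: scan a key range and iterate the merged (buffered + persisted) rows.
Table table = context.getDataset("events");
Scanner scanner = table.scan(new Scan(Bytes.toBytes("row-a"), Bytes.toBytes("row-z")));
try {
    Row row;
    while ((row = scanner.next()) != null) {
        LOG.info("Row {} has {} columns", Bytes.toString(row.getRow()), row.getColumns().size());
    }
} finally {
    scanner.close();
}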

Aggregations

DataSetException (io.cdap.cdap.api.dataset.DataSetException): 74
IOException (java.io.IOException): 54
ReadOnly (io.cdap.cdap.api.annotation.ReadOnly): 14
Map (java.util.Map): 12
TransactionFailureException (org.apache.tephra.TransactionFailureException): 12
Location (org.apache.twill.filesystem.Location): 12
PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey): 10
Result (io.cdap.cdap.api.dataset.table.Result): 10
NavigableMap (java.util.NavigableMap): 10
Test (org.junit.Test): 10
PartitionAlreadyExistsException (io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException): 8
TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet): 8
Put (org.apache.hadoop.hbase.client.Put): 8
ImmutableMap (com.google.common.collect.ImmutableMap): 6
WriteOnly (io.cdap.cdap.api.annotation.WriteOnly): 6
DatasetManagementException (io.cdap.cdap.api.dataset.DatasetManagementException): 6
PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet): 6
Put (io.cdap.cdap.api.dataset.table.Put): 6
Row (io.cdap.cdap.api.dataset.table.Row): 6
UnauthorizedException (io.cdap.cdap.security.spi.authorization.UnauthorizedException): 6