Search in sources :

Example 41 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.

the class PartitionedFileSetDataset method scannerToPartitions.

/**
 * Reads partitions from the rows of a scanner, appending those accepted by the predicate to the given list,
 * and stops at a transaction boundary once the limit has been reached.
 *
 * <p>Several partitions may share one transaction write pointer. The limit is only evaluated when the write
 * pointer changes from one row to the next, so the partitions of a single write pointer are either all
 * processed or not processed at all. As a consequence the list may end up holding more entries than the limit.
 *
 * @param scanner the scanner on the partitions table from which to read partitions
 * @param partitions list to which the qualifying partitions are appended
 * @param limit number of collected partitions at or above which partitions of a subsequent write pointer
 *              are no longer added
 * @param predicate filter that a partition must pass in order to be added to the list
 * @return the write pointer of the first row that was reached but not processed because the limit was met,
 *         or {@code null} if the scanner was exhausted. This value can be useful in future scans.
 */
@Nullable
private Long scannerToPartitions(Scanner scanner, List<PartitionDetail> partitions, int limit, Predicate<PartitionDetail> predicate) {
    Long previousWritePointer = null;
    for (Row row = scanner.next(); row != null; row = scanner.next()) {
        PartitionKey partitionKey = parseRowKey(row.getRow(), partitioning);
        String path = Bytes.toString(row.get(RELATIVE_PATH));
        long writePointer = Bytes.toLong(row.get(WRITE_PTR_COL));

        // Only check the limit when this row was written under a different write pointer than the previous row.
        // That way the partitions of one write pointer are consumed entirely or not at all, which matters
        // because the returned marker is a write pointer rather than a row position.
        boolean crossedTransactionBoundary = previousWritePointer != null && previousWritePointer != writePointer;
        if (crossedTransactionBoundary && partitions.size() >= limit) {
            return writePointer;
        }
        previousWritePointer = writePointer;

        BasicPartitionDetail candidate = new BasicPartitionDetail(PartitionedFileSetDataset.this, path, partitionKey, metadataFromRow(row));
        if (predicate.apply(candidate)) {
            partitions.add(candidate);
        }
    }
    // every row of the scanner was processed
    return null;
}
Also used : AtomicLong(java.util.concurrent.atomic.AtomicLong) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Row(io.cdap.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)

Example 42 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.

the class PartitionedFileSetDataset method removeMetadata.

/**
 * Deletes the columns corresponding to the given metadata keys from the row of a partition.
 *
 * @param key the key of the partition whose metadata is to be removed
 * @param metadataKeys the metadata keys to remove
 * @throws PartitionNotFoundException if the partitions table has no data for the partition's row key
 */
@Override
public void removeMetadata(PartitionKey key, Set<String> metadataKeys) {
    final byte[] partitionRowKey = generateRowKey(key, partitioning);
    // the partition must exist before any of its metadata can be removed
    if (partitionsTable.get(partitionRowKey).isEmpty()) {
        throw new PartitionNotFoundException(key, getName());
    }
    // translate every metadata key into the column under which it is stored
    byte[][] columnsToDelete = new byte[metadataKeys.size()][];
    int position = 0;
    for (String metadataKey : metadataKeys) {
        columnsToDelete[position] = columnKeyFromMetadataKey(metadataKey);
        position++;
    }
    partitionsTable.delete(partitionRowKey, columnsToDelete);
}
Also used : PartitionNotFoundException(io.cdap.cdap.api.dataset.PartitionNotFoundException) Row(io.cdap.cdap.api.dataset.table.Row)

Example 43 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.

the class PartitionedFileSetDataset method getPartition.

/**
 * Looks up a single partition by its key.
 *
 * @param key the key of the partition to look up
 * @return the partition's details, or {@code null} if the partitions table has no row for the key or the
 *         row has no relative path column
 */
@ReadOnly
@Override
public PartitionDetail getPartition(PartitionKey key) {
    Row partitionRow = partitionsTable.get(generateRowKey(key, partitioning));
    if (!partitionRow.isEmpty()) {
        byte[] relativePathBytes = partitionRow.get(RELATIVE_PATH);
        if (relativePathBytes != null) {
            String relativePath = Bytes.toString(relativePathBytes);
            return new BasicPartitionDetail(this, relativePath, key, metadataFromRow(partitionRow));
        }
    }
    // either there is no such row, or the row does not carry a path
    return null;
}
Also used : Row(io.cdap.cdap.api.dataset.table.Row) ReadOnly(io.cdap.cdap.api.annotation.ReadOnly)

Example 44 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.

the class PartitionedFileSetDataset method getPartitions.

/**
 * Scans the partitions table between two row keys and passes each partition that matches the filter to
 * the given consumer, until the scanner is exhausted or the limit is reached.
 *
 * <p>Rows whose key cannot be parsed, and rows rejected by the filter, are skipped without counting against
 * the limit. A row that passes the filter always counts against the limit, even if it has no relative path
 * column and is therefore not handed to the consumer. If no row was counted at all,
 * {@code warnIfInvalidPartitionFilter} is called with the filter.
 *
 * @param filter if non-null, only partitions whose key matches this filter are considered
 * @param consumer receives the key, the relative path and (optionally) the metadata of each partition
 * @param decodeMetadata whether to decode the metadata from the row; if false, the consumer receives
 *                       {@code null} as the metadata
 * @param startKey row key passed to the table scan as the start of the range; may be null
 * @param endKey row key passed to the table scan as the end of the range; may be null
 * @param limit maximum number of rows to count before the scan stops
 */
private void getPartitions(@Nullable PartitionFilter filter, PartitionConsumer consumer, boolean decodeMetadata, @Nullable byte[] startKey, @Nullable byte[] endKey, long limit) {
    long count = 0L;
    try (Scanner scanner = partitionsTable.scan(startKey, endKey)) {
        while (count < limit) {
            Row row = scanner.next();
            if (row == null) {
                break;
            }
            PartitionKey key;
            try {
                key = parseRowKey(row.getRow(), partitioning);
            } catch (IllegalArgumentException e) {
                // A row with an unparseable key is deliberately skipped rather than failing the whole scan.
                // Parameterized logging avoids formatting the message when debug logging is disabled.
                LOG.debug("Failed to parse row key for partitioned file set '{}': {}", getName(), Bytes.toStringBinary(row.getRow()));
                continue;
            }
            if (filter != null && !filter.match(key)) {
                continue;
            }
            byte[] pathBytes = row.get(RELATIVE_PATH);
            if (pathBytes != null) {
                consumer.consume(key, Bytes.toString(pathBytes), decodeMetadata ? metadataFromRow(row) : null);
            }
            // counted regardless of whether the row had a path and was consumed
            count++;
        }
        if (count == 0) {
            warnIfInvalidPartitionFilter(filter, partitioning);
        }
    }
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Row(io.cdap.cdap.api.dataset.table.Row)

Example 45 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by cdapio.

the class PartitionedFileSetDataset method addPartition.

/**
 * Registers a partition in the partitions table, either creating it or appending to an existing one.
 *
 * <p>The operation is recorded in {@code operationsInThisTx} before the table is written. Only when a new
 * partition is created are the relative path and creation time stored and the partition added to Explore;
 * the last modification time, the metadata and the transaction's write pointer are written in both cases.
 *
 * @param key the key of the partition
 * @param path the relative path of the partition
 * @param metadata the metadata to store with the partition
 * @param filesCreated passed on to the {@code AddPartitionOperation} recorded for this transaction
 * @param allowAppend whether it is acceptable for the partition to exist already
 * @throws PartitionAlreadyExistsException if the partition exists and {@code allowAppend} is false
 * @throws DataSetException if the partition exists with a path different from {@code path}
 */
public void addPartition(PartitionKey key, String path, Map<String, String> metadata, boolean filesCreated, boolean allowAppend) {
    byte[] rowKey = generateRowKey(key, partitioning);
    Row existingRow = partitionsTable.get(rowKey);
    boolean appending = !existingRow.isEmpty();
    if (appending) {
        if (!allowAppend) {
            throw new PartitionAlreadyExistsException(getName(), key);
        }
        // the paths can differ if the user originally created the partition with a custom relative path
        String existingPath = Bytes.toString(existingRow.get(RELATIVE_PATH));
        if (!path.equals(existingPath)) {
            String message = String.format("Attempting to append to Dataset '%s', to partition '%s' with a " + "different path. Original path: '%s'. New path: '%s'", getName(), key.toString(), existingPath, path);
            throw new DataSetException(message);
        }
    }
    LOG.debug("{} partition with key {} and path {} to dataset {}", appending ? "Appending to" : "Creating", key, path, getName());

    // record the operation for this transaction before touching the table
    AddPartitionOperation addOperation = new AddPartitionOperation(key, path, filesCreated);
    operationsInThisTx.add(addOperation);

    Put partitionPut = new Put(rowKey);
    byte[] timestamp = Bytes.toBytes(System.currentTimeMillis());
    if (!appending) {
        // path and creation time are only written when the partition is first created
        partitionPut.add(RELATIVE_PATH, Bytes.toBytes(path));
        partitionPut.add(CREATION_TIME_COL, timestamp);
    }
    partitionPut.add(LAST_MODIFICATION_TIME_COL, timestamp);
    // updates of existing metadata are allowed, because an update can only happen on an append
    addMetadataToPut(existingRow, metadata, partitionPut, true);
    // index each row by its transaction's write pointer
    partitionPut.add(WRITE_PTR_COL, tx.getWritePointer());
    partitionsTable.put(partitionPut);

    if (!appending) {
        addPartitionToExplore(key, path);
        addOperation.setExplorePartitionCreated();
    }
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) Row(io.cdap.cdap.api.dataset.table.Row) PartitionAlreadyExistsException(io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException) Put(io.cdap.cdap.api.dataset.table.Put)

Aggregations

Row (io.cdap.cdap.api.dataset.table.Row)166 Scanner (io.cdap.cdap.api.dataset.table.Scanner)81 Test (org.junit.Test)50 Table (io.cdap.cdap.api.dataset.table.Table)34 Put (io.cdap.cdap.api.dataset.table.Put)29 ArrayList (java.util.ArrayList)26 TransactionExecutor (org.apache.tephra.TransactionExecutor)26 Get (io.cdap.cdap.api.dataset.table.Get)24 Schema (io.cdap.cdap.api.data.schema.Schema)21 HashMap (java.util.HashMap)19 MDSKey (io.cdap.cdap.data2.dataset2.lib.table.MDSKey)16 Transaction (org.apache.tephra.Transaction)16 TransactionAware (org.apache.tephra.TransactionAware)16 IOException (java.io.IOException)14 Map (java.util.Map)14 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)13 DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin)12 WriteOnly (io.cdap.cdap.api.annotation.WriteOnly)10 DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue)10 HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable)10