Search in sources :

Example 96 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class PartitionedFileSetDataset method setMetadata.

/**
 * Writes metadata for an existing partition into the partitions table.
 *
 * The partition's current row is read first and handed to {@code addMetadataToPut} together with
 * the new metadata and the {@code allowUpdates} flag; the resulting {@link Put} is then written back.
 *
 * @param key the key of the partition whose metadata is being set
 * @param metadata the metadata entries to write
 * @param allowUpdates passed through to {@code addMetadataToPut}
 *                     (presumably controls whether existing entries may be overwritten - confirm there)
 * @throws PartitionNotFoundException if the partitions table has no row for the given key
 */
private void setMetadata(PartitionKey key, Map<String, String> metadata, boolean allowUpdates) {
    final byte[] partitionRowKey = generateRowKey(key, partitioning);
    Row existingRow = partitionsTable.get(partitionRowKey);
    if (!existingRow.isEmpty()) {
        Put metadataPut = new Put(partitionRowKey);
        addMetadataToPut(existingRow, metadata, metadataPut, allowUpdates);
        partitionsTable.put(metadataPut);
        return;
    }
    // an empty row means the partition was never registered in the partitions table
    throw new PartitionNotFoundException(key, getName());
}
Also used : PartitionNotFoundException(io.cdap.cdap.api.dataset.PartitionNotFoundException) Row(io.cdap.cdap.api.dataset.table.Row) Put(io.cdap.cdap.api.dataset.table.Put)

Example 97 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class PartitionedFileSetDataset method scannerToPartitions.

/**
 * Reads rows from a scanner over the partitions table, converts each row into a partition, and appends the
 * partitions accepted by the predicate to the given list.
 *
 * The limit is only checked when the transaction write pointer of the current row differs from that of the
 * previous row. Rows that share a write pointer with the preceding row are therefore never cut off by the limit,
 * and the list may end up holding more than {@code limit} partitions.
 *
 * @param scanner the scanner on the partitions table from which to read partitions
 * @param partitions list to add the qualifying partitions to
 * @param limit once the list holds at least this many partitions, scanning stops at the next row whose
 *              write pointer differs from the previous row's
 * @param predicate predicate a partition must satisfy to be added to the list
 * @return the write pointer of the row at which scanning stopped without consuming it, or {@code null} if the
 *         scanner was exhausted. This value can be useful in future scans.
 */
@Nullable
private Long scannerToPartitions(Scanner scanner, List<PartitionDetail> partitions, int limit, Predicate<PartitionDetail> predicate) {
    Long lastSeenTxId = null;
    for (Row current = scanner.next(); current != null; current = scanner.next()) {
        PartitionKey partitionKey = parseRowKey(current.getRow(), partitioning);
        String path = Bytes.toString(current.get(RELATIVE_PATH));
        long writePointer = Bytes.toLong(current.get(WRITE_PTR_COL));
        // The limit is enforced only at the boundary between two different write pointers, so that the
        // partitions of one transaction are never split; the returned marker is a transaction id.
        boolean crossedTxBoundary = lastSeenTxId != null && !lastSeenTxId.equals(writePointer);
        if (crossedTxBoundary && partitions.size() >= limit) {
            return writePointer;
        }
        lastSeenTxId = writePointer;
        BasicPartitionDetail detail = new BasicPartitionDetail(PartitionedFileSetDataset.this, path, partitionKey, metadataFromRow(current));
        if (predicate.apply(detail)) {
            partitions.add(detail);
        }
    }
    return null;
}
Also used : AtomicLong(java.util.concurrent.atomic.AtomicLong) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Row(io.cdap.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)

Example 98 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class PartitionedFileSetDataset method addPartition.

/**
 * Registers a partition in the partitions table, or appends to it if it already exists and appending is allowed.
 *
 * For a new partition the relative path and creation time are recorded and the partition is added to Explore.
 * In both cases the last-modification time, the metadata, and the current transaction's write pointer are written.
 *
 * @param key the partition key
 * @param path the relative path of the partition; when appending it must equal the path already stored
 * @param metadata metadata to store with the partition
 * @param filesCreated passed to the {@code AddPartitionOperation} recorded for this transaction
 * @param allowAppend whether an already existing partition may be appended to
 * @throws PartitionAlreadyExistsException if the partition exists and {@code allowAppend} is false
 * @throws DataSetException if appending with a path that differs from the partition's stored path
 */
public void addPartition(PartitionKey key, String path, Map<String, String> metadata, boolean filesCreated, boolean allowAppend) {
    byte[] partitionRowKey = generateRowKey(key, partitioning);
    Row existingRow = partitionsTable.get(partitionRowKey);
    boolean isNewPartition = existingRow.isEmpty();
    if (!isNewPartition) {
        if (!allowAppend) {
            throw new PartitionAlreadyExistsException(getName(), key);
        }
        // the stored path can differ if the user originally created the partition with a custom relative path
        String storedPath = Bytes.toString(existingRow.get(RELATIVE_PATH));
        if (!path.equals(storedPath)) {
            String message = String.format("Attempting to append to Dataset '%s', to partition '%s' with a different path. Original path: '%s'. New path: '%s'", getName(), key.toString(), storedPath, path);
            throw new DataSetException(message);
        }
    }
    LOG.debug("{} partition with key {} and path {} to dataset {}", isNewPartition ? "Creating" : "Appending to", key, path, getName());

    // record the operation for this transaction before touching the table
    AddPartitionOperation addOperation = new AddPartitionOperation(key, path, filesCreated);
    operationsInThisTx.add(addOperation);

    Put partitionPut = new Put(partitionRowKey);
    byte[] timestampBytes = Bytes.toBytes(System.currentTimeMillis());
    if (isNewPartition) {
        // path and creation time are only written once, when the partition is first created
        partitionPut.add(RELATIVE_PATH, Bytes.toBytes(path));
        partitionPut.add(CREATION_TIME_COL, timestampBytes);
    }
    partitionPut.add(LAST_MODIFICATION_TIME_COL, timestampBytes);
    // updates are allowed here, because an update can only happen when appending
    addMetadataToPut(existingRow, metadata, partitionPut, true);
    // index each row by its transaction's write pointer
    partitionPut.add(WRITE_PTR_COL, tx.getWritePointer());
    partitionsTable.put(partitionPut);

    if (isNewPartition) {
        addPartitionToExplore(key, path);
        addOperation.setExplorePartitionCreated();
    }
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) Row(io.cdap.cdap.api.dataset.table.Row) PartitionAlreadyExistsException(io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException) Put(io.cdap.cdap.api.dataset.table.Put)

Example 99 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class PartitionedFileSetDataset method getPartition.

/**
 * Looks up a single partition by its key.
 *
 * @param key the partition key to look up
 * @return the partition's details, or {@code null} if the partitions table has no row for the key
 *         or the row has no relative path column
 */
@ReadOnly
@Override
public PartitionDetail getPartition(PartitionKey key) {
    Row partitionRow = partitionsTable.get(generateRowKey(key, partitioning));
    if (partitionRow.isEmpty()) {
        return null;
    }
    byte[] relativePathBytes = partitionRow.get(RELATIVE_PATH);
    // a row without a path column is treated the same as a missing partition
    return relativePathBytes == null
        ? null
        : new BasicPartitionDetail(this, Bytes.toString(relativePathBytes), key, metadataFromRow(partitionRow));
}
Also used : Row(io.cdap.cdap.api.dataset.table.Row) ReadOnly(io.cdap.cdap.api.annotation.ReadOnly)

Example 100 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class MetadataStoreDataset method getKV.

/**
 * Fetches the raw values stored in the default COLUMN for the given ids in one batched read.
 * Ids whose row is empty, or whose row has no value in COLUMN, are left out of the result.
 *
 * @param ids set of the mds keys
 * @return a map from the key of each returned row to the raw bytes stored in COLUMN
 */
protected Map<MDSKey, byte[]> getKV(Set<MDSKey> ids) {
    // build one Get per id so the table can be read with a single batched call
    List<Get> batchedGets = new ArrayList<>();
    for (MDSKey mdsKey : ids) {
        batchedGets.add(new Get(mdsKey.getKey()));
    }

    Map<MDSKey, byte[]> valuesByKey = new HashMap<>();
    for (Row fetched : table.get(batchedGets)) {
        if (!fetched.isEmpty()) {
            byte[] columnValue = fetched.get(COLUMN);
            if (columnValue != null) {
                valuesByKey.put(new MDSKey(fetched.getRow()), columnValue);
            }
        }
    }
    return valuesByKey;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Get(io.cdap.cdap.api.dataset.table.Get) ArrayList(java.util.ArrayList) Row(io.cdap.cdap.api.dataset.table.Row)

Aggregations

Row (io.cdap.cdap.api.dataset.table.Row)166 Scanner (io.cdap.cdap.api.dataset.table.Scanner)81 Test (org.junit.Test)50 Table (io.cdap.cdap.api.dataset.table.Table)34 Put (io.cdap.cdap.api.dataset.table.Put)29 ArrayList (java.util.ArrayList)26 TransactionExecutor (org.apache.tephra.TransactionExecutor)26 Get (io.cdap.cdap.api.dataset.table.Get)24 Schema (io.cdap.cdap.api.data.schema.Schema)21 HashMap (java.util.HashMap)19 MDSKey (io.cdap.cdap.data2.dataset2.lib.table.MDSKey)16 Transaction (org.apache.tephra.Transaction)16 TransactionAware (org.apache.tephra.TransactionAware)16 IOException (java.io.IOException)14 Map (java.util.Map)14 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)13 DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin)12 WriteOnly (io.cdap.cdap.api.annotation.WriteOnly)10 DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue)10 HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable)10