Usage of io.cdap.cdap.api.dataset.table.Row in the project cdap (by caskdata):
class PartitionedFileSetDataset, method setMetadata.
/**
 * Sets metadata on an existing partition.
 *
 * @param key the key identifying the partition to update
 * @param metadata the metadata entries to write to the partition's row
 * @param allowUpdates whether existing metadata entries may be overwritten
 * @throws PartitionNotFoundException if no partition exists for the given key
 */
private void setMetadata(PartitionKey key, Map<String, String> metadata, boolean allowUpdates) {
  final byte[] partitionRowKey = generateRowKey(key, partitioning);
  // Metadata can only be attached to a partition that already exists.
  Row existingRow = partitionsTable.get(partitionRowKey);
  if (existingRow.isEmpty()) {
    throw new PartitionNotFoundException(key, getName());
  }
  // Merge the new metadata into the partition row, honoring the update policy.
  Put metadataPut = new Put(partitionRowKey);
  addMetadataToPut(existingRow, metadata, metadataPut, allowUpdates);
  partitionsTable.put(metadataPut);
}
Usage of io.cdap.cdap.api.dataset.table.Row in the project cdap (by caskdata):
class PartitionedFileSetDataset, method scannerToPartitions.
/**
 * While applying a partition filter and a limit, parse partitions from the rows of a scanner and add them to a list.
 * Note that multiple partitions can have the same transaction write pointer. For each set of partitions with the same
 * write pointer, we either add the entire set or exclude the entire set. The limit is applied after adding each such
 * set of partitions to the list.
 *
 * @param scanner the scanner on the partitions table from which to read partitions
 * @param partitions list to add the qualifying partitions to
 * @param limit once this limit is reached, partitions committed by subsequent transactions will not be added.
 * The limit is checked only after consuming all partitions of a transaction, so
 * the total number of consumed partitions may be greater than this limit.
 * @param predicate predicate to apply before adding to the partitions list
 * @return Transaction ID of the partition that we reached in the scanner, but did not add to the list. This value
 * can be useful in future scans (as a resume marker). Null if the scanner was exhausted.
 */
@Nullable
private Long scannerToPartitions(Scanner scanner, List<PartitionDetail> partitions, int limit, Predicate<PartitionDetail> predicate) {
// Write pointer of the transaction whose partitions were consumed in the previous iteration.
Long prevTxId = null;
Row row;
while ((row = scanner.next()) != null) {
PartitionKey key = parseRowKey(row.getRow(), partitioning);
String relativePath = Bytes.toString(row.get(RELATIVE_PATH));
Long txId = Bytes.toLong(row.get(WRITE_PTR_COL));
// Only check the limit at a transaction boundary (when the write pointer changes), so that we
// always add either all partitions of a transaction or none, since we keep our marker based
// upon transaction id.
if (prevTxId != null && !prevTxId.equals(txId)) {
if (partitions.size() >= limit) {
// Stop here and return the txId we did NOT consume, so a future scan can resume from it.
return txId;
}
}
prevTxId = txId;
BasicPartitionDetail partitionDetail = new BasicPartitionDetail(PartitionedFileSetDataset.this, relativePath, key, metadataFromRow(row));
// Filtered-out partitions are skipped, but prevTxId was still advanced above: the whole
// transaction counts as consumed either way.
if (!predicate.apply(partitionDetail)) {
continue;
}
partitions.add(partitionDetail);
}
// Scanner exhausted: there is no "next" transaction to resume from.
return null;
}
Usage of io.cdap.cdap.api.dataset.table.Row in the project cdap (by caskdata):
class PartitionedFileSetDataset, method addPartition.
/**
 * Adds a partition to the dataset, or appends to an existing partition if allowed.
 *
 * @param key the partition key for the new (or existing) partition
 * @param path the relative path of the partition's files; when appending, must match the existing path
 * @param metadata metadata entries to store with the partition
 * @param filesCreated whether the partition's files have already been created (recorded on the operation
 *                     for rollback purposes — TODO confirm against AddPartitionOperation)
 * @param allowAppend whether adding to an already-existing partition is permitted
 * @throws PartitionAlreadyExistsException if the partition exists and appending is not allowed
 * @throws DataSetException if appending with a path different from the partition's original path
 */
public void addPartition(PartitionKey key, String path, Map<String, String> metadata, boolean filesCreated, boolean allowAppend) {
byte[] rowKey = generateRowKey(key, partitioning);
// A non-empty row means the partition already exists, so this call is an append.
Row row = partitionsTable.get(rowKey);
boolean appending = !row.isEmpty();
if (appending && !allowAppend) {
throw new PartitionAlreadyExistsException(getName(), key);
}
if (appending) {
// this can happen if user originally created the partition with a custom relative path
String existingPath = Bytes.toString(row.get(RELATIVE_PATH));
if (!path.equals(existingPath)) {
throw new DataSetException(String.format("Attempting to append to Dataset '%s', to partition '%s' with a " + "different path. Original path: '%s'. New path: '%s'", getName(), key.toString(), existingPath, path));
}
}
LOG.debug("{} partition with key {} and path {} to dataset {}", appending ? "Appending to" : "Creating", key, path, getName());
// Record the operation so it can be rolled back if the enclosing transaction fails.
AddPartitionOperation operation = new AddPartitionOperation(key, path, filesCreated);
operationsInThisTx.add(operation);
Put put = new Put(rowKey);
byte[] nowInMillis = Bytes.toBytes(System.currentTimeMillis());
// Path and creation time are immutable once set; only write them for a brand-new partition.
if (!appending) {
put.add(RELATIVE_PATH, Bytes.toBytes(path));
put.add(CREATION_TIME_COL, nowInMillis);
}
put.add(LAST_MODIFICATION_TIME_COL, nowInMillis);
// we allow updates, because an update will only happen if its an append
addMetadataToPut(row, metadata, put, true);
// index each row by its transaction's write pointer
put.add(WRITE_PTR_COL, tx.getWritePointer());
partitionsTable.put(put);
// Only a brand-new partition is registered with Explore; an append reuses the existing registration.
if (!appending) {
addPartitionToExplore(key, path);
operation.setExplorePartitionCreated();
}
}
Usage of io.cdap.cdap.api.dataset.table.Row in the project cdap (by caskdata):
class PartitionedFileSetDataset, method getPartition.
/**
 * Returns the detail of the partition for the given key, or null if no such
 * partition exists (or its row has no relative path recorded).
 *
 * @param key the partition key to look up
 * @return the partition detail, or null if the partition does not exist
 */
@ReadOnly
@Override
public PartitionDetail getPartition(PartitionKey key) {
  Row partitionRow = partitionsTable.get(generateRowKey(key, partitioning));
  if (partitionRow.isEmpty()) {
    return null;
  }
  byte[] relativePathBytes = partitionRow.get(RELATIVE_PATH);
  // A row without a relative path is treated the same as a missing partition.
  return relativePathBytes == null
      ? null
      : new BasicPartitionDetail(this, Bytes.toString(relativePathBytes), key, metadataFromRow(partitionRow));
}
Usage of io.cdap.cdap.api.dataset.table.Row in the project cdap (by caskdata):
class MetadataStoreDataset, method getKV.
/**
 * Get all non-null values with the given ids for the default COLUMN in a map.
 * Rows that are missing, or that have no value for COLUMN, are silently skipped.
 *
 * <p>Note: values are returned as raw bytes; callers are responsible for deserialization.
 *
 * @param ids set of the mds keys to look up
 * @return a map from each found key to the raw byte[] value of COLUMN
 */
protected Map<MDSKey, byte[]> getKV(Set<MDSKey> ids) {
  // Pre-size both collections, since the batch size is known up front.
  Map<MDSKey, byte[]> resultMap = new HashMap<>(ids.size() * 2);
  List<Get> getList = new ArrayList<>(ids.size());
  for (MDSKey id : ids) {
    getList.add(new Get(id.getKey()));
  }
  // Issue one batched get for all keys rather than one get per key.
  List<Row> rowList = table.get(getList);
  for (Row row : rowList) {
    // An empty row means the key does not exist in the table.
    if (row.isEmpty()) {
      continue;
    }
    byte[] value = row.get(COLUMN);
    if (value == null) {
      continue;
    }
    MDSKey key = new MDSKey(row.getRow());
    resultMap.put(key, value);
  }
  return resultMap;
}
Aggregations