Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.
The class DefaultPreviewStore, method get(): scans the key range for one application and tracer, and groups the scanned rows' JSON values by property name.
@Override
public Map<String, List<JsonElement>> get(ApplicationId applicationId, String tracerName) {
  // PreviewStore is a singleton and we have to create gson for each operation since gson is not thread safe.
  Gson gson = new GsonBuilder().registerTypeAdapter(Schema.class, new SchemaTypeAdapter()).create();
  byte[] startRowKey = new MDSKey.Builder()
    .add(applicationId.getNamespace())
    .add(applicationId.getApplication())
    .add(tracerName)
    .build().getKey();
  byte[] stopRowKey = new MDSKey(Bytes.stopKeyForPrefix(startRowKey)).getKey();
  Map<String, List<JsonElement>> result = new HashMap<>();
  try (Scanner scanner = table.scan(startRowKey, stopRowKey, null, null, null)) {
    Row indexRow;
    while ((indexRow = scanner.next()) != null) {
      Map<byte[], byte[]> columns = indexRow.getColumns();
      String propertyName = Bytes.toString(columns.get(PROPERTY));
      JsonElement value = gson.fromJson(Bytes.toString(columns.get(VALUE)), JsonElement.class);
      List<JsonElement> values = result.get(propertyName);
      if (values == null) {
        values = new ArrayList<>();
        result.put(propertyName, values);
      }
      values.add(value);
    }
  } catch (IOException e) {
    String message = String.format("Error while reading preview data for application '%s' and tracer '%s'.",
                                   applicationId, tracerName);
    throw new RuntimeException(message, e);
  }
  return result;
}
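Since Java 8, the null-check-then-put grouping in the loop above can be collapsed into a single Map.computeIfAbsent call. A minimal sketch of the equivalent idiom on plain strings (names are illustrative, not CDAP code):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupingSketch {
  public static void main(String[] args) {
    Map<String, List<String>> result = new HashMap<>();
    // Equivalent to: look up the list, create and register it if absent, then add.
    result.computeIfAbsent("propertyName", k -> new ArrayList<>()).add("value");
    System.out.println(result); // {propertyName=[value]}
  }
}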
Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.
The class MapReduceWithPartitionedTest, method testTimePartitionedWithMR(): writes input rows to a Table, runs MapReduce jobs that create and read time partitions, and verifies the results by reading Rows back from the output table.
@Test
public void testTimePartitionedWithMR() throws Exception {
  final ApplicationWithPrograms app = deployApp(AppWithTimePartitionedFileSet.class);
  // write a value to the input table
  final Table table = datasetCache.getDataset(AppWithTimePartitionedFileSet.INPUT);
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table)
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        table.put(Bytes.toBytes("x"), AppWithTimePartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
      }
    });
  final long time = DATE_FORMAT.parse("1/15/15 11:15 am").getTime();
  final long time5 = time + TimeUnit.MINUTES.toMillis(5);
  // run the partition writer m/r with this output partition time
  Map<String, String> runtimeArguments = Maps.newHashMap();
  Map<String, String> outputArgs = Maps.newHashMap();
  TimePartitionedFileSetArguments.setOutputPartitionTime(outputArgs, time);
  final ImmutableMap<String, String> assignedMetadata =
    ImmutableMap.of("region", "13", "data.source.name", "input", "data.source.type", "table");
  TimePartitionedFileSetArguments.setOutputPartitionMetadata(outputArgs, assignedMetadata);
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, outputArgs));
  Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
  // this should have created a partition in the tpfs
  final TimePartitionedFileSet tpfs = datasetCache.getDataset(TIME_PARTITIONED);
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) tpfs)
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        TimePartitionDetail partition = tpfs.getPartitionByTime(time);
        Assert.assertNotNull(partition);
        String path = partition.getRelativePath();
        Assert.assertNotNull(path);
        Assert.assertTrue(path.contains("2015-01-15/11-15"));
        Assert.assertEquals(assignedMetadata, partition.getMetadata().asMap());
      }
    });
  // delete the data in the input table and write a new row
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table)
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        table.delete(Bytes.toBytes("x"));
        table.put(Bytes.toBytes("y"), AppWithTimePartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
      }
    });
  // now run the m/r again with a new partition time, say 5 minutes later
  TimePartitionedFileSetArguments.setOutputPartitionTime(outputArgs, time5);
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, outputArgs));
  // make the mapreduce add the partition in destroy, to validate that this does not fail the job
  runtimeArguments.put(AppWithTimePartitionedFileSet.COMPAT_ADD_PARTITION, "true");
  Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
  // this should have created a partition in the tpfs
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) tpfs)
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        Partition partition = tpfs.getPartitionByTime(time5);
        Assert.assertNotNull(partition);
        String path = partition.getRelativePath();
        Assert.assertNotNull(path);
        Assert.assertTrue(path.contains("2015-01-15/11-20"));
      }
    });
  // now run a map/reduce that reads all the partitions
  runtimeArguments = Maps.newHashMap();
  Map<String, String> inputArgs = Maps.newHashMap();
  TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(5));
  TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time5 + TimeUnit.MINUTES.toMillis(5));
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
  runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "a");
  Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
  // this should have read both partitions - and written both x and y to row a
  final Table output = datasetCache.getDataset(AppWithTimePartitionedFileSet.OUTPUT);
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output)
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        Row row = output.get(Bytes.toBytes("a"));
        Assert.assertEquals("1", row.getString("x"));
        Assert.assertEquals("2", row.getString("y"));
      }
    });
  // now run a map/reduce that reads a range of the partitions, namely the first one
  TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(5));
  TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time + TimeUnit.MINUTES.toMillis(2));
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
  runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "b");
  Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
  // this should have read the first partition only - and written only x to row b
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output)
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        Row row = output.get(Bytes.toBytes("b"));
        Assert.assertEquals("1", row.getString("x"));
        Assert.assertNull(row.get("y"));
      }
    });
  // now run a map/reduce that reads no partitions (because the range matches nothing)
  TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(10));
  TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time - TimeUnit.MINUTES.toMillis(9));
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
  runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "n");
  Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
  // this should have read no partitions - and written nothing to row n
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output)
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        Row row = output.get(Bytes.toBytes("n"));
        Assert.assertTrue(row.isEmpty());
      }
    });
}
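The path assertions above ("2015-01-15/11-15" and "2015-01-15/11-20") follow from TPFS encoding the partition time into the partition's relative path. A minimal sketch, assuming the yyyy-MM-dd/HH-mm layout that the assertions imply (the test's DATE_FORMAT is defined elsewhere in the class, so the parse pattern here is also an assumption):

import java.text.SimpleDateFormat;
import java.util.Date;

public class PartitionPathSketch {
  public static void main(String[] args) throws Exception {
    // Parse the same timestamp the test uses.
    long time = new SimpleDateFormat("M/d/yy h:mm a").parse("1/15/15 11:15 am").getTime();
    SimpleDateFormat pathFormat = new SimpleDateFormat("yyyy-MM-dd/HH-mm");
    System.out.println(pathFormat.format(new Date(time)));                // 2015-01-15/11-15
    System.out.println(pathFormat.format(new Date(time + 5 * 60_000L)));  // 2015-01-15/11-20
  }
}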
Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.
The class ArtifactStore, method getArtifacts(): fetches the single metadata Row keyed by the artifact's namespace and name, then delegates to addArtifacts to apply the range, limit, and sort order.
private List<ArtifactDetail> getArtifacts(Table metaTable, ArtifactRange range, int limit, ArtifactSortOrder order)
  throws IOException, ArtifactRangeNotFoundException, ArtifactNotFoundException {
  List<ArtifactDetail> artifacts = Lists.newArrayList();
  ArtifactKey artifactKey = new ArtifactKey(range.getNamespace(), range.getName());
  Row row = metaTable.get(artifactKey.getRowKey());
  addArtifacts(artifacts, row, limit, order, range);
  return Collections.unmodifiableList(artifacts);
}
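The examples on this page exercise a handful of Row accessors between them. This fragment gathers those accessors in one place (a sketch against the CDAP Table API; it needs the CDAP classes on the classpath and is not a standalone program):

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Table;
import java.util.Map;

class RowAccessSketch {
  void inspect(Table table) {
    Row row = table.get(Bytes.toBytes("a"));
    if (!row.isEmpty()) {
      byte[] rowKey = row.getRow();                   // the row key itself
      Map<byte[], byte[]> columns = row.getColumns(); // all column -> value pairs
      byte[] raw = row.get("x");                      // a single column as bytes
      String decoded = row.getString("x");            // the same column decoded as a String
    }
  }
}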
Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.
The class LevelDBTableCore, method deleteRange(): scans the matching Rows and deletes their cells in bounded write batches.
public void deleteRange(byte[] startRow, byte[] stopRow, @Nullable FuzzyRowFilter filter, @Nullable byte[][] columns)
  throws IOException {
  if (columns != null) {
    if (columns.length == 0) {
      return;
    }
    columns = Arrays.copyOf(columns, columns.length);
    Arrays.sort(columns, Bytes.BYTES_COMPARATOR);
  }
  DB db = getDB();
  DBIterator iterator = db.iterator();
  seekToStart(iterator, startRow);
  byte[] endKey = stopRow == null ? null : createEndKey(stopRow);
  DBIterator deleteIterator = db.iterator();
  seekToStart(deleteIterator, startRow);
  // todo make configurable
  final int deletesPerRound = 1024;
  try (Scanner scanner = new LevelDBScanner(iterator, endKey, filter, columns, null)) {
    Row rowValues;
    WriteBatch batch = db.createWriteBatch();
    int deletesInBatch = 0;
    // go through all matching cells and delete them in batches.
    while ((rowValues = scanner.next()) != null) {
      byte[] row = rowValues.getRow();
      for (byte[] column : rowValues.getColumns().keySet()) {
        addToDeleteBatch(batch, deleteIterator, row, column);
        deletesInBatch++;
        // perform the deletes when we have built up a batch.
        if (deletesInBatch >= deletesPerRound) {
          // delete all the entries that were found
          db.write(batch, getWriteOptions());
          batch = db.createWriteBatch();
          deletesInBatch = 0;
        }
      }
    }
    // perform any outstanding deletes
    if (deletesInBatch > 0) {
      db.write(batch, getWriteOptions());
    }
  } finally {
    deleteIterator.close();
  }
}
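The flush-every-N pattern above keeps any single WriteBatch bounded rather than accumulating the whole range in memory. Stripped of the CDAP scanner plumbing, the same pattern against the org.iq80.leveldb API looks roughly like this (the method name and batch size are illustrative):

import java.io.IOException;
import org.iq80.leveldb.DB;
import org.iq80.leveldb.WriteBatch;

class BatchedDeleteSketch {
  void deleteInBatches(DB db, Iterable<byte[]> keys) throws IOException {
    final int deletesPerRound = 1024;
    WriteBatch batch = db.createWriteBatch();
    int deletesInBatch = 0;
    for (byte[] key : keys) {
      batch.delete(key);
      if (++deletesInBatch >= deletesPerRound) {
        db.write(batch);       // flush the accumulated deletes
        batch.close();         // WriteBatch is Closeable
        batch = db.createWriteBatch();
        deletesInBatch = 0;
      }
    }
    if (deletesInBatch > 0) {
      db.write(batch);         // flush any remainder
    }
    batch.close();
  }
}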
Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.
The class MetadataStoreDataset, method scan(): iterates Rows in a key range, deserializes each row's value column, and feeds key/value pairs to a caller-supplied function.
/**
 * Run a scan on MDS.
 *
 * @param startId scan start key
 * @param stopId scan stop key
 * @param typeOfT type of value
 * @param function function to process each element returned from scan.
 *                 If function.apply returns false then the scan is stopped.
 *                 Also, function.apply should not return null.
 * @param <T> type of value
 */
public <T> void scan(MDSKey startId, @Nullable MDSKey stopId, Type typeOfT, Function<KeyValue<T>, Boolean> function) {
  byte[] startKey = startId.getKey();
  byte[] stopKey = stopId == null ? Bytes.stopKeyForPrefix(startKey) : stopId.getKey();
  try (Scanner scan = table.scan(startKey, stopKey)) {
    Row next;
    while ((next = scan.next()) != null) {
      byte[] columnValue = next.get(COLUMN);
      if (columnValue == null) {
        continue;
      }
      T value = deserialize(columnValue, typeOfT);
      MDSKey key = new MDSKey(next.getRow());
      //noinspection ConstantConditions
      if (!function.apply(new KeyValue<>(key, value))) {
        break;
      }
    }
  }
}
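A hypothetical caller of scan might look like the sketch below: collect deserialized values and stop early by returning false. MyValue is a stand-in type, and KeyValue.getValue() is assumed to expose the deserialized value; only the signature shown above is confirmed by the source.

import com.google.common.base.Function;
import java.util.ArrayList;
import java.util.List;
// CDAP imports (MetadataStoreDataset, MDSKey, KeyValue) omitted; packages vary by version.

class ScanUsageSketch {
  List<MyValue> firstTen(MetadataStoreDataset store, MDSKey startId) {
    final List<MyValue> collected = new ArrayList<>();
    // A null stopId scans to the end of the startId prefix, per the method above.
    store.scan(startId, null, MyValue.class, new Function<KeyValue<MyValue>, Boolean>() {
      @Override
      public Boolean apply(KeyValue<MyValue> input) {
        collected.add(input.getValue());
        return collected.size() < 10; // returning false stops the scan
      }
    });
    return collected;
  }
}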