Example 46 with Row

Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

From the class DefaultPreviewStore, method get.

@Override
public Map<String, List<JsonElement>> get(ApplicationId applicationId, String tracerName) {
    // PreviewStore is a singleton and we have to create gson for each operation since gson is not thread safe.
    Gson gson = new GsonBuilder().registerTypeAdapter(Schema.class, new SchemaTypeAdapter()).create();
    byte[] startRowKey = new MDSKey.Builder().add(applicationId.getNamespace()).add(applicationId.getApplication()).add(tracerName).build().getKey();
    byte[] stopRowKey = new MDSKey(Bytes.stopKeyForPrefix(startRowKey)).getKey();
    Map<String, List<JsonElement>> result = new HashMap<>();
    try (Scanner scanner = table.scan(startRowKey, stopRowKey, null, null, null)) {
        Row indexRow;
        while ((indexRow = scanner.next()) != null) {
            Map<byte[], byte[]> columns = indexRow.getColumns();
            String propertyName = Bytes.toString(columns.get(PROPERTY));
            JsonElement value = gson.fromJson(Bytes.toString(columns.get(VALUE)), JsonElement.class);
            List<JsonElement> values = result.get(propertyName);
            if (values == null) {
                values = new ArrayList<>();
                result.put(propertyName, values);
            }
            values.add(value);
        }
    } catch (IOException e) {
        String message = String.format("Error while reading preview data for application '%s' and tracer '%s'.", applicationId, tracerName);
        throw new RuntimeException(message, e);
    }
    return result;
}
Also used: Scanner (co.cask.cdap.api.dataset.table.Scanner), GsonBuilder (com.google.gson.GsonBuilder), HashMap (java.util.HashMap), Schema (co.cask.cdap.api.data.schema.Schema), Gson (com.google.gson.Gson), MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey), IOException (java.io.IOException), SchemaTypeAdapter (co.cask.cdap.internal.io.SchemaTypeAdapter), JsonElement (com.google.gson.JsonElement), ArrayList (java.util.ArrayList), List (java.util.List), Row (co.cask.cdap.api.dataset.table.Row)
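
The prefix-scan pattern above is reusable on its own: build a start key with MDSKey.Builder, derive the exclusive stop key with Bytes.stopKeyForPrefix, and drain the Scanner until next() returns null. A minimal sketch of just that pattern, using only the CDAP calls shown in these examples (the method name scanPrefix and its parameters are illustrative, not part of the original class):

private List<Row> scanPrefix(Table table, String namespace, String application) {
    // Start key: the common prefix of every row we want.
    byte[] startRowKey = new MDSKey.Builder().add(namespace).add(application).build().getKey();
    // Stop key: the smallest key strictly greater than any key with that prefix,
    // so the scan covers exactly the prefix range.
    byte[] stopRowKey = Bytes.stopKeyForPrefix(startRowKey);
    List<Row> rows = new ArrayList<>();
    try (Scanner scanner = table.scan(startRowKey, stopRowKey)) {
        Row row;
        while ((row = scanner.next()) != null) {
            rows.add(row);
        }
    }
    return rows;
}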

Example 47 with Row

Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

From the class MapReduceWithPartitionedTest, method testTimePartitionedWithMR.

@Test
public void testTimePartitionedWithMR() throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithTimePartitionedFileSet.class);
    // write a value to the input table
    final Table table = datasetCache.getDataset(AppWithTimePartitionedFileSet.INPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.put(Bytes.toBytes("x"), AppWithTimePartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
        }
    });
    final long time = DATE_FORMAT.parse("1/15/15 11:15 am").getTime();
    final long time5 = time + TimeUnit.MINUTES.toMillis(5);
    // run the partition writer m/r with this output partition time
    Map<String, String> runtimeArguments = Maps.newHashMap();
    Map<String, String> outputArgs = Maps.newHashMap();
    TimePartitionedFileSetArguments.setOutputPartitionTime(outputArgs, time);
    final ImmutableMap<String, String> assignedMetadata = ImmutableMap.of("region", "13", "data.source.name", "input", "data.source.type", "table");
    TimePartitionedFileSetArguments.setOutputPartitionMetadata(outputArgs, assignedMetadata);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    final TimePartitionedFileSet tpfs = datasetCache.getDataset(TIME_PARTITIONED);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) tpfs).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            TimePartitionDetail partition = tpfs.getPartitionByTime(time);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertNotNull(path);
            Assert.assertTrue(path.contains("2015-01-15/11-15"));
            Assert.assertEquals(assignedMetadata, partition.getMetadata().asMap());
        }
    });
    // delete the data in the input table and write a new row
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.delete(Bytes.toBytes("x"));
            table.put(Bytes.toBytes("y"), AppWithTimePartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
        }
    });
    // now run the m/r again with a new partition time, say 5 minutes later
    TimePartitionedFileSetArguments.setOutputPartitionTime(outputArgs, time5);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, outputArgs));
    // make the mapreduce add the partition in destroy, to validate that this does not fail the job
    runtimeArguments.put(AppWithTimePartitionedFileSet.COMPAT_ADD_PARTITION, "true");
    Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) tpfs).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = tpfs.getPartitionByTime(time5);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertNotNull(path);
            Assert.assertTrue(path.contains("2015-01-15/11-20"));
        }
    });
    // now run a map/reduce that reads all the partitions
    runtimeArguments = Maps.newHashMap();
    Map<String, String> inputArgs = Maps.newHashMap();
    TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(5));
    TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time5 + TimeUnit.MINUTES.toMillis(5));
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "a");
    Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read both partitions - and written both x and y to row a
    final Table output = datasetCache.getDataset(AppWithTimePartitionedFileSet.OUTPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("a"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("2", row.getString("y"));
        }
    });
    // now run a map/reduce that reads a range of the partitions, namely the first one
    TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(5));
    TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time + TimeUnit.MINUTES.toMillis(2));
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "b");
    Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read the first partition only - and written only x to row b
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("b"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertNull(row.get("y"));
        }
    });
    // now run a map/reduce that reads no partitions (because the range matches nothing)
    TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(10));
    TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time - TimeUnit.MINUTES.toMillis(9));
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "n");
    Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read no partitions - and written nothing to row n
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("n"));
            Assert.assertTrue(row.isEmpty());
        }
    });
}
Also used: Partition (co.cask.cdap.api.dataset.lib.Partition), Table (co.cask.cdap.api.dataset.table.Table), TransactionExecutor (org.apache.tephra.TransactionExecutor), ApplicationWithPrograms (co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms), TransactionAware (org.apache.tephra.TransactionAware), BasicArguments (co.cask.cdap.internal.app.runtime.BasicArguments), TimePartitionDetail (co.cask.cdap.api.dataset.lib.TimePartitionDetail), Row (co.cask.cdap.api.dataset.table.Row), TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet), Test (org.junit.Test)
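
Everything this test configures flows through runtime arguments: the writer gets a fixed output partition time (plus optional metadata), the reader gets a time range, and both maps are scoped to the dataset name via RuntimeArguments.addScope. A condensed sketch of just that wiring, assuming the same TIME_PARTITIONED constant as the test and an illustrative partition time in milliseconds named time:

// Writer side: pin the output partition to a specific time and attach metadata.
Map<String, String> outputArgs = Maps.newHashMap();
TimePartitionedFileSetArguments.setOutputPartitionTime(outputArgs, time);
TimePartitionedFileSetArguments.setOutputPartitionMetadata(outputArgs, ImmutableMap.of("region", "13"));
Map<String, String> writerArgs = Maps.newHashMap();
writerArgs.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, outputArgs));

// Reader side: select all partitions whose time falls in a window around 'time'.
Map<String, String> inputArgs = Maps.newHashMap();
TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(5));
TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time + TimeUnit.MINUTES.toMillis(5));
Map<String, String> readerArgs = Maps.newHashMap();
readerArgs.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));

As the test's last case shows, a time range that matches no partitions simply yields an empty input rather than an error.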

Example 48 with Row

Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

From the class ArtifactStore, method getArtifacts.

private List<ArtifactDetail> getArtifacts(Table metaTable, ArtifactRange range, int limit, ArtifactSortOrder order) throws IOException, ArtifactRangeNotFoundException, ArtifactNotFoundException {
    List<ArtifactDetail> artifacts = Lists.newArrayList();
    ArtifactKey artifactKey = new ArtifactKey(range.getNamespace(), range.getName());
    Row row = metaTable.get(artifactKey.getRowKey());
    addArtifacts(artifacts, row, limit, order, range);
    return Collections.unmodifiableList(artifacts);
}
Also used: Row (co.cask.cdap.api.dataset.table.Row)
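
One detail worth noting: Table.get returns an empty Row rather than null when nothing matches (the test in Example 47 relies on exactly this when it asserts row.isEmpty() for row "n"), so the natural guard here is an emptiness check. A hypothetical variant of the lookup with that guard made explicit:

Row row = metaTable.get(artifactKey.getRowKey());
// get() never returns null; an absent row comes back empty.
if (!row.isEmpty()) {
    addArtifacts(artifacts, row, limit, order, range);
}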

Example 49 with Row

Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

From the class LevelDBTableCore, method deleteRange.

public void deleteRange(byte[] startRow, byte[] stopRow, @Nullable FuzzyRowFilter filter, @Nullable byte[][] columns) throws IOException {
    if (columns != null) {
        if (columns.length == 0) {
            return;
        }
        columns = Arrays.copyOf(columns, columns.length);
        Arrays.sort(columns, Bytes.BYTES_COMPARATOR);
    }
    DB db = getDB();
    DBIterator iterator = db.iterator();
    seekToStart(iterator, startRow);
    byte[] endKey = stopRow == null ? null : createEndKey(stopRow);
    DBIterator deleteIterator = db.iterator();
    seekToStart(deleteIterator, startRow);
    // TODO: make the batch size configurable
    final int deletesPerRound = 1024;
    try (Scanner scanner = new LevelDBScanner(iterator, endKey, filter, columns, null)) {
        Row rowValues;
        WriteBatch batch = db.createWriteBatch();
        int deletesInBatch = 0;
        // go through all matching cells and delete them in batches.
        while ((rowValues = scanner.next()) != null) {
            byte[] row = rowValues.getRow();
            for (byte[] column : rowValues.getColumns().keySet()) {
                addToDeleteBatch(batch, deleteIterator, row, column);
                deletesInBatch++;
                // perform the deletes when we have built up a batch.
                if (deletesInBatch >= deletesPerRound) {
                    // delete all the entries that were found
                    db.write(batch, getWriteOptions());
                    batch = db.createWriteBatch();
                    deletesInBatch = 0;
                }
            }
        }
        // perform any outstanding deletes
        if (deletesInBatch > 0) {
            db.write(batch, getWriteOptions());
        }
    } finally {
        deleteIterator.close();
    }
}
Also used: DBIterator (org.iq80.leveldb.DBIterator), Scanner (co.cask.cdap.api.dataset.table.Scanner), Row (co.cask.cdap.api.dataset.table.Row), WriteBatch (org.iq80.leveldb.WriteBatch), DB (org.iq80.leveldb.DB)
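
The core idiom here, independent of the scanning details, is to accumulate deletes in a WriteBatch and flush every fixed number of entries so a large range never builds one giant in-memory batch. A stripped-down sketch of that idiom against the plain org.iq80.leveldb API (the method name and the keys iterable are illustrative; the original routes deletes through its private addToDeleteBatch helper instead):

// Delete the given raw keys in batches of batchSize to bound memory usage.
void deleteInBatches(DB db, Iterable<byte[]> keys, int batchSize) throws IOException {
    WriteBatch batch = db.createWriteBatch();
    try {
        int pending = 0;
        for (byte[] key : keys) {
            batch.delete(key);
            if (++pending >= batchSize) {
                // flush the full batch and start a fresh one
                db.write(batch);
                batch.close();
                batch = db.createWriteBatch();
                pending = 0;
            }
        }
        if (pending > 0) {
            // flush any remainder
            db.write(batch);
        }
    } finally {
        batch.close();
    }
}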

Example 50 with Row

Use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

From the class MetadataStoreDataset, method scan.

/**
   * Run a scan on MDS.
   *
   * @param startId  scan start key (inclusive)
   * @param stopId   scan stop key (exclusive); if null, the scan covers all keys prefixed by the start key
   * @param typeOfT  type of the value to deserialize
   * @param function function applied to each element returned by the scan;
   *                 if function.apply returns false, the scan stops. It must not return null.
   * @param <T>      type of value
   */
public <T> void scan(MDSKey startId, @Nullable MDSKey stopId, Type typeOfT, Function<KeyValue<T>, Boolean> function) {
    byte[] startKey = startId.getKey();
    byte[] stopKey = stopId == null ? Bytes.stopKeyForPrefix(startKey) : stopId.getKey();
    try (Scanner scan = table.scan(startKey, stopKey)) {
        Row next;
        while ((next = scan.next()) != null) {
            byte[] columnValue = next.get(COLUMN);
            if (columnValue == null) {
                continue;
            }
            T value = deserialize(columnValue, typeOfT);
            MDSKey key = new MDSKey(next.getRow());
            //noinspection ConstantConditions
            if (!function.apply(new KeyValue<>(key, value))) {
                break;
            }
        }
    }
}
Also used: Scanner (co.cask.cdap.api.dataset.table.Scanner), Row (co.cask.cdap.api.dataset.table.Row)
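
A typical caller hands scan a Function that accumulates matches and returns false to stop early. A hypothetical usage sketch, assuming a MetadataStoreDataset named store, an illustrative value class MyValue, the Guava Function type this signature suggests, and that KeyValue exposes getValue():

final List<MyValue> collected = new ArrayList<>();
final int limit = 100;
// With a null stop key, the scan covers every key sharing the prefix (see stopKeyForPrefix above).
MDSKey prefix = new MDSKey.Builder().add("some-namespace").build();
store.scan(prefix, null, MyValue.class, new Function<KeyValue<MyValue>, Boolean>() {
    @Override
    public Boolean apply(KeyValue<MyValue> input) {
        collected.add(input.getValue());
        // Returning false stops the scan once enough values are collected.
        return collected.size() < limit;
    }
});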

Aggregations

Row (co.cask.cdap.api.dataset.table.Row): 111
Scanner (co.cask.cdap.api.dataset.table.Scanner): 60
Test (org.junit.Test): 23
Table (co.cask.cdap.api.dataset.table.Table): 20
Get (co.cask.cdap.api.dataset.table.Get): 16
ArrayList (java.util.ArrayList): 16
TransactionExecutor (org.apache.tephra.TransactionExecutor): 16
Map (java.util.Map): 15
Put (co.cask.cdap.api.dataset.table.Put): 14
HashMap (java.util.HashMap): 10
Scan (co.cask.cdap.api.dataset.table.Scan): 9
TransactionAware (org.apache.tephra.TransactionAware): 9
MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey): 8
QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow): 8
DatasetId (co.cask.cdap.proto.id.DatasetId): 8
IOException (java.io.IOException): 8
ImmutableMap (com.google.common.collect.ImmutableMap): 7
Transaction (org.apache.tephra.Transaction): 7
WriteOnly (co.cask.cdap.api.annotation.WriteOnly): 6
Schema (co.cask.cdap.api.data.schema.Schema): 6