Search in sources :

Example 86 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class BufferingTableTest method testMultiGetIncludesBuffer.

@Test
public void testMultiGetIncludesBuffer() throws Exception {
    DatasetAdmin admin = getTableAdmin(CONTEXT1, MY_TABLE);
    admin.create();
    try {
        // persist some data
        BufferingTable table = getTable(CONTEXT1, MY_TABLE);
        Transaction tx1 = txClient.startShort();
        table.startTx(tx1);
        // writing a couple rows
        // table should look like the following, with everything in the buffer
        //          c1    c2    c3    c4
        // r1       1     2     3     -
        // r2       -     3     2     1
        table.put(R1, a(C1, C2, C3), lb(1, 2, 3));
        table.put(R2, a(C2, C3, C4), lb(3, 2, 1));
        // check that multi-get can see buffered writes
        List<Row> rows = table.get(Lists.newArrayList(new Get(R1), new Get(R2)));
        Assert.assertEquals(2, rows.size());
        TableAssert.assertRow(rows.get(0), R1, a(C1, C2, C3), lb(1, 2, 3));
        TableAssert.assertRow(rows.get(1), R2, a(C2, C3, C4), lb(3, 2, 1));
        // check multi-get with gets that specify columns, and one get that should return an empty row
        rows = table.get(Lists.newArrayList(new Get(R1, C2, C3), new Get(R2, C2, C3), new Get(R3)));
        Assert.assertEquals(3, rows.size());
        TableAssert.assertRow(rows.get(0), R1, a(C2, C3), lb(2, 3));
        TableAssert.assertRow(rows.get(1), R2, a(C2, C3), lb(3, 2));
        Assert.assertTrue(rows.get(2).isEmpty());
        // persist changes
        Collection<byte[]> txChanges = table.getTxChanges();
        Assert.assertTrue(txClient.canCommit(tx1, txChanges));
        Assert.assertTrue(table.commitTx());
        Assert.assertTrue(txClient.commit(tx1));
        table.postTxCommit();
        // start another transaction
        Transaction tx2 = txClient.startShort();
        table.startTx(tx2);
        // now add another row, delete a row, and change some column values
        // table should look like the following
        //         c1    c2    c3    c4    c5
        // r1      -     -     3     2     -
        // r3      -     -     -     -     1
        table.put(R1, a(C2, C3, C4), lb(4, 3, 2));
        table.delete(R1, a(C1, C2));
        table.delete(R2);
        table.put(R3, C5, L1);
        // verify multi-get sees persisted data with buffer applied on top
        rows = table.get(Lists.newArrayList(new Get(R1), new Get(R2), new Get(R3)));
        Assert.assertEquals(3, rows.size());
        TableAssert.assertRow(rows.get(0), R1, a(C3, C4), lb(3, 2));
        Assert.assertTrue(rows.get(1).isEmpty());
        TableAssert.assertRow(rows.get(2), R3, a(C5), lb(1));
        // pretend there was a write conflict and rollback changes
        Assert.assertTrue(table.rollbackTx());
        txClient.abort(tx2);
        // start another transaction and make sure it can't see what was done before
        Transaction tx3 = txClient.startShort();
        table.startTx(tx3);
        rows = table.get(Lists.newArrayList(new Get(R1), new Get(R2)));
        Assert.assertEquals(2, rows.size());
        TableAssert.assertRow(rows.get(0), R1, a(C1, C2, C3), lb(1, 2, 3));
        TableAssert.assertRow(rows.get(1), R2, a(C2, C3, C4), lb(3, 2, 1));
    } finally {
        admin.drop();
    }
}
Also used : Transaction(org.apache.tephra.Transaction) Get(co.cask.cdap.api.dataset.table.Get) DatasetAdmin(co.cask.cdap.api.dataset.DatasetAdmin) Row(co.cask.cdap.api.dataset.table.Row) Test(org.junit.Test)

Example 87 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class DataQualityAppTest method testDefaultConfig.

@Test
public void testDefaultConfig() throws Exception {
    Map<String, Set<String>> testMap = new HashMap<>();
    Set<String> testSet = new HashSet<>();
    testSet.add("DiscreteValuesHistogram");
    testMap.put("content_length", testSet);
    DataQualityApp.DataQualityConfig config = new DataQualityApp.DataQualityConfig(WORKFLOW_SCHEDULE_MINUTES, getStreamSource(), "dataQuality", testMap);
    ApplicationId appId = NamespaceId.DEFAULT.app("newApp");
    AppRequest<DataQualityApp.DataQualityConfig> appRequest = new AppRequest<>(new ArtifactSummary(appArtifact.getArtifact(), appArtifact.getVersion()), config);
    ApplicationManager applicationManager = deployApplication(appId, appRequest);
    MapReduceManager mrManager = applicationManager.getMapReduceManager("FieldAggregator").start();
    mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
    Table logDataStore = (Table) getDataset("dataQuality").get();
    DiscreteValuesHistogram discreteValuesHistogramAggregationFunction = new DiscreteValuesHistogram();
    Row row;
    try (Scanner scanner = logDataStore.scan(null, null)) {
        while ((row = scanner.next()) != null) {
            if (Bytes.toString(row.getRow()).contains("content_length")) {
                Map<byte[], byte[]> columnsMapBytes = row.getColumns();
                byte[] output = columnsMapBytes.get(Bytes.toBytes("DiscreteValuesHistogram"));
                if (output != null) {
                    discreteValuesHistogramAggregationFunction.combine(output);
                }
            }
        }
    }
    Map<String, Integer> outputMap = discreteValuesHistogramAggregationFunction.retrieveAggregation();
    Map<String, Integer> expectedMap = Maps.newHashMap();
    expectedMap.put("256", 3);
    Assert.assertEquals(expectedMap, outputMap);
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) ApplicationManager(co.cask.cdap.test.ApplicationManager) Set(java.util.Set) HashSet(java.util.HashSet) Table(co.cask.cdap.api.dataset.table.Table) MapReduceManager(co.cask.cdap.test.MapReduceManager) HashMap(java.util.HashMap) AppRequest(co.cask.cdap.proto.artifact.AppRequest) ArtifactSummary(co.cask.cdap.api.artifact.ArtifactSummary) DiscreteValuesHistogram(co.cask.cdap.dq.functions.DiscreteValuesHistogram) Row(co.cask.cdap.api.dataset.table.Row) ApplicationId(co.cask.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet) DataQualityApp(co.cask.cdap.dq.DataQualityApp) Test(org.junit.Test)

Example 88 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class TopKCollector method readWordAssocs.

/**
   * Returns the top words associated with the specified word and the number
   * of times the words have appeared together.
   * @param word the word of interest
   * @param limit the number of associations to return, at most
   * @return a map of the top associated words to their co-occurrence count
   */
@ReadOnly
public Map<String, Long> readWordAssocs(String word, int limit) {
    // Retrieve all columns of the word’s row
    Row result = this.table.get(new Get(word));
    TopKCollector collector = new TopKCollector(limit);
    if (!result.isEmpty()) {
        // Iterate over all columns
        for (Map.Entry<byte[], byte[]> entry : result.getColumns().entrySet()) {
            collector.add(Bytes.toLong(entry.getValue()), Bytes.toString(entry.getKey()));
        }
    }
    return collector.getTopK();
}
Also used : Get(co.cask.cdap.api.dataset.table.Get) Row(co.cask.cdap.api.dataset.table.Row) TreeMap(java.util.TreeMap) Map(java.util.Map) ReadOnly(co.cask.cdap.api.annotation.ReadOnly)

Example 89 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class IndexedTable method incrementAndGet.

/**
   * Increments (atomically) the specified row and columns by the specified amounts, and returns the new values.
   * Note that performing this operation on an indexed column will generally have a negative impact on performance,
   * since up to three writes will need to be performed for every increment (one removing the index for the previous,
   * pre-increment value, one adding the index for the incremented value, and one for the increment itself).
   *
   * @see Table#incrementAndGet(byte[], byte[][], long[])
   */
@ReadWrite
@Override
public Row incrementAndGet(byte[] row, byte[][] columns, long[] amounts) {
    if (columns.length != amounts.length) {
        throw new IllegalArgumentException("Size of columns and amounts arguments must match");
    }
    Row existingRow = table.get(row, columns);
    byte[][] updatedValues = new byte[columns.length][];
    NavigableMap<byte[], byte[]> result = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    for (int i = 0; i < columns.length; i++) {
        long existingValue = 0L;
        byte[] existingBytes = existingRow.get(columns[i]);
        if (existingBytes != null) {
            if (existingBytes.length != Bytes.SIZEOF_LONG) {
                throw new NumberFormatException("Attempted to increment a value that is not convertible to long," + " row: " + Bytes.toStringBinary(row) + " column: " + Bytes.toStringBinary(columns[i]));
            }
            existingValue = Bytes.toLong(existingBytes);
            if (indexedColumns.contains(columns[i])) {
                index.delete(createIndexKey(row, columns[i], existingBytes), IDX_COL);
            }
        }
        updatedValues[i] = Bytes.toBytes(existingValue + amounts[i]);
        result.put(columns[i], updatedValues[i]);
        if (indexedColumns.contains(columns[i])) {
            index.put(createIndexKey(row, columns[i], updatedValues[i]), IDX_COL, row);
        }
    }
    table.put(row, columns, updatedValues);
    return new Result(row, result);
}
Also used : Row(co.cask.cdap.api.dataset.table.Row) TreeMap(java.util.TreeMap) Result(co.cask.cdap.api.dataset.table.Result) ReadWrite(co.cask.cdap.api.annotation.ReadWrite)

Example 90 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class IndexedTable method delete.

@WriteOnly
@Override
public void delete(byte[] row, byte[][] columns) {
    Row existingRow = table.get(row, columns);
    if (existingRow.isEmpty()) {
        // no row to delete
        return;
    }
    // delete all index entries
    deleteIndexEntries(existingRow);
    // delete the row's columns
    table.delete(row, columns);
}
Also used : Row(co.cask.cdap.api.dataset.table.Row) WriteOnly(co.cask.cdap.api.annotation.WriteOnly)

Aggregations

Row (co.cask.cdap.api.dataset.table.Row)111 Scanner (co.cask.cdap.api.dataset.table.Scanner)60 Test (org.junit.Test)23 Table (co.cask.cdap.api.dataset.table.Table)20 Get (co.cask.cdap.api.dataset.table.Get)16 ArrayList (java.util.ArrayList)16 TransactionExecutor (org.apache.tephra.TransactionExecutor)16 Map (java.util.Map)15 Put (co.cask.cdap.api.dataset.table.Put)14 HashMap (java.util.HashMap)10 Scan (co.cask.cdap.api.dataset.table.Scan)9 TransactionAware (org.apache.tephra.TransactionAware)9 MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey)8 QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow)8 DatasetId (co.cask.cdap.proto.id.DatasetId)8 IOException (java.io.IOException)8 ImmutableMap (com.google.common.collect.ImmutableMap)7 Transaction (org.apache.tephra.Transaction)7 WriteOnly (co.cask.cdap.api.annotation.WriteOnly)6 Schema (co.cask.cdap.api.data.schema.Schema)6