Search in sources:

Example 91 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class BufferingTableTest method testChangingParamsAndReturnValues.

/**
 * Verifies that the table is isolated from the byte arrays exchanged with its callers: arrays passed to
 * write methods may be modified afterwards, and arrays or maps returned from read methods may be modified,
 * without changing what the table reports as stored.
 *
 * <p>After every in-place modification the test calls {@code verify123(table)}, a helper defined elsewhere
 * in the test class (presumably it asserts that row {1}, column {2} still holds the long value 3 -- confirm
 * against the helper).
 *
 * @throws Exception if any table, admin or transaction operation fails
 */
@Test
public void testChangingParamsAndReturnValues() throws Exception {
    DatasetProperties props = TableProperties.builder().setReadlessIncrementSupport(isReadlessIncrementSupported()).build();
    DatasetAdmin admin = getTableAdmin(CONTEXT1, MY_TABLE, props);
    admin.create();
    try (BufferingTable table = getTable(CONTEXT1, MY_TABLE, props)) {
        // write and commit some data first: it is needed to test delete later
        Transaction tx = txClient.startShort();
        table.startTx(tx);
        table.put(new byte[] { 0 }, new byte[] { 9 }, new byte[] { 8 });
        table.commitTx();
        txClient.commitOrThrow(tx);
        // start a new transaction to test the behavior of the in-memory buffer
        tx = txClient.startShort();
        table.startTx(tx);
        // write some data, but do not commit it
        byte[] rowParam = new byte[] { 1 };
        byte[] colParam = new byte[] { 2 };
        byte[] valParam = Bytes.toBytes(3L);
        table.put(rowParam, colParam, valParam);
        verify123(table);
        // modify the previously passed byte arrays in place: this should not affect the stored values
        rowParam[0]++;
        colParam[0]++;
        valParam[0]++;
        verify123(table);
        // get the row and modify the returned map, key and value in place: the stored data should not change
        Row getRow = table.get(new byte[] { 1 });
        Map<byte[], byte[]> getRowResult = getRow.getColumns();
        Assert.assertEquals(1, getRowResult.size());
        byte[] colFromGetRow = getRowResult.keySet().iterator().next();
        byte[] valFromGetRow = getRowResult.get(colFromGetRow);
        getRowResult.remove(new byte[] { 2 });
        Assert.assertArrayEquals(new byte[] { 2 }, colFromGetRow);
        Assert.assertArrayEquals(Bytes.toBytes(3L), valFromGetRow);
        colFromGetRow[0]++;
        valFromGetRow[0]++;
        verify123(table);
        // get a set of columns in a row and modify the returned values in place: the stored data should not change
        // NOTE(review): this calls the same get(row) as the section above; if the intent is to cover the
        // get(row, columns) overload, the column set should be passed here -- confirm before changing.
        Row getColumnSetRow = table.get(new byte[] { 1 });
        Map<byte[], byte[]> getColumnSetResult = getColumnSetRow.getColumns();
        Assert.assertEquals(1, getColumnSetResult.size());
        byte[] colFromGetColumnSet = getColumnSetResult.keySet().iterator().next();
        byte[] valFromGetColumnSet = getColumnSetResult.values().iterator().next();
        getColumnSetResult.remove(new byte[] { 2 });
        Assert.assertArrayEquals(new byte[] { 2 }, colFromGetColumnSet);
        Assert.assertArrayEquals(Bytes.toBytes(3L), valFromGetColumnSet);
        colFromGetColumnSet[0]++;
        valFromGetColumnSet[0]++;
        verify123(table);
        // get a single column and modify the returned value in place: the stored data should not change
        byte[] valFromGetColumn = table.get(new byte[] { 1 }, new byte[] { 2 });
        Assert.assertArrayEquals(Bytes.toBytes(3L), valFromGetColumn);
        valFromGetColumn[0]++;
        verify123(table);
        // scan and modify the returned row key, columns and value in place: the stored data should not change
        Scanner scan = table.scan(new byte[] { 1 }, null);
        try {
            Row next = scan.next();
            Assert.assertNotNull(next);
            byte[] rowFromScan = next.getRow();
            Assert.assertArrayEquals(new byte[] { 1 }, rowFromScan);
            Map<byte[], byte[]> cols = next.getColumns();
            Assert.assertEquals(1, cols.size());
            byte[] colFromScan = cols.keySet().iterator().next();
            Assert.assertArrayEquals(new byte[] { 2 }, colFromScan);
            byte[] valFromScan = next.get(new byte[] { 2 });
            Assert.assertNotNull(valFromScan);
            Assert.assertArrayEquals(Bytes.toBytes(3L), valFromScan);
            Assert.assertNull(scan.next());
            cols.remove(new byte[] { 2 });
            rowFromScan[0]++;
            colFromScan[0]++;
            valFromScan[0]++;
        } finally {
            // always release the scanner, even when one of the assertions above fails
            scan.close();
        }
        verify123(table);
        // delete and then modify the params in place: this should not affect the stored data
        rowParam = new byte[] { 1 };
        colParam = new byte[] { 2 };
        table.delete(rowParam, colParam);
        Assert.assertNull(table.get(new byte[] { 1 }, new byte[] { 2 }));
        Assert.assertArrayEquals(new byte[] { 8 }, table.get(new byte[] { 0 }, new byte[] { 9 }));
        // point the arrays at the row/column committed earlier: the delete must not "follow" them
        rowParam[0] = 0;
        colParam[0] = 9;
        Assert.assertNull(table.get(new byte[] { 1 }, new byte[] { 2 }));
        Assert.assertArrayEquals(new byte[] { 8 }, table.get(new byte[] { 0 }, new byte[] { 9 }));
        // increment a column and modify the params in place: this should not affect the stored data
        byte[] rowIncParam = new byte[] { 1 };
        byte[] colIncParam = new byte[] { 2 };
        table.increment(rowIncParam, colIncParam, 3);
        verify123(table);
        rowIncParam[0]++;
        colIncParam[0]++;
        verify123(table);
        // increment a set of columns and modify the params in place: this should not affect the stored data
        rowIncParam = new byte[] { 1 };
        colIncParam = new byte[] { 2 };
        // decrement first so that the increment under test brings the value back to the expected one
        table.increment(rowIncParam, colIncParam, -1);
        table.increment(rowIncParam, new byte[][] { colIncParam }, new long[] { 1 });
        verify123(table);
        rowIncParam[0]++;
        colIncParam[0]++;
        verify123(table);
        // increment-and-get and modify the returned values: this should not affect the stored data
        rowIncParam = new byte[] { 1 };
        colIncParam = new byte[] { 2 };
        table.increment(rowIncParam, colIncParam, -1);
        Row countersRow = table.incrementAndGet(rowIncParam, new byte[][] { colIncParam }, new long[] { 1 });
        Map<byte[], byte[]> counters = countersRow.getColumns();
        Assert.assertEquals(1, counters.size());
        byte[] colFromInc = counters.keySet().iterator().next();
        Assert.assertArrayEquals(new byte[] { 2 }, colFromInc);
        Assert.assertEquals(3, Bytes.toLong(counters.get(colFromInc)));
        counters.remove(new byte[] { 2 });
        colFromInc[0]++;
        verify123(table);
        // increment write and modify the params in place: this should not affect the stored data
        rowIncParam = new byte[] { 1 };
        colIncParam = new byte[] { 2 };
        table.increment(rowIncParam, colIncParam, -1);
        table.increment(rowIncParam, new byte[][] { colIncParam }, new long[] { 1 });
        verify123(table);
        rowIncParam[0]++;
        colIncParam[0]++;
        verify123(table);
        // compareAndSwap and modify the params in place: this should not affect the stored data
        byte[] rowSwapParam = new byte[] { 1 };
        byte[] colSwapParam = new byte[] { 2 };
        byte[] valSwapParam = Bytes.toBytes(3L);
        // swap 3 -> 4 and back to 3, so that the final swap stores the array that is modified below
        table.compareAndSwap(rowSwapParam, colSwapParam, Bytes.toBytes(3L), Bytes.toBytes(4L));
        table.compareAndSwap(rowSwapParam, colSwapParam, Bytes.toBytes(4L), valSwapParam);
        verify123(table);
        rowSwapParam[0]++;
        colSwapParam[0]++;
        valSwapParam[0]++;
        verify123(table);
    // We don't care to persist changes and commit tx here: we tested what we wanted
    } finally {
        admin.drop();
    }
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) Transaction(org.apache.tephra.Transaction) DatasetProperties(io.cdap.cdap.api.dataset.DatasetProperties) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) Row(io.cdap.cdap.api.dataset.table.Row) Test(org.junit.Test)

Example 92 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class IndexedTable method delete.

/**
 * Deletes a row from the data table together with the index entries derived from it.
 * Does nothing when the data table has no content for the given row.
 *
 * @param row key of the row to delete
 */
@WriteOnly
@Override
public void delete(byte[] row) {
    Row current = table.get(row);
    if (!current.isEmpty()) {
        // index entries are removed first, while the row contents are still at hand
        deleteIndexEntries(current);
        // then the data row itself
        table.delete(row);
    }
}
Also used : Row(io.cdap.cdap.api.dataset.table.Row) WriteOnly(io.cdap.cdap.api.annotation.WriteOnly)

Example 93 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class IndexedTable method put.

/**
 * Stores a {@link Put} in the data table and keeps the index table in step with it. For every column of the
 * put that is configured as indexed, a stale index entry (one pointing at a different, previously stored
 * value) is deleted and a new entry referencing the data table row is written, unless the stored value is
 * already identical to the new one.
 *
 * @param put The put operation to store
 */
@WriteOnly
@Override
public void put(Put put) {
    byte[] rowKey = put.getRow();
    Map<byte[], byte[]> newValues = put.getValues();
    // collect the columns of this put that are configured for indexing
    Set<byte[]> pendingIndexCols = new TreeSet<>(Bytes.BYTES_COMPARATOR);
    for (byte[] column : newValues.keySet()) {
        if (indexedColumns.contains(column)) {
            pendingIndexCols.add(column);
        }
    }
    if (!pendingIndexCols.isEmpty()) {
        // read the currently stored values of those columns to find out which ones actually change
        byte[][] columnsToRead = pendingIndexCols.toArray(new byte[pendingIndexCols.size()][]);
        Row current = table.get(rowKey, columnsToRead);
        for (Map.Entry<byte[], byte[]> stored : current.getColumns().entrySet()) {
            byte[] column = stored.getKey();
            byte[] oldValue = stored.getValue();
            if (Arrays.equals(oldValue, newValues.get(column))) {
                // same value is already indexed: nothing to write for this column
                pendingIndexCols.remove(column);
            } else {
                // value differs: drop the index reference to the old value
                index.delete(createIndexKey(rowKey, column, oldValue), IDX_COL);
            }
        }
        // whatever is left either changed or did not exist before: add its index entry
        for (byte[] column : pendingIndexCols) {
            index.put(createIndexKey(rowKey, column, newValues.get(column)), IDX_COL, rowKey);
        }
    }
    // finally, store the data row itself
    table.put(put);
}
Also used : TreeSet(java.util.TreeSet) Row(io.cdap.cdap.api.dataset.table.Row) Map(java.util.Map) NavigableMap(java.util.NavigableMap) TreeMap(java.util.TreeMap) WriteOnly(io.cdap.cdap.api.annotation.WriteOnly)

Example 94 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class LookupTransform method transform.

/**
 * Looks up the value of the configured lookup key field of the input record and emits a copy of the record
 * extended with the lookup result.
 *
 * <p>A {@code String} result is stored in the configured destination field. A {@link Row} result contributes
 * one string field per column, named after the column. Any other result, including {@code null}, is rejected.
 *
 * @param input the record to enrich
 * @param emitter receives the single enriched output record
 * @throws IllegalArgumentException if the lookup result is {@code null} or neither a {@code String} nor a
 *     {@link Row}
 */
@Override
public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
    T lookedUpValue = lookup.lookup((String) input.get(config.lookupKey));
    // for the output schema, copy all the input fields, and add the field(s) for the lookup result
    List<Schema.Field> outFields = new ArrayList<>(input.getSchema().getFields());
    if (lookedUpValue instanceof String) {
        outFields.add(Schema.Field.of(config.destinationField, Schema.of(Schema.Type.STRING)));
    } else if (lookedUpValue instanceof Row) {
        Row lookedupRow = (Row) lookedUpValue;
        for (byte[] column : lookedupRow.getColumns().keySet()) {
            outFields.add(Schema.Field.of(Bytes.toString(column), Schema.of(Schema.Type.STRING)));
        }
    } else {
        // a null result must not be dereferenced while building the message: report it explicitly instead
        throw new IllegalArgumentException(
            "Unexpected value type: " + (lookedUpValue == null ? "null" : lookedUpValue.getClass()));
    }
    Schema outSchema = Schema.recordOf(input.getSchema().getRecordName(), outFields);
    // copy all the values
    StructuredRecord.Builder outputBuilder = StructuredRecord.builder(outSchema);
    for (Schema.Field inField : input.getSchema().getFields()) {
        if (inField.getName().equals(config.lookupKey)) {
            if (lookedUpValue instanceof String) {
                outputBuilder.set(config.destinationField, lookedUpValue);
            } else {
                // due to the check above, we know it's a Row
                Row lookedupRow = (Row) lookedUpValue;
                for (Map.Entry<byte[], byte[]> entry : lookedupRow.getColumns().entrySet()) {
                    outputBuilder.set(Bytes.toString(entry.getKey()), Bytes.toString(entry.getValue()));
                }
            }
        }
        // open question: what if the destinationField already exists among the input fields?
        outputBuilder.set(inField.getName(), input.get(inField.getName()));
    }
    emitter.emit(outputBuilder.build());
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) PluginPropertyField(io.cdap.cdap.api.plugin.PluginPropertyField) Row(io.cdap.cdap.api.dataset.table.Row) HashMap(java.util.HashMap) Map(java.util.Map)

Example 95 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class BufferingTable method get.

/**
 * Performs a multi-get: fetches the persisted data for every {@link Get}, overlays the changes currently
 * held in the in-memory buffer for the same row, and returns one {@link Row} per request, in request order.
 *
 * @param gets the get operations to perform
 * @return the resulting rows, one for each get
 * @throws DataSetException if anything fails while fetching or merging; the original exception is the cause
 */
@ReadOnly
@Override
public List<Row> get(List<Get> gets) {
    ensureTransactionIsStarted();
    try {
        // start from what is persisted; buffered changes are layered on top below
        List<Map<byte[], byte[]>> fetched = getPersisted(gets);
        // there must be exactly one fetched row per requested get
        Preconditions.checkArgument(gets.size() == fetched.size(), "Invalid number of rows fetched when performing multi-get. There must be one row for each get.");
        List<Row> rows = Lists.newArrayListWithCapacity(fetched.size());
        Iterator<Map<byte[], byte[]>> fetchedIter = fetched.iterator();
        for (Iterator<Get> requestIter = gets.iterator(); fetchedIter.hasNext() && requestIter.hasNext(); ) {
            Get request = requestIter.next();
            // the fetched map may be immutable or unmodifiable, so work on a navigable copy of it
            NavigableMap<byte[], byte[]> merged = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
            merged.putAll(fetchedIter.next());
            byte[] rowKey = request.getRow();
            NavigableMap<byte[], Update> buffered = buff.get(rowKey);
            if (buffered != null) {
                // overlay the buffered updates, restricted to the requested columns when any were given
                List<byte[]> requestedCols = request.getColumns();
                byte[][] columnArray = null;
                if (requestedCols != null) {
                    columnArray = requestedCols.toArray(new byte[requestedCols.size()][]);
                }
                mergeToPersisted(merged, buffered, columnArray);
            }
            rows.add(new Result(rowKey, unwrapDeletes(merged)));
        }
        return rows;
    } catch (Exception e) {
        LOG.debug("multi-get failed for table: " + getTransactionAwareName(), e);
        throw new DataSetException("multi-get failed", e);
    }
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) IOException(java.io.IOException) Result(io.cdap.cdap.api.dataset.table.Result) DataSetException(io.cdap.cdap.api.dataset.DataSetException) Get(io.cdap.cdap.api.dataset.table.Get) Row(io.cdap.cdap.api.dataset.table.Row) Map(java.util.Map) NavigableMap(java.util.NavigableMap) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) ReadOnly(io.cdap.cdap.api.annotation.ReadOnly)

Aggregations

Row (io.cdap.cdap.api.dataset.table.Row): 166, Scanner (io.cdap.cdap.api.dataset.table.Scanner): 81, Test (org.junit.Test): 50, Table (io.cdap.cdap.api.dataset.table.Table): 34, Put (io.cdap.cdap.api.dataset.table.Put): 29, ArrayList (java.util.ArrayList): 26, TransactionExecutor (org.apache.tephra.TransactionExecutor): 26, Get (io.cdap.cdap.api.dataset.table.Get): 24, Schema (io.cdap.cdap.api.data.schema.Schema): 21, HashMap (java.util.HashMap): 19, MDSKey (io.cdap.cdap.data2.dataset2.lib.table.MDSKey): 16, Transaction (org.apache.tephra.Transaction): 16, TransactionAware (org.apache.tephra.TransactionAware): 16, IOException (java.io.IOException): 14, Map (java.util.Map): 14, StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 13, DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin): 12, WriteOnly (io.cdap.cdap.api.annotation.WriteOnly): 10, DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue): 10, HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 10