
Example 6 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class LevelDBQueueEvictor method doEvict.

private synchronized int doEvict(Transaction transaction) throws IOException {
    final byte[] stopRow = QueueEntryRow.getStopRowForTransaction(queueRowPrefix, transaction);
    Row row;
    List<byte[]> rowsToDelete = Lists.newArrayList();
    // the scan must be non-transactional in order to see the state columns (which have the latest timestamp)
    try (Scanner scanner = core.scan(queueRowPrefix, stopRow, null, null, Transaction.ALL_VISIBLE_LATEST)) {
        while ((row = scanner.next()) != null) {
            int processed = 0;
            for (Map.Entry<byte[], byte[]> entry : row.getColumns().entrySet()) {
                // is it a state column for a consumer instance?
                if (!QueueEntryRow.isStateColumn(entry.getKey())) {
                    continue;
                }
                // is the write pointer of this state committed w.r.t. the current transaction, and is it processed?
                if (QueueEntryRow.isCommittedProcessed(entry.getValue(), transaction)) {
                    ++processed;
                }
            }
            if (processed >= numGroups) {
                rowsToDelete.add(row.getRow());
            }
        }
    }
    if (!rowsToDelete.isEmpty()) {
        core.deleteRows(rowsToDelete);
        LOG.trace("Evicted {} entries from queue {}", rowsToDelete.size(), name);
    } else {
        LOG.trace("Nothing to evict from queue {}", name);
    }
    return rowsToDelete.size();
}
Also used : Scanner (co.cask.cdap.api.dataset.table.Scanner), QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow), Row (co.cask.cdap.api.dataset.table.Row), Map (java.util.Map)
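
The eviction code above uses the Scanner/Row pattern that recurs throughout these examples: open a Scanner over a key range, iterate Rows until next() returns null, and read each Row's columns as a byte[]-to-byte[] map. A minimal sketch of that pattern against a plain Table (the helper name and the printing are illustrative; Table, Scanner, Row, Map.Entry and Bytes are the same classes used in the examples):

private static void dumpRows(Table table, byte[] startRow, byte[] stopRow) {
    // the Scanner is closed automatically by try-with-resources, as in the examples above
    try (Scanner scanner = table.scan(startRow, stopRow)) {
        Row row;
        while ((row = scanner.next()) != null) {
            // each Row exposes its key (getRow) and its columns as a byte[] -> byte[] map
            for (Map.Entry<byte[], byte[]> entry : row.getColumns().entrySet()) {
                System.out.printf("%s : %s = %s%n",
                                  Bytes.toString(row.getRow()),
                                  Bytes.toString(entry.getKey()),
                                  Bytes.toString(entry.getValue()));
            }
        }
    }
}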

Example 7 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class DatasetBasedStreamSizeScheduleStoreTest method testDeletion.

private void testDeletion(final ProgramId programId) throws Exception {
    final boolean defaultVersion = programId.getVersion().equals(ApplicationId.DEFAULT_VERSION);
    DatasetId storeTable = NamespaceId.SYSTEM.dataset(ScheduleStoreTableUtil.SCHEDULE_STORE_DATASET_NAME);
    final Table table = datasetFramework.getDataset(storeTable, ImmutableMap.<String, String>of(), null);
    Assert.assertNotNull(table);
    TransactionExecutor txnl = txExecutorFactory.createExecutor(ImmutableList.of((TransactionAware) table));
    final byte[] startKey = Bytes.toBytes(DatasetBasedStreamSizeScheduleStore.KEY_PREFIX);
    final byte[] stopKey = Bytes.stopKeyForPrefix(startKey);
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Scanner scanner = table.scan(startKey, stopKey);
            Assert.assertNull(scanner.next());
            scanner.close();
        }
    });
    // Create one stream schedule - this will be persisted with the new format
    scheduleStore.persist(programId, PROGRAM_TYPE, STREAM_SCHEDULE_1, MAP_1, 0L, 0L, 0L, 0L, true);
    // Create one stream schedule - based on the old format
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Create a programId without version so that we can create an old format schedule
            ProgramId defaultProgramId = new ProgramId(programId.getNamespace(), programId.getApplication(), programId.getType(), programId.getProgram());
            String newRowKey = scheduleStore.getRowKey(defaultProgramId, PROGRAM_TYPE, STREAM_SCHEDULE_1.getName());
            Row row = table.get(Bytes.toBytes(scheduleStore.getRowKey(programId, PROGRAM_TYPE, STREAM_SCHEDULE_1.getName())));
            Assert.assertFalse(row.isEmpty());
            byte[] oldRowKey = Bytes.toBytes(scheduleStore.removeAppVersion(newRowKey));
            for (Map.Entry<byte[], byte[]> entry : row.getColumns().entrySet()) {
                table.put(oldRowKey, entry.getKey(), entry.getValue());
            }
        }
    });
    // Make sure there are only two stream size schedules
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Scanner scanner = table.scan(startKey, stopKey);
            int numRows = 0;
            while (true) {
                Row row = scanner.next();
                if (row == null) {
                    break;
                }
                numRows++;
            }
            scanner.close();
            Assert.assertEquals(2, numRows);
        }
    });
    // This delete should have deleted both the old and the new format rows
    scheduleStore.delete(programId, PROGRAM_TYPE, STREAM_SCHEDULE_1.getName());
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Scanner scanner = table.scan(startKey, stopKey);
            if (defaultVersion) {
                Assert.assertNull(scanner.next());
            } else {
                Assert.assertNotNull(scanner.next());
                Assert.assertNull(scanner.next());
            }
            scanner.close();
        }
    });
    // If the version is not default, we need to delete the row which didn't have a version
    if (!defaultVersion) {
        txnl.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // Create a programId without version so that we can create the row key to delete the old format schedule
                ProgramId defaultProgramId = new ProgramId(programId.getNamespace(), programId.getApplication(), programId.getType(), programId.getProgram());
                String newRowKey = scheduleStore.getRowKey(defaultProgramId, PROGRAM_TYPE, STREAM_SCHEDULE_1.getName());
                byte[] oldRowKey = Bytes.toBytes(scheduleStore.removeAppVersion(newRowKey));
                Row row = table.get(oldRowKey);
                Assert.assertFalse(row.isEmpty());
                table.delete(oldRowKey);
            }
        });
    }
}
Also used : Scanner (co.cask.cdap.api.dataset.table.Scanner), Table (co.cask.cdap.api.dataset.table.Table), TransactionExecutor (org.apache.tephra.TransactionExecutor), ProgramId (co.cask.cdap.proto.id.ProgramId), DatasetId (co.cask.cdap.proto.id.DatasetId), TransactionAware (org.apache.tephra.TransactionAware), Row (co.cask.cdap.api.dataset.table.Row)
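
The interesting step in this test is where it simulates the old row-key format by copying an existing schedule row under a second key. A minimal sketch of that row-copy step, assuming it runs inside a transaction as the test does (the helper name and parameters are illustrative, not part of the CDAP API):

private static void copyRow(Table table, byte[] fromKey, byte[] toKey) {
    // read the source Row and re-write each of its columns under the destination key
    Row source = table.get(fromKey);
    if (source.isEmpty()) {
        return; // nothing to copy
    }
    for (Map.Entry<byte[], byte[]> entry : source.getColumns().entrySet()) {
        table.put(toKey, entry.getKey(), entry.getValue());
    }
}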

Example 8 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class LookupTransform method transform.

@Override
public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
    T lookedUpValue = lookup.lookup((String) input.get(config.lookupKey));
    // for the output schema, copy all the input fields, and add the 'destinationField'
    List<Schema.Field> outFields = new ArrayList<>();
    for (Schema.Field field : input.getSchema().getFields()) {
        outFields.add(field);
    }
    if (lookedUpValue instanceof String) {
        outFields.add(Schema.Field.of(config.destinationField, Schema.of(Schema.Type.STRING)));
    } else if (lookedUpValue instanceof Row) {
        Row lookedupRow = (Row) lookedUpValue;
        for (byte[] column : lookedupRow.getColumns().keySet()) {
            outFields.add(Schema.Field.of(Bytes.toString(column), Schema.of(Schema.Type.STRING)));
        }
    } else {
        throw new IllegalArgumentException("Unexpected value type: " + lookedUpValue.getClass());
    }
    Schema outSchema = Schema.recordOf(input.getSchema().getRecordName(), outFields);
    // copy all the values
    StructuredRecord.Builder outputBuilder = StructuredRecord.builder(outSchema);
    for (Schema.Field inField : input.getSchema().getFields()) {
        if (inField.getName().equals(config.lookupKey)) {
            if (lookedUpValue instanceof String) {
                outputBuilder.set(config.destinationField, lookedUpValue);
            } else {
                // due to the check above, we know it's a Row
                Row lookedupRow = (Row) lookedUpValue;
                for (Map.Entry<byte[], byte[]> entry : lookedupRow.getColumns().entrySet()) {
                    outputBuilder.set(Bytes.toString(entry.getKey()), Bytes.toString(entry.getValue()));
                }
            }
        }
        // what if the destinationField already exists?
        outputBuilder.set(inField.getName(), input.get(inField.getName()));
    }
    emitter.emit(outputBuilder.build());
}
Also used : Schema (co.cask.cdap.api.data.schema.Schema), ArrayList (java.util.ArrayList), StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord), PluginPropertyField (co.cask.cdap.api.plugin.PluginPropertyField), Row (co.cask.cdap.api.dataset.table.Row), HashMap (java.util.HashMap), Map (java.util.Map)
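
When the looked-up value is a Row, the transform above effectively converts every column into a string field. A minimal sketch of that conversion on its own (the helper name is illustrative; Row, Bytes, Map and HashMap are the same classes the transform uses):

private static Map<String, String> rowToFields(Row row) {
    Map<String, String> fields = new HashMap<>();
    // column keys and values are raw bytes; decode both to strings for the output record
    for (Map.Entry<byte[], byte[]> entry : row.getColumns().entrySet()) {
        fields.put(Bytes.toString(entry.getKey()), Bytes.toString(entry.getValue()));
    }
    return fields;
}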

Example 9 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class MockRuntimeDatasetSink method readOutput.

/**
   * Used to read the records written by this sink.
   *
   * @param tableManager dataset manager used to get the sink dataset to read from
   */
public static List<StructuredRecord> readOutput(DataSetManager<Table> tableManager) throws Exception {
    Table table = tableManager.get();
    try (Scanner scanner = table.scan(null, null)) {
        List<StructuredRecord> records = new ArrayList<>();
        Row row;
        while ((row = scanner.next()) != null) {
            Schema schema = Schema.parseJson(row.getString(SCHEMA_COL));
            String recordStr = row.getString(RECORD_COL);
            records.add(StructuredRecordStringConverter.fromJsonString(recordStr, schema));
        }
        return records;
    }
}
Also used : Scanner (co.cask.cdap.api.dataset.table.Scanner), Table (co.cask.cdap.api.dataset.table.Table), KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable), Schema (co.cask.cdap.api.data.schema.Schema), ArrayList (java.util.ArrayList), Row (co.cask.cdap.api.dataset.table.Row), StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)
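
A hedged usage sketch of readOutput in a test (not from the source): it assumes the test class extends the CDAP test base, whose getDataset(String) returns a DataSetManager for the sink's backing table, and the dataset name "sinkTable" is purely illustrative.

private void verifySinkOutput(List<StructuredRecord> expected) throws Exception {
    // getDataset(...) and the dataset name are assumptions about the surrounding test harness
    DataSetManager<Table> sinkManager = getDataset("sinkTable");
    List<StructuredRecord> actual = MockRuntimeDatasetSink.readOutput(sinkManager);
    Assert.assertEquals(expected, actual);
}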

Example 10 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class MockSink method readOutput.

/**
   * Used to read the records written by this sink.
   *
   * @param tableManager dataset manager used to get the sink dataset to read from
   */
public static List<StructuredRecord> readOutput(DataSetManager<Table> tableManager) throws Exception {
    tableManager.flush();
    Table table = tableManager.get();
    try (Scanner scanner = table.scan(null, null)) {
        List<StructuredRecord> records = new ArrayList<>();
        Row row;
        while ((row = scanner.next()) != null) {
            Schema schema = Schema.parseJson(row.getString(SCHEMA_COL));
            String recordStr = row.getString(RECORD_COL);
            records.add(StructuredRecordStringConverter.fromJsonString(recordStr, schema));
        }
        return records;
    }
}
Also used : Scanner (co.cask.cdap.api.dataset.table.Scanner), Table (co.cask.cdap.api.dataset.table.Table), Schema (co.cask.cdap.api.data.schema.Schema), ArrayList (java.util.ArrayList), Row (co.cask.cdap.api.dataset.table.Row), StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)
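
Unlike Example 9, this variant calls tableManager.flush() before scanning, which makes the latest writes to the underlying dataset visible to the read.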

Aggregations

Row (co.cask.cdap.api.dataset.table.Row): 111
Scanner (co.cask.cdap.api.dataset.table.Scanner): 60
Test (org.junit.Test): 23
Table (co.cask.cdap.api.dataset.table.Table): 20
Get (co.cask.cdap.api.dataset.table.Get): 16
ArrayList (java.util.ArrayList): 16
TransactionExecutor (org.apache.tephra.TransactionExecutor): 16
Map (java.util.Map): 15
Put (co.cask.cdap.api.dataset.table.Put): 14
HashMap (java.util.HashMap): 10
Scan (co.cask.cdap.api.dataset.table.Scan): 9
TransactionAware (org.apache.tephra.TransactionAware): 9
MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey): 8
QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow): 8
DatasetId (co.cask.cdap.proto.id.DatasetId): 8
IOException (java.io.IOException): 8
ImmutableMap (com.google.common.collect.ImmutableMap): 7
Transaction (org.apache.tephra.Transaction): 7
WriteOnly (co.cask.cdap.api.annotation.WriteOnly): 6
Schema (co.cask.cdap.api.data.schema.Schema): 6