
Example 21 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class DefaultPreviewStore method getAllInWaitingState.

@Override
public List<PreviewRequest> getAllInWaitingState() {
    // PreviewStore is a singleton and we have to create gson for each operation since gson is not thread safe.
    Gson gson = new GsonBuilder().registerTypeAdapter(Schema.class, new SchemaTypeAdapter()).create();
    byte[] startRowKey = new MDSKey.Builder().add(WAITING).build().getKey();
    byte[] stopRowKey = new MDSKey(Bytes.stopKeyForPrefix(startRowKey)).getKey();
    List<PreviewRequest> result = new ArrayList<>();
    try (Scanner scanner = previewQueueTable.scan(startRowKey, stopRowKey, null, null, null)) {
        Row indexRow;
        while ((indexRow = scanner.next()) != null) {
            Map<byte[], byte[]> columns = indexRow.getColumns();
            AppRequest request = gson.fromJson(Bytes.toString(columns.get(CONFIG)), AppRequest.class);
            ApplicationId applicationId = gson.fromJson(Bytes.toString(columns.get(APPID)), ApplicationId.class);
            Principal principal = gson.fromJson(Bytes.toString(columns.get(PRINCIPAL)), Principal.class);
            result.add(new PreviewRequest(applicationId, request, principal));
        }
    } catch (IOException e) {
        throw new RuntimeException("Error while listing the waiting preview requests.", e);
    }
    return result;
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) GsonBuilder(com.google.gson.GsonBuilder) Schema(io.cdap.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson) MDSKey(io.cdap.cdap.data2.dataset2.lib.table.MDSKey) IOException(java.io.IOException) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) SchemaTypeAdapter(io.cdap.cdap.internal.io.SchemaTypeAdapter) Row(io.cdap.cdap.api.dataset.table.Row) PreviewRequest(io.cdap.cdap.app.preview.PreviewRequest) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Principal(io.cdap.cdap.proto.security.Principal)
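The pattern above — build start and stop keys for a prefix, scan the range, then pull each Row's column map — is reusable outside the preview store. Below is a minimal sketch of that scan loop against the public Table API; the class and method names (RowScanSketch, collectRange) are illustrative only, and the two-argument Table.scan is assumed here in place of the five-argument internal overload used by previewQueueTable.

import io.cdap.cdap.api.dataset.table.Row;
import io.cdap.cdap.api.dataset.table.Scanner;
import io.cdap.cdap.api.dataset.table.Table;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

// Illustrative helper, not part of CDAP: collect the raw columns of every row in [start, stop).
public class RowScanSketch {

    // IOException is declared defensively: Scanner extends Closeable, whose close() may declare it.
    static List<Map<byte[], byte[]>> collectRange(Table table, byte[] start, byte[] stop) throws IOException {
        List<Map<byte[], byte[]>> rows = new ArrayList<>();
        // try-with-resources releases the scanner even if processing a row fails.
        try (Scanner scanner = table.scan(start, stop)) {
            Row row;
            // next() returns null once the end of the range is reached.
            while ((row = scanner.next()) != null) {
                rows.add(row.getColumns());  // column name -> value, both as byte[]
            }
        }
        return rows;
    }
}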

Example 22 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class MapReduceWithPartitionedTest method testPartitionedFileSetWithMR.

private void testPartitionedFileSetWithMR(boolean useCombineFileInputFormat) throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithPartitionedFileSet.class, new AppWithPartitionedFileSet.AppConfig(useCombineFileInputFormat));
    // write a value to the input table
    final Table table = datasetCache.getDataset(AppWithPartitionedFileSet.INPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.put(Bytes.toBytes("x"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
        }
    });
    // a partition key for the map/reduce output
    final PartitionKey keyX = PartitionKey.builder().addStringField("type", "x").addLongField("time", 150000L).build();
    // run the partition writer m/r with this output partition time
    Map<String, String> runtimeArguments = Maps.newHashMap();
    Map<String, String> outputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    final PartitionedFileSet dataset = datasetCache.getDataset(PARTITIONED);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyX);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertTrue(path.contains("x"));
            Assert.assertTrue(path.contains("150000"));
        }
    });
    // delete the data in the input table and write a new row
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.delete(Bytes.toBytes("x"));
            table.put(Bytes.toBytes("y"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
        }
    });
    // a new partition key for the next map/reduce
    final PartitionKey keyY = PartitionKey.builder().addStringField("type", "y").addLongField("time", 200000L).build();
    // now run the m/r again with a new partition time, say 5 minutes later
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyY);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertNotNull(path);
            Assert.assertTrue(path.contains("y"));
            Assert.assertTrue(path.contains("200000"));
        }
    });
    // a partition filter that matches the outputs of both map/reduces
    PartitionFilter filterXY = PartitionFilter.builder().addRangeCondition("type", "x", "z").build();
    // now run a map/reduce that reads all the partitions
    runtimeArguments = Maps.newHashMap();
    Map<String, String> inputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterXY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "a");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read both partitions - and written both x and y to row a
    final Table output = datasetCache.getDataset(AppWithPartitionedFileSet.OUTPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("a"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertEquals("2", row.getString("y"));
            Assert.assertEquals("{type=y, time=200000}", row.getString("y_key"));
        }
    });
    // a partition filter that matches the output key of the first map/reduce
    PartitionFilter filterX = PartitionFilter.builder().addValueCondition("type", "x").addRangeCondition("time", null, 160000L).build();
    // now run a map/reduce that reads a range of the partitions, namely the first one
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "b");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read the first partition only - and written only x to row b
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("b"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertNull(row.get("y"));
            Assert.assertNull(row.get("y_key"));
        }
    });
    // a partition filter that matches no key
    PartitionFilter filterMT = PartitionFilter.builder().addValueCondition("type", "nosuchthing").build();
    // now run a map/reduce that reads an empty range of partitions (the filter matches nothing)
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterMT);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "n");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read no partitions - and written nothing to row n
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("n"));
            Assert.assertTrue(row.isEmpty());
        }
    });
}
Also used : Partition(io.cdap.cdap.api.dataset.lib.Partition) Table(io.cdap.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) Row(io.cdap.cdap.api.dataset.table.Row)
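In the test above, each PartitionReader run writes whatever it read into a single row of the output table, keyed by the ROW_TO_WRITE argument, and the assertions then read that row back. A small sketch of that verification read follows; the class and method names are invented, and the Bytes import (io.cdap.cdap.api.common.Bytes) is an assumption, while the column names mirror the ones asserted in the test.

import io.cdap.cdap.api.common.Bytes;
import io.cdap.cdap.api.dataset.table.Row;
import io.cdap.cdap.api.dataset.table.Table;

// Illustrative helper, not part of the test: summarize which partitions a reader run copied into a row.
public class OutputRowSketch {

    static String describe(Table output, String rowName) {
        // Table.get never returns null; a missing row comes back as an empty Row.
        Row row = output.get(Bytes.toBytes(rowName));
        if (row.isEmpty()) {
            return rowName + ": no partitions read";        // e.g. row "n" when the filter matched nothing
        }
        StringBuilder sb = new StringBuilder(rowName + ":");
        if (row.get("x") != null) {
            sb.append(" x=").append(row.getString("x"))     // value read from partition {type=x, time=150000}
              .append(" via ").append(row.getString("x_key"));
        }
        if (row.get("y") != null) {
            sb.append(" y=").append(row.getString("y"))     // value read from partition {type=y, time=200000}
              .append(" via ").append(row.getString("y_key"));
        }
        return sb.toString();
    }
}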

Example 23 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class ReflectionTableTest method assertGetAndPut.

private void assertGetAndPut(final Table table, final byte[] rowKey, final User obj, final Schema schema) throws Exception {
    // TableDataset is not accessible here, but we know that's the underlying implementation...
    TransactionExecutor tx = dsFrameworkUtil.newTransactionExecutor((TransactionAware) table);
    tx.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Put put = new Put(rowKey);
            ReflectionPutWriter<User> putWriter = new ReflectionPutWriter<>(schema);
            putWriter.write(obj, put);
            table.put(put);
            Row row = table.get(rowKey);
            ReflectionRowReader<User> rowReader = new ReflectionRowReader<>(schema, TypeToken.of(User.class));
            User actual = rowReader.read(row, schema);
            Assert.assertEquals(obj, actual);
        }
    });
}
Also used : ReflectionRowReader(io.cdap.cdap.internal.io.ReflectionRowReader) ReflectionPutWriter(io.cdap.cdap.internal.io.ReflectionPutWriter) TransactionExecutor(org.apache.tephra.TransactionExecutor) Row(io.cdap.cdap.api.dataset.table.Row) Put(io.cdap.cdap.api.dataset.table.Put)
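The put-then-get round trip above can be phrased as a small generic helper. This is only a sketch: the class and method names are invented, the TypeToken is assumed to be Guava's com.google.common.reflect.TypeToken as used in these tests, and the call must still run inside a transaction like the subroutine above.

import com.google.common.reflect.TypeToken;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.api.dataset.table.Put;
import io.cdap.cdap.api.dataset.table.Row;
import io.cdap.cdap.api.dataset.table.Table;
import io.cdap.cdap.internal.io.ReflectionPutWriter;
import io.cdap.cdap.internal.io.ReflectionRowReader;

// Illustrative helper, not part of CDAP: write a bean to a row and read it back with the same schema.
public class ReflectionRoundTripSketch {

    static <T> T roundTrip(Table table, byte[] rowKey, T value, Schema schema, Class<T> type) throws Exception {
        Put put = new Put(rowKey);
        new ReflectionPutWriter<T>(schema).write(value, put);   // one column per schema field
        table.put(put);
        Row row = table.get(rowKey);                            // read the columns back as a Row
        return new ReflectionRowReader<>(schema, TypeToken.of(type)).read(row, schema);
    }
}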

Example 24 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class ReflectionTableTest method testStructuredRecordProjection.

@Test
public void testStructuredRecordProjection() throws Exception {
    dsFrameworkUtil.createInstance("table", users, DatasetProperties.builder().build());
    try {
        final Table usersTable = dsFrameworkUtil.getInstance(users);
        final byte[] rowKey = Bytes.toBytes(123);
        final User2 projected = new User2("Samuel L.", 123L, ((Float) 50000000.02f).doubleValue(), Double.MAX_VALUE, ByteBuffer.wrap(new byte[] { 0, 1, 2 }));
        final Schema fullSchema = new ReflectionSchemaGenerator().generate(User.class);
        final Schema projSchema = new ReflectionSchemaGenerator().generate(User2.class);
        // TableDataset is not accessible here, but we know that's the underlying implementation...
        TransactionExecutor tx = dsFrameworkUtil.newTransactionExecutor((TransactionAware) usersTable);
        tx.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                Put put = new Put(rowKey);
                ReflectionPutWriter<User> putWriter = new ReflectionPutWriter<>(fullSchema);
                putWriter.write(SAMUEL, put);
                usersTable.put(put);
                Row row = usersTable.get(rowKey);
                ReflectionRowRecordReader rowReader = new ReflectionRowRecordReader(projSchema, null);
                StructuredRecord actual = rowReader.read(row, fullSchema);
                assertRecordEqualsUser(projected, actual);
            }
        });
    } finally {
        dsFrameworkUtil.deleteInstance(users);
    }
}
Also used : Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) TransactionExecutor(org.apache.tephra.TransactionExecutor) ReflectionSchemaGenerator(io.cdap.cdap.internal.io.ReflectionSchemaGenerator) Put(io.cdap.cdap.api.dataset.table.Put) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) ReflectionPutWriter(io.cdap.cdap.internal.io.ReflectionPutWriter) Row(io.cdap.cdap.api.dataset.table.Row) ReflectionRowRecordReader(io.cdap.cdap.internal.io.ReflectionRowRecordReader) Test(org.junit.Test)

Example 25 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class PartitionedFileSetDataset method removeMetadata.

@Override
public void removeMetadata(PartitionKey key, Set<String> metadataKeys) {
    final byte[] rowKey = generateRowKey(key, partitioning);
    Row row = partitionsTable.get(rowKey);
    if (row.isEmpty()) {
        throw new PartitionNotFoundException(key, getName());
    }
    int i = 0;
    byte[][] deleteColumns = new byte[metadataKeys.size()][];
    for (String metadataKey : metadataKeys) {
        deleteColumns[i++] = columnKeyFromMetadataKey(metadataKey);
    }
    partitionsTable.delete(rowKey, deleteColumns);
}
Also used : PartitionNotFoundException(io.cdap.cdap.api.dataset.PartitionNotFoundException) Row(io.cdap.cdap.api.dataset.table.Row)
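The get, check isEmpty, then delete shape used in removeMetadata is a general pattern for conditional column deletes on a Table. A minimal sketch follows; the class and method names are illustrative, not CDAP APIs.

import io.cdap.cdap.api.dataset.table.Row;
import io.cdap.cdap.api.dataset.table.Table;

// Illustrative helper, not part of CDAP: delete selected columns only if the row exists.
public class ConditionalDeleteSketch {

    static boolean deleteColumnsIfPresent(Table table, byte[] rowKey, byte[][] columns) {
        Row row = table.get(rowKey);   // never null; an absent row is returned as an empty Row
        if (row.isEmpty()) {
            return false;              // nothing stored under this key, so nothing to delete
        }
        table.delete(rowKey, columns); // remove just these columns; other columns in the row remain
        return true;
    }
}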

Aggregations

Row (io.cdap.cdap.api.dataset.table.Row): 166
Scanner (io.cdap.cdap.api.dataset.table.Scanner): 81
Test (org.junit.Test): 50
Table (io.cdap.cdap.api.dataset.table.Table): 34
Put (io.cdap.cdap.api.dataset.table.Put): 29
ArrayList (java.util.ArrayList): 26
TransactionExecutor (org.apache.tephra.TransactionExecutor): 26
Get (io.cdap.cdap.api.dataset.table.Get): 24
Schema (io.cdap.cdap.api.data.schema.Schema): 21
HashMap (java.util.HashMap): 19
MDSKey (io.cdap.cdap.data2.dataset2.lib.table.MDSKey): 16
Transaction (org.apache.tephra.Transaction): 16
TransactionAware (org.apache.tephra.TransactionAware): 16
IOException (java.io.IOException): 14
Map (java.util.Map): 14
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 13
DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin): 12
WriteOnly (io.cdap.cdap.api.annotation.WriteOnly): 10
DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue): 10
HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 10