
Example 11 with PartitionFilter

Use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by cdapio.

From class PartitionedFileSetArgumentsTest, method testSetGetInputPartitionFilter:

@Test
public void testSetGetInputPartitionFilter() throws Exception {
    Map<String, String> arguments = new HashMap<>();
    PartitionFilter filter = PartitionFilter.builder()
        .addRangeCondition("i", 30, 40)
        .addValueCondition("l", 17L)
        .addValueCondition("s", "x")
        .build();
    PartitionedFileSetArguments.setInputPartitionFilter(arguments, filter);
    Assert.assertEquals(filter, PartitionedFileSetArguments.getInputPartitionFilter(arguments));
    arguments = new HashMap<>();
    filter = PartitionFilter.builder()
        .addRangeCondition("i", 30, 40)
        .addValueCondition("s", "x")
        .build();
    PartitionedFileSetArguments.setInputPartitionFilter(arguments, filter);
    Assert.assertEquals(filter, PartitionedFileSetArguments.getInputPartitionFilter(arguments));
    arguments = new HashMap<>();
    filter = PartitionFilter.ALWAYS_MATCH;
    PartitionedFileSetArguments.setInputPartitionFilter(arguments, filter);
    Assert.assertEquals(filter, PartitionedFileSetArguments.getInputPartitionFilter(arguments));
}
Also used : PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) HashMap(java.util.HashMap) Test(org.junit.Test)
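
The round trip works because setInputPartitionFilter serializes the filter into string entries of the arguments map, and getInputPartitionFilter rebuilds an equal filter from them. As a minimal sketch (not part of the test above) of how such a filter evaluates concrete keys once reconstructed, using the CDAP API's PartitionFilter.match(PartitionKey):

// Sketch only: range conditions take the lower bound as inclusive and the
// upper bound as exclusive, so i = 40 falls outside the [30, 40) range.
PartitionFilter filter = PartitionFilter.builder()
    .addRangeCondition("i", 30, 40)
    .addValueCondition("s", "x")
    .build();
PartitionKey inside = PartitionKey.builder()
    .addIntField("i", 35)
    .addStringField("s", "x")
    .build();
PartitionKey onUpperBound = PartitionKey.builder()
    .addIntField("i", 40)
    .addStringField("s", "x")
    .build();
Assert.assertTrue(filter.match(inside));
Assert.assertFalse(filter.match(onUpperBound));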

Example 12 with PartitionFilter

Use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by cdapio.

From class ConditionCodecTest, method testSerDe:

@Test
public void testSerDe() {
    PartitionFilter filter = PartitionFilter.builder()
        .addValueCondition("i", 42)
        .addValueCondition("l", 17L)
        .addValueCondition("s", "x")
        .build();
    testSerDe(filter);
    filter = PartitionFilter.builder()
        .addRangeCondition("i", 30, 40)
        .addValueCondition("l", 17L)
        .addValueCondition("s", "x")
        .build();
    testSerDe(filter);
    filter = PartitionFilter.builder()
        .addRangeCondition("i", 30, 40)
        .addValueCondition("s", "x")
        .build();
    testSerDe(filter);
    testSerDe(PartitionFilter.ALWAYS_MATCH);
}
Also used : PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) Test(org.junit.Test)
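
The testSerDe helper itself is not shown in this excerpt. A plausible shape, under the assumption that ConditionCodec is a Gson serializer/deserializer for PartitionFilter.Condition (the test's name suggests this, but the excerpt does not confirm it):

// Hypothetical helper, not taken from the CDAP source: round-trip the filter
// through JSON and assert that deserialization reproduces an equal filter.
private void testSerDe(PartitionFilter filter) {
    Gson gson = new GsonBuilder()
        .registerTypeAdapter(PartitionFilter.Condition.class, new ConditionCodec())
        .create();
    String json = gson.toJson(filter);
    PartitionFilter deserialized = gson.fromJson(json, PartitionFilter.class);
    Assert.assertEquals(filter, deserialized);
}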

Example 13 with PartitionFilter

Use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by cdapio.

From class MapReduceWithPartitionedTest, method testPartitionedFileSetWithMR:

private void testPartitionedFileSetWithMR(boolean useCombineFileInputFormat) throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithPartitionedFileSet.class, new AppWithPartitionedFileSet.AppConfig(useCombineFileInputFormat));
    // write a value to the input table
    final Table table = datasetCache.getDataset(AppWithPartitionedFileSet.INPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.put(Bytes.toBytes("x"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
        }
    });
    // a partition key for the map/reduce output
    final PartitionKey keyX = PartitionKey.builder()
        .addStringField("type", "x")
        .addLongField("time", 150000L)
        .build();
    // run the partition writer m/r with this output partition time
    Map<String, String> runtimeArguments = Maps.newHashMap();
    Map<String, String> outputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the partitioned file set
    final PartitionedFileSet dataset = datasetCache.getDataset(PARTITIONED);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyX);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertTrue(path.contains("x"));
            Assert.assertTrue(path.contains("150000"));
        }
    });
    // delete the data in the input table and write a new row
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.delete(Bytes.toBytes("x"));
            table.put(Bytes.toBytes("y"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
        }
    });
    // a new partition key for the next map/reduce
    final PartitionKey keyY = PartitionKey.builder()
        .addStringField("type", "y")
        .addLongField("time", 200000L)
        .build();
    // now run the m/r again with this new output partition key (a later time)
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a second partition in the partitioned file set
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyY);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertNotNull(path);
            Assert.assertTrue(path.contains("y"));
            Assert.assertTrue(path.contains("200000"));
        }
    });
    // a partition filter that matches the outputs of both map/reduces
    PartitionFilter filterXY = PartitionFilter.builder().addRangeCondition("type", "x", "z").build();
    // now run a map/reduce that reads all the partitions
    runtimeArguments = Maps.newHashMap();
    Map<String, String> inputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterXY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "a");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read both partitions - and written both x and y to row a
    final Table output = datasetCache.getDataset(AppWithPartitionedFileSet.OUTPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("a"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertEquals("2", row.getString("y"));
            Assert.assertEquals("{type=y, time=200000}", row.getString("y_key"));
        }
    });
    // a partition filter that matches the output key of the first map/reduce
    PartitionFilter filterX = PartitionFilter.builder()
        .addValueCondition("type", "x")
        .addRangeCondition("time", null, 160000L)
        .build();
    // now run a map/reduce that reads a range of the partitions, namely the first one
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "b");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read the first partition only - and written only x to row b
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("b"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertNull(row.get("y"));
            Assert.assertNull(row.get("y_key"));
        }
    });
    // a partition filter that matches no key
    PartitionFilter filterMT = PartitionFilter.builder().addValueCondition("type", "nosuchthing").build();
    // now run a map/reduce that reads an empty range of partitions (the filter matches nothing)
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterMT);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "n");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read no partitions - and written nothing to row n
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("n"));
            Assert.assertTrue(row.isEmpty());
        }
    });
}
Also used : Partition(io.cdap.cdap.api.dataset.lib.Partition) Table(io.cdap.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) Row(io.cdap.cdap.api.dataset.table.Row)
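
Throughout this test, RuntimeArguments.addScope is what confines a partition key or filter to one dataset: it prefixes every argument key so that only the named dataset sees the value. A minimal sketch of the pattern, with "myPfs" as a placeholder dataset name:

// Scope input arguments to the dataset named "myPfs". CDAP's convention is a
// "dataset.<name>." key prefix, which the runtime strips off again before the
// arguments reach the dataset.
Map<String, String> inputArgs = new HashMap<>();
PartitionedFileSetArguments.setInputPartitionFilter(inputArgs,
    PartitionFilter.builder().addRangeCondition("type", "x", "z").build());
Map<String, String> runtimeArgs =
    new HashMap<>(RuntimeArguments.addScope(Scope.DATASET, "myPfs", inputArgs));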

Example 14 with PartitionFilter

Use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project hydrator-plugins by cdapio.

From class SnapshotFileSet, method deleteMatchingPartitionsByTime:

public void deleteMatchingPartitionsByTime(long upperLimit) throws IOException {
    if (upperLimit > 0 && upperLimit < Long.MAX_VALUE) {
        PartitionFilter filter = PartitionFilter.builder().addRangeCondition(SNAPSHOT_FIELD, null, upperLimit).build();
        Set<PartitionDetail> partitions = files.getPartitions(filter);
        for (PartitionDetail partition : partitions) {
            files.dropPartition(partition.getPartitionKey());
        }
    }
}
Also used : PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) PartitionDetail(io.cdap.cdap.api.dataset.lib.PartitionDetail)
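
Passing null as the lower bound of addRangeCondition leaves the range open below, so the filter matches every partition whose SNAPSHOT_FIELD value is strictly less than upperLimit (the upper bound of a range condition is exclusive). A hypothetical call site, where snapshotFileSet is a placeholder instance of this class:

// Hypothetical usage sketch: drop every snapshot older than seven days.
long cutoff = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(7);
snapshotFileSet.deleteMatchingPartitionsByTime(cutoff);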

Example 15 with PartitionFilter

Use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by caskdata.

From class PartitionedFileSetTest, method testAddRemoveGetPartitions:

@Test
@Category(SlowTests.class)
public void testAddRemoveGetPartitions() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final PartitionKey[][][] keys = new PartitionKey[4][4][4];
    final String[][][] paths = new String[4][4][4];
    final Set<BasicPartition> allPartitionDetails = Sets.newHashSet();
    // add a bunch of partitions
    for (int s = 0; s < 4; s++) {
        for (int i = 0; i < 4; i++) {
            for (int l = 0; l < 4; l++) {
                final PartitionKey key = PartitionKey.builder()
                    .addField("s", String.format("%c-%d", 'a' + s, s))
                    .addField("i", i * 100)
                    .addField("l", 15L - 10 * l)
                    .build();
                BasicPartition basicPartition = dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new Callable<BasicPartition>() {

                    @Override
                    public BasicPartition call() throws Exception {
                        PartitionOutput p = dataset.getPartitionOutput(key);
                        p.addPartition();
                        return new BasicPartition((PartitionedFileSetDataset) dataset, p.getRelativePath(), p.getPartitionKey());
                    }
                });
                keys[s][i][l] = key;
                paths[s][i][l] = basicPartition.getRelativePath();
                allPartitionDetails.add(basicPartition);
            }
        }
    }
    // validate getPartition with exact partition key
    for (int s = 0; s < 4; s++) {
        for (int i = 0; i < 4; i++) {
            for (int l = 0; l < 4; l++) {
                final PartitionKey key = keys[s][i][l];
                final String path = paths[s][i][l];
                dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

                    @Override
                    public void apply() throws Exception {
                        PartitionDetail partitionDetail = dataset.getPartition(key);
                        Assert.assertNotNull(partitionDetail);
                        Assert.assertEquals(path, partitionDetail.getRelativePath());
                    }
                });
                // also test getPartitionPaths() and getPartitions() for the filter matching this key
                @SuppressWarnings({ "unchecked", "unused" })
                boolean success = testFilter(dataset, allPartitionDetails,
                    PartitionFilter.builder()
                        .addValueCondition("l", key.getField("l"))
                        .addValueCondition("s", key.getField("s"))
                        .addValueCondition("i", key.getField("i"))
                        .build());
            }
        }
    }
    // test whether query works without filter
    testFilter(dataset, allPartitionDetails, null);
    // generate a list of partition filters with exhaustive coverage
    List<PartitionFilter> filters = generateFilters();
    // test all kinds of filters
    testAllFilters(dataset, allPartitionDetails, filters);
    // remove a few of the partitions and test again, repeatedly
    PartitionKey[] keysToRemove = { keys[1][2][3], keys[0][1][0], keys[2][3][2], keys[3][1][2] };
    for (final PartitionKey key : keysToRemove) {
        // remove in a transaction
        dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new TransactionExecutor.Procedure<PartitionKey>() {

            @Override
            public void apply(PartitionKey partitionKey) throws Exception {
                dataset.dropPartition(partitionKey);
            }
        }, key);
        // test all filters
        BasicPartition toRemove = Iterables.tryFind(allPartitionDetails, new com.google.common.base.Predicate<BasicPartition>() {

            @Override
            public boolean apply(BasicPartition partition) {
                return key.equals(partition.getPartitionKey());
            }
        }).get();
        allPartitionDetails.remove(toRemove);
        testAllFilters(dataset, allPartitionDetails, filters);
    }
}
Also used : PartitionDetail(io.cdap.cdap.api.dataset.lib.PartitionDetail) Predicate(io.cdap.cdap.api.Predicate) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) DataSetException(io.cdap.cdap.api.dataset.DataSetException) PartitionNotFoundException(io.cdap.cdap.api.dataset.PartitionNotFoundException) PartitionAlreadyExistsException(io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException) IOException(java.io.IOException) PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) PartitionOutput(io.cdap.cdap.api.dataset.lib.PartitionOutput) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
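
The testFilter helper is not shown in this excerpt. A plausible shape, assuming it compares the dataset's answer for a filter against the subset of known partitions whose keys match it (getPartitions would run inside a transaction, as the surrounding test does):

// Hypothetical helper shape, not taken from the CDAP source: compute the
// expected keys client-side with PartitionFilter.match, then compare with
// what the dataset returns for the same filter (null means "no filter").
private boolean testFilter(PartitionedFileSet dataset,
                           Set<BasicPartition> allPartitions,
                           @Nullable PartitionFilter filter) {
    Set<PartitionKey> expectedKeys = new HashSet<>();
    for (BasicPartition partition : allPartitions) {
        if (filter == null || filter.match(partition.getPartitionKey())) {
            expectedKeys.add(partition.getPartitionKey());
        }
    }
    Set<PartitionKey> actualKeys = new HashSet<>();
    for (PartitionDetail detail : dataset.getPartitions(filter)) {
        actualKeys.add(detail.getPartitionKey());
    }
    Assert.assertEquals(expectedKeys, actualKeys);
    return true;
}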

Aggregations

PartitionFilter (io.cdap.cdap.api.dataset.lib.PartitionFilter): 29 uses
Test (org.junit.Test): 20 uses
PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey): 10 uses
TransactionAware (org.apache.tephra.TransactionAware): 10 uses
PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet): 8 uses
TransactionExecutor (org.apache.tephra.TransactionExecutor): 8 uses
PartitionDetail (io.cdap.cdap.api.dataset.lib.PartitionDetail): 7 uses
Predicate (io.cdap.cdap.api.Predicate): 6 uses
DataSetException (io.cdap.cdap.api.dataset.DataSetException): 6 uses
Partition (io.cdap.cdap.api.dataset.lib.Partition): 6 uses
IOException (java.io.IOException): 6 uses
HashMap (java.util.HashMap): 6 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 4 uses
PartitionNotFoundException (io.cdap.cdap.api.dataset.PartitionNotFoundException): 4 uses
PartitionAlreadyExistsException (io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException): 4 uses
FieldType (io.cdap.cdap.api.dataset.lib.Partitioning.FieldType): 4 uses
TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet): 4 uses
HashSet (java.util.HashSet): 4 uses
List (java.util.List): 4 uses
Map (java.util.Map): 4 uses