Search in sources :

Example 41 with PartitionKey

use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.

the class TimePartitionedFileSetDefinition method updateArgumentsIfNeeded.

// if the arguments do not contain an output path, but an output partition time, generate an output path from that;
// also convert the output partition time to a partition key and add it to the arguments;
// also call the super class' method to update arguments if it needs to
protected Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments) {
    Long time = TimePartitionedFileSetArguments.getOutputPartitionTime(arguments);
    if (time != null) {
        // set the output path according to partition time
        if (FileSetArguments.getOutputPath(arguments) == null) {
            String outputPathFormat = TimePartitionedFileSetArguments.getOutputPathFormat(arguments);
            String path;
            if (Strings.isNullOrEmpty(outputPathFormat)) {
                path = String.format("%tF/%tH-%tM.%d", time, time, time, time);
            } else {
                SimpleDateFormat format = new SimpleDateFormat(outputPathFormat);
                String timeZoneID = TimePartitionedFileSetArguments.getOutputPathTimeZone(arguments);
                if (!Strings.isNullOrEmpty(timeZoneID)) {
                    format.setTimeZone(TimeZone.getTimeZone(timeZoneID));
                }
                path = format.format(new Date(time));
            }
            arguments = Maps.newHashMap(arguments);
            FileSetArguments.setOutputPath(arguments, path);
        }
        // add the corresponding partition key to the arguments
        PartitionKey outputKey = TimePartitionedFileSetDataset.partitionKeyForTime(time);
        PartitionedFileSetArguments.setOutputPartitionKey(arguments, outputKey);
    }
    // delegate to super class for anything it needs to do
    return updateArgumentsIfNeeded(arguments, TimePartitionedFileSetDataset.PARTITIONING);
}
Also used : PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) SimpleDateFormat(java.text.SimpleDateFormat) Date(java.util.Date)

Example 42 with PartitionKey

use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.

the class PartitionedFileSetDefinition method updateArgumentsIfNeeded.

// if the arguments do not contain an output location, generate one from the partition key (if present)
protected static Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments, Partitioning partitioning) {
    if (FileSetArguments.getOutputPath(arguments) == null) {
        PartitionKey key = PartitionedFileSetArguments.getOutputPartitionKey(arguments, partitioning);
        if (key != null) {
            arguments = Maps.newHashMap(arguments);
            FileSetArguments.setOutputPath(arguments, PartitionedFileSetDataset.getOutputPath(key, partitioning));
        } else if (PartitionedFileSetArguments.getDynamicPartitioner(arguments) != null) {
            // when using DynamicPartitioner, use the baseLocation of the fileSet as the output location
            FileSetArguments.setBaseOutputPath(arguments);
        }
    }
    return arguments;
}
Also used : PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey)

Example 43 with PartitionKey

use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.

the class ExploreExecutorHttpHandler method doAddPartition.

private void doAddPartition(HttpRequest request, HttpResponder responder, DatasetId datasetId) {
    Dataset dataset;
    try (SystemDatasetInstantiator datasetInstantiator = datasetInstantiatorFactory.create()) {
        dataset = datasetInstantiator.getDataset(datasetId);
        if (dataset == null) {
            responder.sendString(HttpResponseStatus.NOT_FOUND, "Cannot load dataset " + datasetId);
            return;
        }
    } catch (IOException e) {
        String classNotFoundMessage = isClassNotFoundException(e);
        if (classNotFoundMessage != null) {
            JsonObject json = new JsonObject();
            json.addProperty("handle", QueryHandle.NO_OP.getHandle());
            responder.sendJson(HttpResponseStatus.OK, json);
            return;
        }
        LOG.error("Exception instantiating dataset {}.", datasetId, e);
        responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, "Exception instantiating dataset " + datasetId.getDataset());
        return;
    }
    try {
        if (!(dataset instanceof PartitionedFileSet)) {
            responder.sendString(HttpResponseStatus.BAD_REQUEST, "not a partitioned dataset.");
            return;
        }
        Partitioning partitioning = ((PartitionedFileSet) dataset).getPartitioning();
        Reader reader = new InputStreamReader(new ChannelBufferInputStream(request.getContent()));
        Map<String, String> properties = GSON.fromJson(reader, new TypeToken<Map<String, String>>() {
        }.getType());
        String fsPath = properties.get("path");
        if (fsPath == null) {
            responder.sendString(HttpResponseStatus.BAD_REQUEST, "path was not specified.");
            return;
        }
        PartitionKey partitionKey;
        try {
            partitionKey = PartitionedFileSetArguments.getOutputPartitionKey(properties, partitioning);
        } catch (Exception e) {
            responder.sendString(HttpResponseStatus.BAD_REQUEST, "invalid partition key: " + e.getMessage());
            return;
        }
        if (partitionKey == null) {
            responder.sendString(HttpResponseStatus.BAD_REQUEST, "no partition key was given.");
            return;
        }
        QueryHandle handle = exploreTableManager.addPartition(datasetId, properties, partitionKey, fsPath);
        JsonObject json = new JsonObject();
        json.addProperty("handle", handle.getHandle());
        responder.sendJson(HttpResponseStatus.OK, json);
    } catch (Throwable e) {
        LOG.error("Got exception:", e);
        responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, e.getMessage());
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) Dataset(co.cask.cdap.api.dataset.Dataset) JsonObject(com.google.gson.JsonObject) Reader(java.io.Reader) InputStreamReader(java.io.InputStreamReader) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) IOException(java.io.IOException) BadRequestException(co.cask.cdap.common.BadRequestException) ExploreException(co.cask.cdap.explore.service.ExploreException) SQLException(java.sql.SQLException) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) JsonSyntaxException(com.google.gson.JsonSyntaxException) UnsupportedTypeException(co.cask.cdap.api.data.schema.UnsupportedTypeException) IOException(java.io.IOException) Partitioning(co.cask.cdap.api.dataset.lib.Partitioning) SystemDatasetInstantiator(co.cask.cdap.data.dataset.SystemDatasetInstantiator) TypeToken(com.google.common.reflect.TypeToken) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ChannelBufferInputStream(org.jboss.netty.buffer.ChannelBufferInputStream) QueryHandle(co.cask.cdap.proto.QueryHandle)

Example 44 with PartitionKey

use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.

the class PartitionedFileSetTest method testUpdateMetadata.

@Test
public void testUpdateMetadata() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            PartitionOutput partitionOutput = dataset.getPartitionOutput(PARTITION_KEY);
            ImmutableMap<String, String> originalEntries = ImmutableMap.of("key1", "value1");
            partitionOutput.setMetadata(originalEntries);
            partitionOutput.addPartition();
            ImmutableMap<String, String> updatedMetadata = ImmutableMap.of("key2", "value2");
            dataset.addMetadata(PARTITION_KEY, updatedMetadata);
            PartitionDetail partitionDetail = dataset.getPartition(PARTITION_KEY);
            Assert.assertNotNull(partitionDetail);
            HashMap<String, String> combinedEntries = Maps.newHashMap();
            combinedEntries.putAll(originalEntries);
            combinedEntries.putAll(updatedMetadata);
            Assert.assertEquals(combinedEntries, partitionDetail.getMetadata().asMap());
            // adding an entry, for a key that already exists will throw an Exception
            try {
                dataset.addMetadata(PARTITION_KEY, "key2", "value3");
                Assert.fail("Expected not to be able to update an existing metadata entry");
            } catch (DataSetException expected) {
            }
            PartitionKey nonexistentPartitionKey = PartitionKey.builder().addIntField("i", 42).addLongField("l", 17L).addStringField("s", "nonexistent").build();
            try {
                // adding an entry, for a key that already exists will throw an Exception
                dataset.addMetadata(nonexistentPartitionKey, "key2", "value3");
                Assert.fail("Expected not to be able to add metadata for a nonexistent partition");
            } catch (DataSetException expected) {
            }
        }
    });
}
Also used : DataSetException(co.cask.cdap.api.dataset.DataSetException) PartitionOutput(co.cask.cdap.api.dataset.lib.PartitionOutput) HashMap(java.util.HashMap) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) PartitionNotFoundException(co.cask.cdap.api.dataset.PartitionNotFoundException) IOException(java.io.IOException) DataSetException(co.cask.cdap.api.dataset.DataSetException) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)

Example 45 with PartitionKey

use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.

the class PartitionedFileSetTest method testAddRemoveGetPartitions.

@Test
@Category(SlowTests.class)
public void testAddRemoveGetPartitions() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final PartitionKey[][][] keys = new PartitionKey[4][4][4];
    final String[][][] paths = new String[4][4][4];
    final Set<BasicPartition> allPartitionDetails = Sets.newHashSet();
    // add a bunch of partitions
    for (int s = 0; s < 4; s++) {
        for (int i = 0; i < 4; i++) {
            for (int l = 0; l < 4; l++) {
                final PartitionKey key = PartitionKey.builder().addField("s", String.format("%c-%d", 'a' + s, s)).addField("i", i * 100).addField("l", 15L - 10 * l).build();
                BasicPartition basicPartition = dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new Callable<BasicPartition>() {

                    @Override
                    public BasicPartition call() throws Exception {
                        PartitionOutput p = dataset.getPartitionOutput(key);
                        p.addPartition();
                        return new BasicPartition((PartitionedFileSetDataset) dataset, p.getRelativePath(), p.getPartitionKey());
                    }
                });
                keys[s][i][l] = key;
                paths[s][i][l] = basicPartition.getRelativePath();
                allPartitionDetails.add(basicPartition);
            }
        }
    }
    // validate getPartition with exact partition key
    for (int s = 0; s < 4; s++) {
        for (int i = 0; i < 4; i++) {
            for (int l = 0; l < 4; l++) {
                final PartitionKey key = keys[s][i][l];
                final String path = paths[s][i][l];
                dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

                    @Override
                    public void apply() throws Exception {
                        PartitionDetail partitionDetail = dataset.getPartition(key);
                        Assert.assertNotNull(partitionDetail);
                        Assert.assertEquals(path, partitionDetail.getRelativePath());
                    }
                });
                // also test getPartitionPaths() and getPartitions() for the filter matching this
                @SuppressWarnings({ "unchecked", "unused" }) boolean success = testFilter(dataset, allPartitionDetails, PartitionFilter.builder().addValueCondition("l", key.getField("l")).addValueCondition("s", key.getField("s")).addValueCondition("i", key.getField("i")).build());
            }
        }
    }
    // test whether query works without filter
    testFilter(dataset, allPartitionDetails, null);
    // generate an list of partition filters with exhaustive coverage
    List<PartitionFilter> filters = generateFilters();
    // test all kinds of filters
    testAllFilters(dataset, allPartitionDetails, filters);
    // remove a few of the partitions and test again, repeatedly
    PartitionKey[] keysToRemove = { keys[1][2][3], keys[0][1][0], keys[2][3][2], keys[3][1][2] };
    for (final PartitionKey key : keysToRemove) {
        // remove in a transaction
        dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new TransactionExecutor.Procedure<PartitionKey>() {

            @Override
            public void apply(PartitionKey partitionKey) throws Exception {
                dataset.dropPartition(partitionKey);
            }
        }, key);
        // test all filters
        BasicPartition toRemove = Iterables.tryFind(allPartitionDetails, new com.google.common.base.Predicate<BasicPartition>() {

            @Override
            public boolean apply(BasicPartition partition) {
                return key.equals(partition.getPartitionKey());
            }
        }).get();
        allPartitionDetails.remove(toRemove);
        testAllFilters(dataset, allPartitionDetails, filters);
    }
}
Also used : PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) Predicate(co.cask.cdap.api.Predicate) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) PartitionNotFoundException(co.cask.cdap.api.dataset.PartitionNotFoundException) IOException(java.io.IOException) DataSetException(co.cask.cdap.api.dataset.DataSetException) PartitionFilter(co.cask.cdap.api.dataset.lib.PartitionFilter) PartitionOutput(co.cask.cdap.api.dataset.lib.PartitionOutput) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey)59 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)28 Test (org.junit.Test)27 TransactionAware (org.apache.tephra.TransactionAware)17 TransactionExecutor (org.apache.tephra.TransactionExecutor)17 IOException (java.io.IOException)12 HashMap (java.util.HashMap)12 PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail)11 ConcurrentPartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer)11 PartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer)11 ArrayList (java.util.ArrayList)11 List (java.util.List)11 HashSet (java.util.HashSet)10 DataSetException (co.cask.cdap.api.dataset.DataSetException)9 ImmutableList (com.google.common.collect.ImmutableList)9 PartitionNotFoundException (co.cask.cdap.api.dataset.PartitionNotFoundException)7 Partition (co.cask.cdap.api.dataset.lib.Partition)7 ConsumerConfiguration (co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration)7 TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet)6 Location (org.apache.twill.filesystem.Location)6