Search in sources :

Example 26 with DataSetException

use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

the class BufferingTable method get.

/**
 * NOTE: Depending on the use-case, calling this method may be much less
 *       efficient than calling same method with columns as parameters because it may always require round trip to
 *       persistent store
 */
@ReadOnly
@Override
public Row get(byte[] row) {
    ensureTransactionIsStarted();
    reportRead(1);
    try {
        return new Result(row, getRowMap(row));
    } catch (Exception e) {
        LOG.debug("get failed for table: " + getTransactionAwareName() + ", row: " + Bytes.toStringBinary(row), e);
        throw new DataSetException("get failed", e);
    }
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) DataSetException(io.cdap.cdap.api.dataset.DataSetException) IOException(java.io.IOException) Result(io.cdap.cdap.api.dataset.table.Result) ReadOnly(io.cdap.cdap.api.annotation.ReadOnly)

Example 27 with DataSetException

use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

the class ApplicationVerificationStage method verifyData.

private void verifyData(ApplicationId appId, ApplicationSpecification specification, @Nullable KerberosPrincipalId ownerPrincipal) throws Exception {
    // NOTE: no special restrictions on dataset module names, etc
    VerifyResult result;
    for (DatasetCreationSpec dataSetCreateSpec : specification.getDatasets().values()) {
        result = getVerifier(DatasetCreationSpec.class).verify(appId, dataSetCreateSpec);
        if (!result.isSuccess()) {
            throw new RuntimeException(result.getMessage());
        }
        String dsName = dataSetCreateSpec.getInstanceName();
        final DatasetId datasetInstanceId = appId.getParent().dataset(dsName);
        // get the authorizing user
        String authorizingUser = AuthorizationUtil.getAppAuthorizingUser(ownerAdmin, authenticationContext, appId, ownerPrincipal);
        DatasetSpecification existingSpec = AuthorizationUtil.authorizeAs(authorizingUser, new Callable<DatasetSpecification>() {

            @Override
            public DatasetSpecification call() throws Exception {
                return dsFramework.getDatasetSpec(datasetInstanceId);
            }
        });
        if (existingSpec != null && !existingSpec.getType().equals(dataSetCreateSpec.getTypeName())) {
            // New app trying to deploy an dataset with same instanceName but different Type than that of existing.
            throw new DataSetException(String.format("Cannot Deploy Dataset : %s with Type : %s : Dataset with different Type Already Exists", dsName, dataSetCreateSpec.getTypeName()));
        }
        // if the dataset existed verify its owner is same.
        if (existingSpec != null) {
            verifyOwner(datasetInstanceId, ownerPrincipal);
        }
    }
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) DatasetCreationSpec(io.cdap.cdap.internal.dataset.DatasetCreationSpec) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) VerifyResult(io.cdap.cdap.app.verification.VerifyResult) UnauthorizedException(io.cdap.cdap.security.spi.authorization.UnauthorizedException) DataSetException(io.cdap.cdap.api.dataset.DataSetException) DatasetManagementException(io.cdap.cdap.api.dataset.DatasetManagementException) IOException(java.io.IOException) DatasetId(io.cdap.cdap.proto.id.DatasetId)

Example 28 with DataSetException

use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

the class TimePartitionedFileSetTest method testOutputPartitionPath.

/**
 * Tests that the output file path is set correctly, based on the output partition time.
 */
@Test
public void testOutputPartitionPath() throws Exception {
    // test specifying output time
    Date date = DATE_FORMAT.parse("1/1/15 8:42 pm");
    Map<String, String> args = Maps.newHashMap();
    TimePartitionedFileSetArguments.setOutputPartitionTime(args, date.getTime());
    TimeZone timeZone = Calendar.getInstance().getTimeZone();
    TimePartitionedFileSetArguments.setOutputPathFormat(args, "yyyy-MM-dd/HH_mm", timeZone.getID());
    TimePartitionedFileSet ds = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
    String outputPath = ds.getEmbeddedFileSet().getOutputLocation().toURI().getPath();
    Assert.assertTrue(outputPath.endsWith("2015-01-01/20_42"));
    Map<String, String> outputConfig = ds.getOutputFormatConfiguration();
    Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("2015-01-01/20_42"));
    // test specifying output time and partition key -> time should prevail
    PartitionKey key = PartitionKey.builder().addIntField("year", 2014).addIntField("month", 1).addIntField("day", 1).addIntField("hour", 20).addIntField("minute", 54).build();
    TimePartitionedFileSet ds1 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
    TimePartitionedFileSetArguments.setOutputPartitionKey(args, key);
    outputConfig = ds1.getOutputFormatConfiguration();
    Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("2015-01-01/20_42"));
    args.clear();
    TimePartitionedFileSetArguments.setOutputPartitionKey(args, key);
    TimePartitionedFileSet ds2 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
    outputConfig = ds2.getOutputFormatConfiguration();
    Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("54"));
    args.clear();
    TimePartitionedFileSet ds3 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
    try {
        ds3.getOutputFormatConfiguration();
        Assert.fail("getOutputFormatConfiguration should have failed with neither output time nor partition key");
    } catch (DataSetException e) {
    // expected
    }
}
Also used : TimeZone(java.util.TimeZone) DataSetException(io.cdap.cdap.api.dataset.DataSetException) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) Date(java.util.Date) Test(org.junit.Test)

Example 29 with DataSetException

use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

the class HiveExploreServiceFileSetTestRun method testPartitionedFileSet.

private void testPartitionedFileSet(@Nullable String dbName, @Nullable String tableName) throws Exception {
    DatasetId datasetInstanceId = NAMESPACE_ID.dataset("parted");
    String hiveTableName = getDatasetHiveName(datasetInstanceId);
    String showTablesCommand = "show tables";
    FileSetProperties.Builder props = PartitionedFileSetProperties.builder().setPartitioning(Partitioning.builder().addStringField("str").addIntField("num").build()).setBasePath("parted").setEnableExploreOnCreate(true).setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe").setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat").setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat").setTableProperty("avro.schema.literal", SCHEMA.toString());
    if (tableName != null) {
        props.setExploreTableName(tableName);
        hiveTableName = tableName;
    }
    String queryTableName = hiveTableName;
    if (dbName != null) {
        props.setExploreDatabaseName(dbName);
        runCommand(NAMESPACE_ID, "create database " + dbName, false, null, null);
        showTablesCommand += " in " + dbName;
        queryTableName = dbName + "." + queryTableName;
    }
    // create a time partitioned file set
    datasetFramework.addInstance("partitionedFileSet", datasetInstanceId, props.build());
    // verify that the hive table was created for this file set
    runCommand(NAMESPACE_ID, showTablesCommand, true, null, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTableName))));
    // Accessing dataset instance to perform data operations
    final PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);
    FileSet fileSet = partitioned.getEmbeddedFileSet();
    // add some partitions. Beware that Hive expects a partition to be a directory, so we create dirs with one file
    Location locationX1 = fileSet.getLocation("fileX1/nn");
    Location locationY1 = fileSet.getLocation("fileY1/nn");
    Location locationX2 = fileSet.getLocation("fileX2/nn");
    Location locationY2 = fileSet.getLocation("fileY2/nn");
    FileWriterHelper.generateAvroFile(locationX1.getOutputStream(), "x", 1, 2);
    FileWriterHelper.generateAvroFile(locationY1.getOutputStream(), "y", 1, 2);
    FileWriterHelper.generateAvroFile(locationX2.getOutputStream(), "x", 2, 3);
    FileWriterHelper.generateAvroFile(locationY2.getOutputStream(), "y", 2, 3);
    final PartitionKey keyX1 = PartitionKey.builder().addStringField("str", "x").addIntField("num", 1).build();
    PartitionKey keyY1 = PartitionKey.builder().addStringField("str", "y").addIntField("num", 1).build();
    final PartitionKey keyX2 = PartitionKey.builder().addStringField("str", "x").addIntField("num", 2).build();
    PartitionKey keyY2 = PartitionKey.builder().addStringField("str", "y").addIntField("num", 2).build();
    addPartition(partitioned, keyX1, "fileX1");
    addPartition(partitioned, keyY1, "fileY1");
    addPartition(partitioned, keyX2, "fileX2");
    addPartition(partitioned, keyY2, "fileY2");
    // verify that the partitions were added to Hive
    validatePartitions(queryTableName, partitioned, ImmutableList.of(keyX1, keyX2, keyY1, keyY2));
    // verify that count() and where... work in Hive
    runCommand(NAMESPACE_ID, "SELECT count(*) AS count FROM " + queryTableName, true, Lists.newArrayList(new ColumnDesc("count", "BIGINT", 1, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(4L))));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + queryTableName + " WHERE num = 2 ORDER BY key, value", true, Lists.newArrayList(new ColumnDesc(hiveTableName + ".key", "STRING", 1, null), new ColumnDesc(hiveTableName + ".value", "STRING", 2, null), new ColumnDesc(hiveTableName + ".str", "STRING", 3, null), new ColumnDesc(hiveTableName + ".num", "INT", 4, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x2", "#2", "x", 2)), new QueryResult(Lists.<Object>newArrayList("y2", "#2", "y", 2))));
    // drop a partition and query again
    dropPartition(partitioned, keyX2);
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // attempt a transaction that drops one partition, adds another, and then fails
    try {
        doTransaction(partitioned, new Runnable() {

            @Override
            public void run() {
                partitioned.dropPartition(keyX1);
                partitioned.addPartition(keyX2, "fileX2");
                Assert.fail("fail tx");
            }
        });
    } catch (TransactionFailureException e) {
    // expected
    }
    // validate that both the drop and addPartition were undone
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // attempt a transaction that attempts to add an existing partition, hence fails
    try {
        doTransaction(partitioned, new Runnable() {

            @Override
            public void run() {
                partitioned.addPartition(keyX1, "fileX1");
                throw new RuntimeException("on purpose");
            }
        });
    } catch (TransactionFailureException e) {
        // expected if the cause is not "on purpose"
        Assert.assertTrue(e.getCause() instanceof DataSetException);
    }
    // validate that both the drop and addPartition were undone
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // drop a partition directly from hive
    runCommand(NAMESPACE_ID, "ALTER TABLE " + queryTableName + " DROP PARTITION (str='y', num=2)", false, null, null);
    // verify that one more value is gone now, namely y2, in Hive, but the PFS still has it
    validatePartitionsInHive(queryTableName, ImmutableSet.of(keyX1, keyY1));
    validatePartitionsInPFS(partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // make sure the partition can still be dropped from the PFS dataset
    dropPartition(partitioned, keyY2);
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1));
    // change the explore schema by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setTableProperty("avro.schema.literal", K_SCHEMA.toString()).build());
    // valudate the schema was updated
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1), true);
    // disable explore by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(false).build());
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, showTablesCommand, false, null, Collections.<QueryResult>emptyList());
    // re-enable explore by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(true).build());
    // verify the Hive table is back
    runCommand(NAMESPACE_ID, showTablesCommand, true, null, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTableName))));
    // drop the dataset
    datasetFramework.deleteInstance(datasetInstanceId);
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, "show tables", false, null, Collections.<QueryResult>emptyList());
}
Also used : PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) DatasetId(io.cdap.cdap.proto.id.DatasetId) PartitionedFileSetProperties(io.cdap.cdap.api.dataset.lib.PartitionedFileSetProperties) FileSetProperties(io.cdap.cdap.api.dataset.lib.FileSetProperties) QueryResult(io.cdap.cdap.proto.QueryResult) TransactionFailureException(org.apache.tephra.TransactionFailureException) DataSetException(io.cdap.cdap.api.dataset.DataSetException) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Location(org.apache.twill.filesystem.Location)

Example 30 with DataSetException

use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.

the class HBaseMetricsTable method incrementAndGet.

@Override
public long incrementAndGet(byte[] row, byte[] column, long delta) {
    byte[] distributedKey = createDistributedRowKey(row);
    Increment increment = new Increment(distributedKey);
    increment.addColumn(columnFamily, column, delta);
    try {
        Result result = table.increment(increment);
        return Bytes.toLong(result.getValue(columnFamily, column));
    } catch (IOException e) {
        // currently there is not other way to extract that from the HBase exception than string match
        if (e.getMessage() != null && e.getMessage().contains("isn't 64 bits wide")) {
            throw new NumberFormatException("Attempted to increment a value that is not convertible to long," + " row: " + Bytes.toStringBinary(distributedKey) + " column: " + Bytes.toStringBinary(column));
        }
        throw new DataSetException("IncrementAndGet failed on table " + tableId, e);
    }
}
Also used : DataSetException(io.cdap.cdap.api.dataset.DataSetException) Increment(org.apache.hadoop.hbase.client.Increment) IOException(java.io.IOException) Result(org.apache.hadoop.hbase.client.Result)

Aggregations

DataSetException (io.cdap.cdap.api.dataset.DataSetException)74 IOException (java.io.IOException)54 ReadOnly (io.cdap.cdap.api.annotation.ReadOnly)14 Map (java.util.Map)12 TransactionFailureException (org.apache.tephra.TransactionFailureException)12 Location (org.apache.twill.filesystem.Location)12 PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey)10 Result (io.cdap.cdap.api.dataset.table.Result)10 NavigableMap (java.util.NavigableMap)10 Test (org.junit.Test)10 PartitionAlreadyExistsException (io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException)8 TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet)8 Put (org.apache.hadoop.hbase.client.Put)8 ImmutableMap (com.google.common.collect.ImmutableMap)6 WriteOnly (io.cdap.cdap.api.annotation.WriteOnly)6 DatasetManagementException (io.cdap.cdap.api.dataset.DatasetManagementException)6 PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet)6 Put (io.cdap.cdap.api.dataset.table.Put)6 Row (io.cdap.cdap.api.dataset.table.Row)6 UnauthorizedException (io.cdap.cdap.security.spi.authorization.UnauthorizedException)6