Search in sources :

Example 26 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class MetadataHttpHandlerTestRun method assertDataEntitySearch.

private void assertDataEntitySearch() throws Exception {
    DatasetId datasetInstance = NamespaceId.DEFAULT.dataset(AllProgramsApp.DATASET_NAME);
    DatasetId datasetInstance2 = NamespaceId.DEFAULT.dataset(AllProgramsApp.DATASET_NAME2);
    DatasetId datasetInstance3 = NamespaceId.DEFAULT.dataset(AllProgramsApp.DATASET_NAME3);
    DatasetId datasetInstance4 = NamespaceId.DEFAULT.dataset(AllProgramsApp.DATASET_NAME4);
    DatasetId datasetInstance5 = NamespaceId.DEFAULT.dataset(AllProgramsApp.DATASET_NAME5);
    DatasetId datasetInstance6 = NamespaceId.DEFAULT.dataset(AllProgramsApp.DATASET_NAME6);
    DatasetId datasetInstance7 = NamespaceId.DEFAULT.dataset(AllProgramsApp.DATASET_NAME7);
    DatasetId dsWithSchema = NamespaceId.DEFAULT.dataset(AllProgramsApp.DS_WITH_SCHEMA_NAME);
    StreamId streamId = NamespaceId.DEFAULT.stream(AllProgramsApp.STREAM_NAME);
    StreamViewId view = streamId.view("view");
    Set<MetadataSearchResultRecord> expected = ImmutableSet.of(new MetadataSearchResultRecord(streamId), new MetadataSearchResultRecord(mystream));
    Set<MetadataSearchResultRecord> expectedWithView = ImmutableSet.<MetadataSearchResultRecord>builder().addAll(expected).add(new MetadataSearchResultRecord(myview)).build();
    // schema search with fieldname
    Set<MetadataSearchResultRecord> metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, "body");
    Assert.assertEquals(expectedWithView, metadataSearchResultRecords);
    // schema search with fieldname and fieldtype
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, "body:" + Schema.Type.STRING.toString());
    Assert.assertEquals(expected, metadataSearchResultRecords);
    // schema search for partial fieldname
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, "bo*");
    Assert.assertEquals(expectedWithView, metadataSearchResultRecords);
    // schema search with fieldname and all/partial fieldtype
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, "body:STR*");
    Assert.assertEquals(expected, metadataSearchResultRecords);
    // schema search for a field with the given fieldname:fieldtype
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, "body:STRING+field1:STRING");
    Assert.assertEquals(ImmutableSet.<MetadataSearchResultRecord>builder().addAll(expected).add(new MetadataSearchResultRecord(dsWithSchema)).build(), metadataSearchResultRecords);
    // create a view
    Schema viewSchema = Schema.recordOf("record", Schema.Field.of("viewBody", Schema.nullableOf(Schema.of(Schema.Type.BYTES))));
    streamViewClient.createOrUpdate(view, new ViewSpecification(new FormatSpecification("format", viewSchema)));
    // search all entities that have a defined schema
    // add a user property with "schema" as key
    Map<String, String> datasetProperties = ImmutableMap.of("schema", "schemaValue");
    addProperties(datasetInstance, datasetProperties);
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, "schema:*");
    Assert.assertEquals(ImmutableSet.<MetadataSearchResultRecord>builder().addAll(expectedWithView).add(new MetadataSearchResultRecord(datasetInstance)).add(new MetadataSearchResultRecord(dsWithSchema)).add(new MetadataSearchResultRecord(view)).build(), metadataSearchResultRecords);
    // search dataset
    ImmutableSet<MetadataSearchResultRecord> expectedKvTables = ImmutableSet.of(new MetadataSearchResultRecord(datasetInstance), new MetadataSearchResultRecord(datasetInstance2), new MetadataSearchResultRecord(datasetInstance3), new MetadataSearchResultRecord(myds));
    ImmutableSet<MetadataSearchResultRecord> expectedExplorableDatasets = ImmutableSet.<MetadataSearchResultRecord>builder().addAll(expectedKvTables).add(new MetadataSearchResultRecord(datasetInstance4)).add(new MetadataSearchResultRecord(datasetInstance5)).add(new MetadataSearchResultRecord(dsWithSchema)).build();
    ImmutableSet<MetadataSearchResultRecord> expectedAllDatasets = ImmutableSet.<MetadataSearchResultRecord>builder().addAll(expectedExplorableDatasets).add(new MetadataSearchResultRecord(datasetInstance6)).add(new MetadataSearchResultRecord(datasetInstance7)).build();
    ImmutableSet<MetadataSearchResultRecord> expectedExplorables = ImmutableSet.<MetadataSearchResultRecord>builder().addAll(expectedExplorableDatasets).add(new MetadataSearchResultRecord(streamId)).add(new MetadataSearchResultRecord(mystream)).build();
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, "explore");
    Assert.assertEquals(expectedExplorables, metadataSearchResultRecords);
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, KeyValueTable.class.getName());
    Assert.assertEquals(expectedKvTables, metadataSearchResultRecords);
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, "type:*");
    Assert.assertEquals(expectedAllDatasets, metadataSearchResultRecords);
    // search using ttl
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, "ttl:*");
    Assert.assertEquals(expected, metadataSearchResultRecords);
    // search using names
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, AllProgramsApp.STREAM_NAME);
    Assert.assertEquals(ImmutableSet.of(new MetadataSearchResultRecord(streamId), new MetadataSearchResultRecord(view)), metadataSearchResultRecords);
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, AllProgramsApp.STREAM_NAME, EntityTypeSimpleName.STREAM);
    Assert.assertEquals(ImmutableSet.of(new MetadataSearchResultRecord(streamId)), metadataSearchResultRecords);
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, AllProgramsApp.STREAM_NAME, EntityTypeSimpleName.VIEW);
    Assert.assertEquals(ImmutableSet.of(new MetadataSearchResultRecord(view)), metadataSearchResultRecords);
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, "view", EntityTypeSimpleName.VIEW);
    Assert.assertEquals(ImmutableSet.of(new MetadataSearchResultRecord(view)), metadataSearchResultRecords);
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, AllProgramsApp.DATASET_NAME);
    Assert.assertEquals(ImmutableSet.of(new MetadataSearchResultRecord(datasetInstance)), metadataSearchResultRecords);
    metadataSearchResultRecords = searchMetadata(NamespaceId.DEFAULT, AllProgramsApp.DS_WITH_SCHEMA_NAME);
    Assert.assertEquals(ImmutableSet.of(new MetadataSearchResultRecord(dsWithSchema)), metadataSearchResultRecords);
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) MetadataSearchResultRecord(co.cask.cdap.proto.metadata.MetadataSearchResultRecord) Schema(co.cask.cdap.api.data.schema.Schema) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) ViewSpecification(co.cask.cdap.proto.ViewSpecification) DatasetId(co.cask.cdap.proto.id.DatasetId) StreamViewId(co.cask.cdap.proto.id.StreamViewId)

Example 27 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class ObjectMappedTableDefinition method configureSchema.

private DatasetProperties configureSchema(DatasetProperties properties) {
    Map<String, String> props = properties.getProperties();
    Preconditions.checkArgument(props.containsKey(ObjectMappedTableProperties.OBJECT_TYPE));
    // schema can normally be derived from the type. However, we cannot use the Type in this method because
    // this is called by the system, where the Type is often not available. for example, if somebody creates
    // an ObjectMappedTable<Purchase> where Purchase is a class internal to their app.
    // we require schema here because we want to validate it to make sure it is supported.
    Preconditions.checkArgument(props.containsKey(ObjectMappedTableProperties.OBJECT_SCHEMA));
    Preconditions.checkArgument(props.containsKey(ObjectMappedTableProperties.ROW_KEY_EXPLORE_NAME));
    Preconditions.checkArgument(props.containsKey(ObjectMappedTableProperties.ROW_KEY_EXPLORE_TYPE));
    try {
        Schema objectSchema = ObjectMappedTableProperties.getObjectSchema(props);
        validateSchema(objectSchema);
        String keyName = ObjectMappedTableProperties.getRowKeyExploreName(props);
        Schema.Type keyType = ObjectMappedTableProperties.getRowKeyExploreType(props);
        Schema fullSchema = addKeyToSchema(objectSchema, keyName, keyType);
        return TableProperties.builder().setSchema(fullSchema).setRowFieldName(keyName).addAll(properties.getProperties()).build();
    } catch (IOException e) {
        throw new IllegalArgumentException("Could not parse schema.", e);
    } catch (UnsupportedTypeException e) {
        throw new IllegalArgumentException("Schema is of an unsupported type.", e);
    }
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) UnsupportedTypeException(co.cask.cdap.api.data.schema.UnsupportedTypeException) IOException(java.io.IOException)

Example 28 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class ObjectStoreDefinition method getDataset.

@Override
public ObjectStoreDataset<?> getDataset(DatasetContext datasetContext, DatasetSpecification spec, Map<String, String> arguments, ClassLoader classLoader) throws IOException {
    DatasetSpecification kvTableSpec = spec.getSpecification("objects");
    KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, arguments, classLoader);
    TypeRepresentation typeRep = GSON.fromJson(spec.getProperty("type"), TypeRepresentation.class);
    Schema schema = GSON.fromJson(spec.getProperty("schema"), Schema.class);
    return new ObjectStoreDataset(spec.getName(), table, typeRep, schema, classLoader);
}
Also used : KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) TypeRepresentation(co.cask.cdap.internal.io.TypeRepresentation) Schema(co.cask.cdap.api.data.schema.Schema) DatasetSpecification(co.cask.cdap.api.dataset.DatasetSpecification)

Example 29 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class FileStreamAdmin method updateConfig.

@Override
public void updateConfig(final StreamId streamId, final StreamProperties properties) throws Exception {
    Location streamLocation;
    // User should have admin access on the stream to update its configuration
    ensureAccess(streamId, Action.ADMIN);
    streamLocation = impersonator.doAs(streamId.getParent(), new Callable<Location>() {

        @Override
        public Location call() throws Exception {
            return getStreamLocation(streamId);
        }
    });
    Preconditions.checkArgument(streamLocation.isDirectory(), "Stream '%s' does not exist.", streamId);
    SecurityUtil.verifyOwnerPrincipal(streamId, properties.getOwnerPrincipal(), ownerAdmin);
    streamCoordinatorClient.updateProperties(streamId, new Callable<CoordinatorStreamProperties>() {

        @Override
        public CoordinatorStreamProperties call() throws Exception {
            StreamProperties oldProperties = updateProperties(streamId, properties);
            FormatSpecification format = properties.getFormat();
            if (format != null) {
                // if the schema has changed, we need to recreate the hive table.
                // Changes in format and settings don't require
                // a hive change, as they are just properties used by the stream storage handler.
                Schema currSchema = oldProperties.getFormat().getSchema();
                Schema newSchema = format.getSchema();
                if (!Objects.equals(currSchema, newSchema)) {
                    alterExploreStream(streamId, false, null);
                    alterExploreStream(streamId, true, format);
                }
            }
            publishAudit(streamId, AuditType.UPDATE);
            return new CoordinatorStreamProperties(properties.getTTL(), properties.getFormat(), properties.getNotificationThresholdMB(), null, properties.getDescription(), properties.getOwnerPrincipal());
        }
    });
}
Also used : CoordinatorStreamProperties(co.cask.cdap.data.stream.CoordinatorStreamProperties) Schema(co.cask.cdap.api.data.schema.Schema) StreamProperties(co.cask.cdap.proto.StreamProperties) CoordinatorStreamProperties(co.cask.cdap.data.stream.CoordinatorStreamProperties) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) Callable(java.util.concurrent.Callable) NotificationFeedException(co.cask.cdap.notifications.feeds.NotificationFeedException) FileNotFoundException(java.io.FileNotFoundException) UnauthorizedException(co.cask.cdap.security.spi.authorization.UnauthorizedException) IOException(java.io.IOException) NotFoundException(co.cask.cdap.common.NotFoundException) StreamNotFoundException(co.cask.cdap.common.StreamNotFoundException) Location(org.apache.twill.filesystem.Location)

Example 30 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class SchemaIndexerTest method testSimpleSchema.

@Test
public void testSimpleSchema() throws Exception {
    Schema simpleSchema = Schema.of(Schema.Type.INT);
    Set<String> expected = Collections.emptySet();
    SchemaIndexer indexer = new SchemaIndexer();
    DatasetId datasetInstance = new DatasetId("ns1", "ds1");
    Set<String> actual = indexer.getIndexes(new MetadataEntry(datasetInstance, KEY, simpleSchema.toString()));
    Assert.assertEquals(expected, actual);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Aggregations

Schema (co.cask.cdap.api.data.schema.Schema)210 Test (org.junit.Test)92 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)69 Table (co.cask.cdap.api.dataset.table.Table)38 ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)35 ApplicationId (co.cask.cdap.proto.id.ApplicationId)34 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)32 ApplicationManager (co.cask.cdap.test.ApplicationManager)30 AppRequest (co.cask.cdap.proto.artifact.AppRequest)29 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)24 IOException (java.io.IOException)23 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)22 ReflectionSchemaGenerator (co.cask.cdap.internal.io.ReflectionSchemaGenerator)22 ArrayList (java.util.ArrayList)22 WorkflowManager (co.cask.cdap.test.WorkflowManager)20 Map (java.util.Map)18 Set (java.util.Set)14 UnsupportedTypeException (co.cask.cdap.api.data.schema.UnsupportedTypeException)12 HashMap (java.util.HashMap)12 HashSet (java.util.HashSet)11