
Example 11 with TupleSchema

use of org.apache.drill.exec.record.metadata.TupleSchema in project drill by apache.

the class SchemaVisitor method visitSchema.

@Override
public TupleMetadata visitSchema(SchemaParser.SchemaContext ctx) {
    TupleMetadata schema = ctx.columns() == null ? new TupleSchema() : visitColumns(ctx.columns());
    if (ctx.property_values() != null) {
        PropertiesVisitor propertiesVisitor = new PropertiesVisitor();
        schema.setProperties(ctx.property_values().accept(propertiesVisitor));
    }
    return schema;
}
Also used : TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) TupleSchema(org.apache.drill.exec.record.metadata.TupleSchema)
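
For reference, here is a minimal standalone sketch of what visitSchema produces in the empty case: an empty TupleSchema with properties attached via the same setProperties call. The wrapper class and the property key are illustrative; setProperties and properties come from the Propertied interface that TupleMetadata extends.

import java.util.HashMap;
import java.util.Map;

import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.record.metadata.TupleSchema;

public class EmptySchemaSketch {
    public static void main(String[] args) {
        // Mirrors visitSchema when ctx.columns() is null: an empty root schema.
        TupleMetadata schema = new TupleSchema();

        // Properties are plain string pairs, as produced by PropertiesVisitor above.
        Map<String, String> props = new HashMap<>();
        // Illustrative property key; not taken from the example above.
        props.put("drill.strict", "true");
        schema.setProperties(props);

        System.out.println(schema.properties());
    }
}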

Example 12 with TupleSchema

use of org.apache.drill.exec.record.metadata.TupleSchema in project drill by apache.

the class ConvertHiveMapRDBJsonScanToDrillMapRDBJsonScan method createNativeScanRel.

/**
 * Helper method which creates a DrillScanRel with native Drill HiveScan.
 */
private DrillScanRel createNativeScanRel(DrillScanRel hiveScanRel, PlannerSettings settings) throws IOException {
    RelDataTypeFactory typeFactory = hiveScanRel.getCluster().getTypeFactory();
    HiveScan hiveScan = (HiveScan) hiveScanRel.getGroupScan();
    HiveReadEntry hiveReadEntry = hiveScan.getHiveReadEntry();
    Map<String, String> parameters = hiveReadEntry.getHiveTableWrapper().getParameters();
    JsonScanSpec scanSpec = new JsonScanSpec(parameters.get(MAPRDB_TABLE_NAME), null, null);
    List<SchemaPath> hiveScanCols = hiveScanRel.getColumns().stream()
        .map(colNameSchemaPath -> replaceOverriddenSchemaPath(parameters, colNameSchemaPath))
        .collect(Collectors.toList());
    // Creates TupleMetadata based on Hive's schema (with optional data modes) to be
    // used in the reader for the case when a column's type wasn't discovered.
    HiveToRelDataTypeConverter dataTypeConverter = new HiveToRelDataTypeConverter(typeFactory);
    TupleMetadata schema = new TupleSchema();
    hiveReadEntry.getTable().getColumnListsCache().getTableSchemaColumns()
        .forEach(column -> schema.addColumn(HiveUtilities.getColumnMetadata(
            replaceOverriddenColumnId(parameters, column.getName()),
            dataTypeConverter.convertToNullableRelDataType(column))));
    MapRDBFormatPluginConfig formatConfig = new MapRDBFormatPluginConfig();
    formatConfig.readTimestampWithZoneOffset = settings.getOptions().getBoolean(ExecConstants.HIVE_READ_MAPRDB_JSON_TIMESTAMP_WITH_TIMEZONE_OFFSET);
    formatConfig.allTextMode = settings.getOptions().getBoolean(ExecConstants.HIVE_MAPRDB_JSON_ALL_TEXT_MODE);
    // TODO: We should use Hive format plugins here, once they are implemented. DRILL-6621
    JsonTableGroupScan nativeMapRDBScan = new JsonTableGroupScan(
        hiveScan.getUserName(),
        hiveScan.getStoragePlugin(),
        (MapRDBFormatPlugin) hiveScan.getStoragePlugin().getFormatPlugin(formatConfig),
        scanSpec,
        hiveScanCols,
        new MapRDBStatistics(),
        FileSystemMetadataProviderManager.getMetadataProviderForSchema(schema));
    List<String> nativeScanColNames = hiveScanRel.getRowType().getFieldList().stream()
        .map(field -> replaceOverriddenColumnId(parameters, field.getName()))
        .collect(Collectors.toList());
    List<RelDataType> nativeScanColTypes = hiveScanRel.getRowType().getFieldList().stream()
        .map(RelDataTypeField::getType)
        .collect(Collectors.toList());
    RelDataType nativeScanRowType = typeFactory.createStructType(nativeScanColTypes, nativeScanColNames);
    return new DrillScanRel(hiveScanRel.getCluster(), hiveScanRel.getTraitSet(), hiveScanRel.getTable(), nativeMapRDBScan, nativeScanRowType, hiveScanCols);
}
Also used : JsonScanSpec(org.apache.drill.exec.store.mapr.db.json.JsonScanSpec) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) HiveUtilities(org.apache.drill.exec.store.hive.HiveUtilities) MapRDBFormatPluginConfig(org.apache.drill.exec.store.mapr.db.MapRDBFormatPluginConfig) HiveToRelDataTypeConverter(org.apache.drill.exec.planner.types.HiveToRelDataTypeConverter) RelOptHelper(org.apache.drill.exec.planner.logical.RelOptHelper) Map(java.util.Map) MapRDBStatistics(org.apache.drill.exec.planner.index.MapRDBStatistics) MapRDBFormatPlugin(org.apache.drill.exec.store.mapr.db.MapRDBFormatPlugin) TupleSchema(org.apache.drill.exec.record.metadata.TupleSchema) RelDataType(org.apache.calcite.rel.type.RelDataType) PrelUtil(org.apache.drill.exec.planner.physical.PrelUtil) StoragePluginOptimizerRule(org.apache.drill.exec.store.StoragePluginOptimizerRule) HiveMapRDBJsonInputFormat(org.apache.hadoop.hive.maprdb.json.input.HiveMapRDBJsonInputFormat) HiveMetadataProvider(org.apache.drill.exec.store.hive.HiveMetadataProvider) SchemaPath(org.apache.drill.common.expression.SchemaPath) IOException(java.io.IOException) FileSystemMetadataProviderManager(org.apache.drill.exec.metastore.store.FileSystemMetadataProviderManager) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) HiveScan(org.apache.drill.exec.store.hive.HiveScan) HiveReadEntry(org.apache.drill.exec.store.hive.HiveReadEntry) List(java.util.List) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) ExecConstants(org.apache.drill.exec.ExecConstants) DocumentConstants(org.ojai.DocumentConstants)
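
The schema-building step above follows a common Drill pattern: start from an empty TupleSchema and add one ColumnMetadata per source column, using optional (nullable) data modes so the reader can supply nulls for columns whose types were never discovered. A hedged standalone sketch of that pattern, substituting MaterializedField and MetadataUtils (both part of the metadata API used in these examples) for the Hive type converter:

import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.record.metadata.TupleSchema;

public class NullableSchemaSketch {
    public static void main(String[] args) {
        TupleMetadata schema = new TupleSchema();
        // Optional data modes stand in for Hive's nullable column types.
        MaterializedField id = MaterializedField.create("id", Types.optional(MinorType.BIGINT));
        MaterializedField name = MaterializedField.create("name", Types.optional(MinorType.VARCHAR));
        schema.addColumn(MetadataUtils.fromField(id));
        schema.addColumn(MetadataUtils.fromField(name));
        System.out.println(schema.size()); // prints 2
    }
}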

Example 13 with TupleSchema

use of org.apache.drill.exec.record.metadata.TupleSchema in project drill by apache.

the class TestSchemaTrackerDefined method testEmptyProjectWithDefinedSchema.

@Test
public void testEmptyProjectWithDefinedSchema() {
    // Simulate SELECT ...
    // That is, project nothing, as for COUNT(*)
    final ScanSchemaConfigBuilder builder = new ScanSchemaConfigBuilder().projection(RowSetTestUtils.projectNone());
    builder.definedSchema(new TupleSchema());
    final ScanSchemaTracker schemaTracker = builder.build();
    assertTrue(schemaTracker instanceof SchemaBasedTracker);
    assertTrue(schemaTracker.isResolved());
    assertSame(ProjectionType.NONE, schemaTracker.projectionType());
}
Also used : TupleSchema(org.apache.drill.exec.record.metadata.TupleSchema) Test(org.junit.Test) BaseTest(org.apache.drill.test.BaseTest) EvfTest(org.apache.drill.categories.EvfTest)
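
For contrast, a hedged variation on the test above: a non-empty defined schema with a wildcard projection. It reuses the same test utilities (ScanSchemaConfigBuilder, RowSetTestUtils, SchemaBuilder.columnSchema); RowSetTestUtils.projectAll() and the immediate-resolution assertion are assumptions extrapolated from the empty-schema case, not verified behavior.

@Test
public void testProjectAllWithDefinedSchema() {
    // Simulate SELECT * against a schema of one required INT column.
    TupleMetadata defined = new TupleSchema();
    defined.add(SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED));
    final ScanSchemaConfigBuilder builder = new ScanSchemaConfigBuilder()
        .projection(RowSetTestUtils.projectAll());
    builder.definedSchema(defined);
    final ScanSchemaTracker schemaTracker = builder.build();
    // A fully defined schema should resolve immediately, as in the empty case.
    assertTrue(schemaTracker.isResolved());
}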

Example 14 with TupleSchema

use of org.apache.drill.exec.record.metadata.TupleSchema in project drill by axbaretto.

the class TestTupleSchema method testNonEmptyRootTuple.

/**
 * Test the basics of a non-empty root tuple (i.e. a row) using a pair
 * of primitive columns.
 */
@Test
public void testNonEmptyRootTuple() {
    TupleMetadata root = new TupleSchema();
    MaterializedField fieldA = SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED);
    ColumnMetadata colA = root.add(fieldA);
    assertEquals(1, root.size());
    assertFalse(root.isEmpty());
    assertEquals(0, root.index("a"));
    assertEquals(-1, root.index("b"));
    assertTrue(fieldA.isEquivalent(root.column(0)));
    assertTrue(fieldA.isEquivalent(root.column("a")));
    assertTrue(fieldA.isEquivalent(root.column("A")));
    assertSame(colA, root.metadata(0));
    assertSame(colA, root.metadata("a"));
    assertEquals("a", root.fullName(0));
    assertEquals("a", root.fullName(colA));
    try {
        root.add(fieldA);
        fail();
    } catch (IllegalArgumentException e) {
        // Expected
    }
    MaterializedField fieldB = SchemaBuilder.columnSchema("b", MinorType.VARCHAR, DataMode.OPTIONAL);
    ColumnMetadata colB = MetadataUtils.fromField(fieldB);
    int indexB = root.addColumn(colB);
    assertEquals(1, indexB);
    assertEquals(2, root.size());
    assertFalse(root.isEmpty());
    assertEquals(indexB, root.index("b"));
    assertTrue(fieldB.isEquivalent(root.column(1)));
    assertTrue(fieldB.isEquivalent(root.column("b")));
    assertSame(colB, root.metadata(1));
    assertSame(colB, root.metadata("b"));
    assertEquals("b", root.fullName(1));
    assertEquals("b", root.fullName(colB));
    try {
        root.add(fieldB);
        fail();
    } catch (IllegalArgumentException e) {
        // Expected
    }
    List<MaterializedField> fieldList = root.toFieldList();
    assertTrue(fieldA.isEquivalent(fieldList.get(0)));
    assertTrue(fieldB.isEquivalent(fieldList.get(1)));
    TupleMetadata emptyRoot = new TupleSchema();
    assertFalse(emptyRoot.isEquivalent(root));
    // Same schema: the tuples are equivalent
    TupleMetadata root3 = new TupleSchema();
    root3.add(fieldA);
    root3.addColumn(colB);
    assertTrue(root3.isEquivalent(root));
    assertTrue(root.isEquivalent(root3));
    // Same columns, different order. The tuples are not equivalent.
    TupleMetadata root4 = new TupleSchema();
    root4.addColumn(colB);
    root4.add(fieldA);
    assertFalse(root4.isEquivalent(root));
    assertFalse(root.isEquivalent(root4));
    // A tuple is equivalent to its copy.
    assertTrue(root.isEquivalent(((TupleSchema) root).copy()));
    // And it is equivalent to the round trip to a batch schema.
    BatchSchema batchSchema = ((TupleSchema) root).toBatchSchema(SelectionVectorMode.NONE);
    assertTrue(root.isEquivalent(MetadataUtils.fromFields(batchSchema)));
}
Also used : MapColumnMetadata(org.apache.drill.exec.record.metadata.MapColumnMetadata) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) PrimitiveColumnMetadata(org.apache.drill.exec.record.metadata.PrimitiveColumnMetadata) VariantColumnMetadata(org.apache.drill.exec.record.metadata.VariantColumnMetadata) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) TupleSchema(org.apache.drill.exec.record.metadata.TupleSchema) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
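
The same two-column row schema can be built more concisely with Drill's fluent SchemaBuilder. A sketch assuming the buildSchema() chaining used widely in Drill's test code, where add() produces REQUIRED columns and addNullable() produces OPTIONAL ones:

// Equivalent to the root schema assembled field-by-field above.
TupleMetadata root = new SchemaBuilder()
    .add("a", MinorType.INT)             // REQUIRED, like fieldA
    .addNullable("b", MinorType.VARCHAR) // OPTIONAL, like fieldB
    .buildSchema();
assertEquals(2, root.size());
assertEquals(0, root.index("a"));
assertEquals(1, root.index("b"));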

Example 15 with TupleSchema

use of org.apache.drill.exec.record.metadata.TupleSchema in project drill by axbaretto.

the class TestTupleSchema method testMapTupleFromMetadata.

/**
 * Test a complex map schema of the form:<br>
 * a.`b.x`.`c.y`.d<br>
 * in which columns "a", "b.x" and "c.y" are maps, "b.x" and "c.y" are names
 * that contain dots, and "d" is primitive.
 * Here we build up the schema using the metadata schema, and generate a
 * materialized field from the metadata.
 */
@Test
public void testMapTupleFromMetadata() {
    TupleMetadata root = new TupleSchema();
    MaterializedField fieldA = SchemaBuilder.columnSchema("a", MinorType.MAP, DataMode.REQUIRED);
    ColumnMetadata colA = root.add(fieldA);
    TupleMetadata mapA = colA.mapSchema();
    MaterializedField fieldB = SchemaBuilder.columnSchema("b.x", MinorType.MAP, DataMode.REQUIRED);
    ColumnMetadata colB = mapA.add(fieldB);
    TupleMetadata mapB = colB.mapSchema();
    MaterializedField fieldC = SchemaBuilder.columnSchema("c.y", MinorType.MAP, DataMode.REQUIRED);
    ColumnMetadata colC = mapB.add(fieldC);
    TupleMetadata mapC = colC.mapSchema();
    MaterializedField fieldD = SchemaBuilder.columnSchema("d", MinorType.VARCHAR, DataMode.REQUIRED);
    ColumnMetadata colD = mapC.add(fieldD);
    MaterializedField fieldE = SchemaBuilder.columnSchema("e", MinorType.INT, DataMode.REQUIRED);
    ColumnMetadata colE = mapC.add(fieldE);
    assertEquals(1, root.size());
    assertEquals(1, mapA.size());
    assertEquals(1, mapB.size());
    assertEquals(2, mapC.size());
    assertSame(colA, root.metadata("a"));
    assertSame(colB, mapA.metadata("b.x"));
    assertSame(colC, mapB.metadata("c.y"));
    assertSame(colD, mapC.metadata("d"));
    assertSame(colE, mapC.metadata("e"));
    // The full name contains quoted names if they contain dots.
    // This name is more for diagnostic than semantic purposes.
    assertEquals("a", root.fullName(0));
    assertEquals("a.`b.x`", mapA.fullName(0));
    assertEquals("a.`b.x`.`c.y`", mapB.fullName(0));
    assertEquals("a.`b.x`.`c.y`.d", mapC.fullName(0));
    assertEquals("a.`b.x`.`c.y`.e", mapC.fullName(1));
    assertEquals(1, colA.schema().getChildren().size());
    assertEquals(1, colB.schema().getChildren().size());
    assertEquals(2, colC.schema().getChildren().size());
    // Yes, it is awful that MaterializedField does not provide indexed
    // access to its children. That's one reason we have the TupleMetadata
    // classes...
    // Note that the metadata layer does not store the materialized field.
    // (Doing so causes no end of synchronization problems.) So we test
    // for equivalence, not sameness.
    Iterator<MaterializedField> iterC = colC.schema().getChildren().iterator();
    assertTrue(fieldD.isEquivalent(iterC.next()));
    assertTrue(fieldE.isEquivalent(iterC.next()));
    // Copying should be deep.
    TupleMetadata root2 = ((TupleSchema) root).copy();
    assertEquals(2, root2.metadata(0).mapSchema().metadata(0).mapSchema().metadata(0).mapSchema().size());
    assertTrue(root.isEquivalent(root2));
    // Generate a materialized field and compare.
    fieldA.addChild(fieldB);
    fieldB.addChild(fieldC);
    fieldC.addChild(fieldD);
    fieldC.addChild(fieldE);
    assertTrue(colA.schema().isEquivalent(fieldA));
}
Also used : MapColumnMetadata(org.apache.drill.exec.record.metadata.MapColumnMetadata) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) PrimitiveColumnMetadata(org.apache.drill.exec.record.metadata.PrimitiveColumnMetadata) VariantColumnMetadata(org.apache.drill.exec.record.metadata.VariantColumnMetadata) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) TupleSchema(org.apache.drill.exec.record.metadata.TupleSchema) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
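
The nested map schema in this test can also be declared top-down with the fluent SchemaBuilder. The sketch below assumes the MapBuilder resumeMap()/resumeSchema() chaining available in recent Drill versions (the axbaretto fork's builder may differ):

// a.`b.x`.`c.y`.{d, e}, declared in one chain instead of map-by-map.
TupleMetadata root = new SchemaBuilder()
    .addMap("a")
        .addMap("b.x")
            .addMap("c.y")
                .add("d", MinorType.VARCHAR)
                .add("e", MinorType.INT)
            .resumeMap()
        .resumeMap()
    .resumeSchema()
    .buildSchema();
TupleMetadata mapC = root.metadata("a").mapSchema()
    .metadata("b.x").mapSchema()
    .metadata("c.y").mapSchema();
assertEquals("a.`b.x`.`c.y`.d", mapC.fullName(0));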

Aggregations

TupleSchema (org.apache.drill.exec.record.metadata.TupleSchema) 30
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata) 25
ColumnMetadata (org.apache.drill.exec.record.metadata.ColumnMetadata) 12
Test (org.junit.Test) 10
MapColumnMetadata (org.apache.drill.exec.record.metadata.MapColumnMetadata) 6
PrimitiveColumnMetadata (org.apache.drill.exec.record.metadata.PrimitiveColumnMetadata) 6
VariantColumnMetadata (org.apache.drill.exec.record.metadata.VariantColumnMetadata) 6
SubOperatorTest (org.apache.drill.test.SubOperatorTest) 5
SchemaPath (org.apache.drill.common.expression.SchemaPath) 4
IOException (java.io.IOException) 3
ArrayList (java.util.ArrayList) 3
Map (java.util.Map) 3
UserException (org.apache.drill.common.exceptions.UserException) 3
MajorType (org.apache.drill.common.types.TypeProtos.MajorType) 3
MaterializedField (org.apache.drill.exec.record.MaterializedField) 3
BaseTest (org.apache.drill.test.BaseTest) 3
Collectors (java.util.stream.Collectors) 2
MinorType (org.apache.drill.common.types.TypeProtos.MinorType) 2
CompoundProjectionFilter (org.apache.drill.exec.physical.resultSet.impl.ProjectionFilter.CompoundProjectionFilter) 2
DirectProjectionFilter (org.apache.drill.exec.physical.resultSet.impl.ProjectionFilter.DirectProjectionFilter) 2