Usage of org.apache.drill.exec.record.metadata.TupleSchema in the project drill by apache.
From the class SchemaVisitor, method visitSchema.
@Override
public TupleMetadata visitSchema(SchemaParser.SchemaContext ctx) {
  // Start from an empty row schema when no column list was declared;
  // otherwise build the schema from the parsed column definitions.
  TupleMetadata schema;
  if (ctx.columns() == null) {
    schema = new TupleSchema();
  } else {
    schema = visitColumns(ctx.columns());
  }
  // Attach any schema-level properties declared alongside the columns.
  if (ctx.property_values() != null) {
    schema.setProperties(ctx.property_values().accept(new PropertiesVisitor()));
  }
  return schema;
}
Usage of org.apache.drill.exec.record.metadata.TupleSchema in the project drill by apache.
From the class ConvertHiveMapRDBJsonScanToDrillMapRDBJsonScan, method createNativeScanRel.
/**
 * Helper method which creates a DrillScanRel with native Drill HiveScan.
 * <p>
 * Converts the Hive-based scan into a native MapR-DB JSON table scan: the
 * MapR-DB table name and the projected columns are taken from the Hive table
 * parameters (with overridden column names mapped back), and the Hive column
 * types are converted into a Drill {@code TupleMetadata} schema so the native
 * reader can resolve types that it cannot discover on its own.
 *
 * @param hiveScanRel the original scan rel whose group scan is a {@code HiveScan}
 * @param settings planner settings used to read MapR-DB JSON format options
 * @return a new {@code DrillScanRel} wrapping a native {@code JsonTableGroupScan}
 * @throws IOException if building the native group scan fails
 */
private DrillScanRel createNativeScanRel(DrillScanRel hiveScanRel, PlannerSettings settings) throws IOException {
RelDataTypeFactory typeFactory = hiveScanRel.getCluster().getTypeFactory();
HiveScan hiveScan = (HiveScan) hiveScanRel.getGroupScan();
HiveReadEntry hiveReadEntry = hiveScan.getHiveReadEntry();
// Hive table parameters carry the backing MapR-DB table name and any
// column-name overrides that must be reversed for the native scan.
Map<String, String> parameters = hiveReadEntry.getHiveTableWrapper().getParameters();
JsonScanSpec scanSpec = new JsonScanSpec(parameters.get(MAPRDB_TABLE_NAME), null, null);
// Map each projected column path back to its original (non-overridden) name.
List<SchemaPath> hiveScanCols = hiveScanRel.getColumns().stream().map(colNameSchemaPath -> replaceOverriddenSchemaPath(parameters, colNameSchemaPath)).collect(Collectors.toList());
// creates TupleMetadata based on Hive's schema (with optional data modes) to be used in the reader
// for the case when column type wasn't discovered
HiveToRelDataTypeConverter dataTypeConverter = new HiveToRelDataTypeConverter(typeFactory);
TupleMetadata schema = new TupleSchema();
hiveReadEntry.getTable().getColumnListsCache().getTableSchemaColumns().forEach(column -> schema.addColumn(HiveUtilities.getColumnMetadata(replaceOverriddenColumnId(parameters, column.getName()), dataTypeConverter.convertToNullableRelDataType(column))));
MapRDBFormatPluginConfig formatConfig = new MapRDBFormatPluginConfig();
// Propagate session options that control how the native JSON reader
// interprets timestamps and whether it reads all values as text.
formatConfig.readTimestampWithZoneOffset = settings.getOptions().getBoolean(ExecConstants.HIVE_READ_MAPRDB_JSON_TIMESTAMP_WITH_TIMEZONE_OFFSET);
formatConfig.allTextMode = settings.getOptions().getBoolean(ExecConstants.HIVE_MAPRDB_JSON_ALL_TEXT_MODE);
JsonTableGroupScan nativeMapRDBScan = new JsonTableGroupScan(hiveScan.getUserName(), hiveScan.getStoragePlugin(), // TODO: We should use Hive format plugins here, once it will be implemented. DRILL-6621
(MapRDBFormatPlugin) hiveScan.getStoragePlugin().getFormatPlugin(formatConfig), scanSpec, hiveScanCols, new MapRDBStatistics(), FileSystemMetadataProviderManager.getMetadataProviderForSchema(schema));
// Rebuild the row type with the original column names so it matches the
// native scan's output, preserving the Hive-discovered column types.
List<String> nativeScanColNames = hiveScanRel.getRowType().getFieldList().stream().map(field -> replaceOverriddenColumnId(parameters, field.getName())).collect(Collectors.toList());
List<RelDataType> nativeScanColTypes = hiveScanRel.getRowType().getFieldList().stream().map(RelDataTypeField::getType).collect(Collectors.toList());
RelDataType nativeScanRowType = typeFactory.createStructType(nativeScanColTypes, nativeScanColNames);
return new DrillScanRel(hiveScanRel.getCluster(), hiveScanRel.getTraitSet(), hiveScanRel.getTable(), nativeMapRDBScan, nativeScanRowType, hiveScanCols);
}
Usage of org.apache.drill.exec.record.metadata.TupleSchema in the project drill by apache.
From the class TestSchemaTrackerDefined, method testEmptyProjectWithDefinedSchema.
@Test
public void testEmptyProjectWithDefinedSchema() {
// Model a query that projects no columns at all (as for COUNT(*))
// while the scan has an explicitly defined (empty) schema.
final ScanSchemaConfigBuilder configBuilder = new ScanSchemaConfigBuilder().projection(RowSetTestUtils.projectNone());
configBuilder.definedSchema(new TupleSchema());
final ScanSchemaTracker tracker = configBuilder.build();
// A defined schema selects the schema-based tracker, which is
// immediately resolved and reports an empty projection.
assertTrue(tracker instanceof SchemaBasedTracker);
assertTrue(tracker.isResolved());
assertSame(ProjectionType.NONE, tracker.projectionType());
}
Usage of org.apache.drill.exec.record.metadata.TupleSchema in the project drill by axbaretto.
From the class TestTupleSchema, method testNonEmptyRootTuple.
/**
 * Test the basics of a non-empty root tuple (i.e. a row) using a pair
 * of primitive columns: add/lookup by name and index, full-name
 * generation, duplicate-column rejection, equivalence rules, deep copy,
 * and round trip through a {@code BatchSchema}.
 */
@Test
public void testNonEmptyRootTuple() {
TupleMetadata root = new TupleSchema();
// First column: required INT "a", added via a MaterializedField.
MaterializedField fieldA = SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED);
ColumnMetadata colA = root.add(fieldA);
assertEquals(1, root.size());
assertFalse(root.isEmpty());
assertEquals(0, root.index("a"));
assertEquals(-1, root.index("b"));
assertTrue(fieldA.isEquivalent(root.column(0)));
assertTrue(fieldA.isEquivalent(root.column("a")));
// Column lookup is case-insensitive.
assertTrue(fieldA.isEquivalent(root.column("A")));
assertSame(colA, root.metadata(0));
assertSame(colA, root.metadata("a"));
assertEquals("a", root.fullName(0));
assertEquals("a", root.fullName(colA));
// Adding a duplicate column name must be rejected.
try {
root.add(fieldA);
fail();
} catch (IllegalArgumentException e) {
// Expected
}
// Second column: optional VARCHAR "b", added via ColumnMetadata this time.
MaterializedField fieldB = SchemaBuilder.columnSchema("b", MinorType.VARCHAR, DataMode.OPTIONAL);
ColumnMetadata colB = MetadataUtils.fromField(fieldB);
int indexB = root.addColumn(colB);
assertEquals(1, indexB);
assertEquals(2, root.size());
assertFalse(root.isEmpty());
assertEquals(indexB, root.index("b"));
assertTrue(fieldB.isEquivalent(root.column(1)));
assertTrue(fieldB.isEquivalent(root.column("b")));
assertSame(colB, root.metadata(1));
assertSame(colB, root.metadata("b"));
assertEquals("b", root.fullName(1));
assertEquals("b", root.fullName(colB));
// Duplicate rejection applies regardless of which add method is used.
try {
root.add(fieldB);
fail();
} catch (IllegalArgumentException e) {
// Expected
}
// The field list preserves column order.
List<MaterializedField> fieldList = root.toFieldList();
assertTrue(fieldA.isEquivalent(fieldList.get(0)));
assertTrue(fieldB.isEquivalent(fieldList.get(1)));
TupleMetadata emptyRoot = new TupleSchema();
assertFalse(emptyRoot.isEquivalent(root));
// Same schema: the tuples are equivalent
TupleMetadata root3 = new TupleSchema();
root3.add(fieldA);
root3.addColumn(colB);
assertTrue(root3.isEquivalent(root));
assertTrue(root.isEquivalent(root3));
// Same columns, different order. The tuples are not equivalent.
TupleMetadata root4 = new TupleSchema();
root4.addColumn(colB);
root4.add(fieldA);
assertFalse(root4.isEquivalent(root));
assertFalse(root.isEquivalent(root4));
// A tuple is equivalent to its copy.
assertTrue(root.isEquivalent(((TupleSchema) root).copy()));
// And it is equivalent to the round trip to a batch schema.
BatchSchema batchSchema = ((TupleSchema) root).toBatchSchema(SelectionVectorMode.NONE);
assertTrue(root.isEquivalent(MetadataUtils.fromFields(batchSchema)));
}
Usage of org.apache.drill.exec.record.metadata.TupleSchema in the project drill by axbaretto.
From the class TestTupleSchema, method testMapTupleFromMetadata.
/**
 * Test a complex map schema of the form:<br>
 * a.`b.x`.`c.y`.d<br>
 * in which columns "a", "b.x" and "c.y" are maps, "b.x" and "c.y" are names
 * that contains dots, and d is primitive.
 * Here we build up the schema using the metadata schema, and generate a
 * materialized field from the metadata.
 */
@Test
public void testMapTupleFromMetadata() {
TupleMetadata root = new TupleSchema();
// Build the nested map chain a -> b.x -> c.y, then add two leaves to c.y.
MaterializedField fieldA = SchemaBuilder.columnSchema("a", MinorType.MAP, DataMode.REQUIRED);
ColumnMetadata colA = root.add(fieldA);
TupleMetadata mapA = colA.mapSchema();
MaterializedField fieldB = SchemaBuilder.columnSchema("b.x", MinorType.MAP, DataMode.REQUIRED);
ColumnMetadata colB = mapA.add(fieldB);
TupleMetadata mapB = colB.mapSchema();
MaterializedField fieldC = SchemaBuilder.columnSchema("c.y", MinorType.MAP, DataMode.REQUIRED);
ColumnMetadata colC = mapB.add(fieldC);
TupleMetadata mapC = colC.mapSchema();
MaterializedField fieldD = SchemaBuilder.columnSchema("d", MinorType.VARCHAR, DataMode.REQUIRED);
ColumnMetadata colD = mapC.add(fieldD);
MaterializedField fieldE = SchemaBuilder.columnSchema("e", MinorType.INT, DataMode.REQUIRED);
ColumnMetadata colE = mapC.add(fieldE);
// Each tuple level sees only its direct children.
assertEquals(1, root.size());
assertEquals(1, mapA.size());
assertEquals(1, mapB.size());
assertEquals(2, mapC.size());
assertSame(colA, root.metadata("a"));
assertSame(colB, mapA.metadata("b.x"));
assertSame(colC, mapB.metadata("c.y"));
assertSame(colD, mapC.metadata("d"));
assertSame(colE, mapC.metadata("e"));
// The full name contains quoted names if the contain dots.
// This name is more for diagnostic than semantic purposes.
assertEquals("a", root.fullName(0));
assertEquals("a.`b.x`", mapA.fullName(0));
assertEquals("a.`b.x`.`c.y`", mapB.fullName(0));
assertEquals("a.`b.x`.`c.y`.d", mapC.fullName(0));
assertEquals("a.`b.x`.`c.y`.e", mapC.fullName(1));
assertEquals(1, colA.schema().getChildren().size());
assertEquals(1, colB.schema().getChildren().size());
assertEquals(2, colC.schema().getChildren().size());
// Yes, it is awful that MaterializedField does not provide indexed
// access to its children. That's one reason we have the TupleMetadata
// classes...
// Note that the metadata layer does not store the materialized field.
// (Doing so causes no end of synchronization problems.) So we test
// for equivalence, not sameness.
Iterator<MaterializedField> iterC = colC.schema().getChildren().iterator();
assertTrue(fieldD.isEquivalent(iterC.next()));
assertTrue(fieldE.isEquivalent(iterC.next()));
// Copying should be deep.
TupleMetadata root2 = ((TupleSchema) root).copy();
assertEquals(2, root2.metadata(0).mapSchema().metadata(0).mapSchema().metadata(0).mapSchema().size());
// Use JUnit's assertTrue, not the Java "assert" statement: a bare assert
// is silently skipped unless the JVM runs with -ea, so it verified nothing.
assertTrue(root.isEquivalent(root2));
// Generate a materialized field and compare.
fieldA.addChild(fieldB);
fieldB.addChild(fieldC);
fieldC.addChild(fieldD);
fieldC.addChild(fieldE);
assertTrue(colA.schema().isEquivalent(fieldA));
}
Aggregations