Use of org.apache.drill.common.expression.PathSegment in project drill by apache — class FieldIdUtil, method getFieldIdIfMatches.
/**
 * Resolves the path segment {@code seg} against {@code vector}, accumulating type and
 * ordinal information into {@code builder}, and returns the resulting {@link TypedFieldId}
 * or {@code null} if the path does not match the vector's structure.
 *
 * @param vector         the vector to resolve the path against
 * @param builder        accumulator for intermediate/final types, ordinals and remainder
 * @param addToBreadCrumb whether this call should still record intermediate types and the
 *                       remainder path into the builder; cleared once a remainder is set
 * @param seg            the current path segment, or null when the path is fully consumed
 * @return the resolved field id, or null when the path cannot be matched
 */
public static TypedFieldId getFieldIdIfMatches(ValueVector vector, TypedFieldId.Builder builder, boolean addToBreadCrumb, PathSegment seg) {
  if (vector instanceof RepeatedMapVector && seg != null && seg.isArray() && !seg.isLastPath()) {
    if (addToBreadCrumb) {
      addToBreadCrumb = false;
      builder.remainder(seg);
    }
    // skip the first array segment as there is no corresponding child vector.
    seg = seg.getChild();
    // a second consecutive index (e.g. a[0][1]) has no matching child vector, so
    // the path cannot be resolved.
    if (seg.isArray()) {
      return null;
    }
  }
  if (seg == null) {
    // path fully consumed: this vector itself is the requested field.
    if (addToBreadCrumb) {
      builder.intermediateType(vector.getField().getType());
    }
    return builder.finalType(vector.getField().getType()).build();
  }
  if (seg.isArray()) {
    if (seg.isLastPath()) {
      MajorType type;
      if (vector instanceof AbstractContainerVector) {
        type = ((AbstractContainerVector) vector).getLastPathType();
      } else if (vector instanceof ListVector) {
        type = ((ListVector) vector).getDataVector().getField().getType();
        builder.listVector();
      } else {
        throw new UnsupportedOperationException("FieldIdUtil does not support vector of type " + vector.getField().getType());
      }
      // the path ends in an index, so the result is a single element of the array.
      builder.withIndex().finalType(type);
      // only set remainder when it's the only array segment.
      if (addToBreadCrumb) {
        addToBreadCrumb = false;
        builder.remainder(seg);
      }
      return builder.build();
    } else {
      // intermediate array segment: record the remainder once, then keep descending.
      if (addToBreadCrumb) {
        addToBreadCrumb = false;
        builder.remainder(seg);
      }
    }
  } else {
    // a named segment cannot address into a ListVector's anonymous elements.
    if (vector instanceof ListVector) {
      return null;
    }
  }
  // Descend one level to the child vector named (or indexed) by the current segment.
  ValueVector v;
  if (vector instanceof AbstractContainerVector) {
    VectorWithOrdinal vord = ((AbstractContainerVector) vector).getChildVectorWithOrdinal(seg.isArray() ? null : seg.getNameSegment().getPath());
    if (vord == null) {
      return null;
    }
    v = vord.vector;
    if (addToBreadCrumb) {
      builder.intermediateType(v.getField().getType());
      builder.addId(vord.ordinal);
    }
  } else if (vector instanceof ListVector) {
    v = ((ListVector) vector).getDataVector();
  } else {
    throw new UnsupportedOperationException("FieldIdUtil does not support vector of type " + vector.getField().getType());
  }
  if (v instanceof AbstractContainerVector) {
    // we're looking for a multi path.
    AbstractContainerVector c = (AbstractContainerVector) v;
    return getFieldIdIfMatches(c, builder, addToBreadCrumb, seg.getChild());
  } else if (v instanceof ListVector) {
    ListVector list = (ListVector) v;
    return getFieldIdIfMatches(list, builder, addToBreadCrumb, seg.getChild());
  } else if (v instanceof UnionVector) {
    return getFieldIdIfMatchesUnion((UnionVector) v, builder, addToBreadCrumb, seg.getChild());
  } else {
    // v is a scalar vector: the path may end here, or end in one trailing index.
    if (seg.isNamed()) {
      if (addToBreadCrumb) {
        builder.intermediateType(v.getField().getType());
      }
      builder.finalType(v.getField().getType());
    } else {
      // indexed access into a scalar: element may be absent, so mark it OPTIONAL.
      builder.finalType(v.getField().getType().toBuilder().setMode(DataMode.OPTIONAL).build());
    }
    if (seg.isLastPath()) {
      return builder.build();
    } else {
      PathSegment child = seg.getChild();
      if (child.isLastPath() && child.isArray()) {
        if (addToBreadCrumb) {
          builder.remainder(child);
        }
        builder.withIndex();
        builder.finalType(v.getField().getType().toBuilder().setMode(DataMode.OPTIONAL).build());
        return builder.build();
      } else {
        logger.warn("You tried to request a complex type inside a scalar object or path or type is wrong.");
        return null;
      }
    }
  }
}
Use of org.apache.drill.common.expression.PathSegment in project drill by apache — class JsonReader, method ensureAtLeastOneField.
@SuppressWarnings("resource")
@Override
public void ensureAtLeastOneField(ComplexWriter writer) {
  List<BaseWriter.MapWriter> mapWriters = Lists.newArrayList();
  List<PathSegment> leafSegments = Lists.newArrayList();
  BitSet emptyStatus = new BitSet(columns.size());

  // First pass: walk each projected column down to its innermost map writer
  // and record which of those maps are still empty.
  for (int i = 0; i < columns.size(); i++) {
    PathSegment segment = columns.get(i).getRootSegment();
    BaseWriter.MapWriter mapWriter = writer.rootAsMap();
    while (segment.getChild() != null && !segment.getChild().isArray()) {
      mapWriter = mapWriter.map(segment.getNameSegment().getPath());
      segment = segment.getChild();
    }
    mapWriters.add(mapWriter);
    leafSegments.add(segment);
    if (mapWriter.isEmptyMap()) {
      emptyStatus.set(i, true);
    }
    if (i == 0 && !allTextMode) {
      // outside all-text mode, only materialize the first column to
      // avoid schema change exceptions by downstream operators.
      break;
    }
  }

  // Second pass: give every map recorded as empty a typed child field.
  // The writers' state may have changed during the first pass, so we
  // rely on the emptyStatus captured above.
  for (int i = 0; i < leafSegments.size(); i++) {
    if (!emptyStatus.get(i)) {
      continue;
    }
    String leafName = leafSegments.get(i).getNameSegment().getPath();
    if (allTextMode) {
      mapWriters.get(i).varChar(leafName);
    } else {
      mapWriters.get(i).integer(leafName);
    }
  }

  // Finally, seed a child type into any list writer that was never initialized
  // (value capacity of zero indicates no writes happened).
  for (ListWriter listWriter : emptyArrayWriters) {
    if (listWriter.getValueCapacity() == 0) {
      if (allTextMode) {
        listWriter.varChar();
      } else {
        listWriter.integer();
      }
    }
  }
}
Use of org.apache.drill.common.expression.PathSegment in project drill by apache — class DrillParquetReader, method getProjection.
/**
 * Builds a parquet projection schema containing only the requested columns.
 * Columns with no match in the parquet schema are appended to {@code columnsNotFound}.
 */
public static MessageType getProjection(MessageType schema, Collection<SchemaPath> columns, List<SchemaPath> columnsNotFound) {
  String messageName = schema.getName();
  List<ColumnDescriptor> schemaColumns = schema.getColumns();

  // parquet type.union() seems to lose ConvertedType info when merging two columns
  // that are the same type (e.g. two elements selected from one array), so collect
  // matched paths in an ordered, de-duplicated set and merge types only at the end.
  Set<SchemaPath> matchedPaths = Sets.newLinkedHashSet();

  // Strip array indices from the projection columns, since the parquet schema
  // only carries the named path components.
  List<SchemaPath> namedOnlyColumns = Lists.newLinkedList();
  for (SchemaPath column : columns) {
    List<String> parts = Lists.newArrayList();
    for (PathSegment s = column.getRootSegment(); s != null; s = s.getChild()) {
      if (s.isNamed()) {
        parts.add(s.getNameSegment().getPath());
      }
    }
    namedOnlyColumns.add(SchemaPath.getCompoundPath(parts.toArray(new String[parts.size()])));
  }

  // Express the parquet columns as SchemaPaths so they can be compared to the
  // projection columns in a case-insensitive manner.
  List<SchemaPath> parquetPaths = Lists.newLinkedList();
  for (ColumnDescriptor descriptor : schemaColumns) {
    String[] pathCopy = Arrays.copyOf(descriptor.getPath(), descriptor.getPath().length);
    parquetPaths.add(SchemaPath.getCompoundPath(pathCopy));
  }

  // Record every parquet column a projection column matches; a projection column
  // that matches nothing goes into columnsNotFound.
  for (SchemaPath wanted : namedOnlyColumns) {
    boolean found = false;
    for (SchemaPath candidate : parquetPaths) {
      if (candidate.contains(wanted)) {
        matchedPaths.add(candidate);
        found = true;
      }
    }
    if (!found) {
      columnsNotFound.add(wanted);
    }
  }

  // Convert each matched path back to a parquet Type and union the single-field
  // message types together into the final projection schema.
  MessageType projection = null;
  for (SchemaPath matched : matchedPaths) {
    List<String> parts = Lists.newArrayList();
    for (PathSegment s = matched.getRootSegment(); s != null; s = s.getChild()) {
      parts.add(s.getNameSegment().getPath());
    }
    Type fieldType = getType(parts.toArray(new String[parts.size()]), 0, schema);
    projection = (projection == null)
        ? new MessageType(messageName, fieldType)
        : projection.union(new MessageType(messageName, fieldType));
  }
  return projection;
}
Use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto — class ProjectionSetImpl, method addSegment.
/**
 * Records one projected path: the root name joins this level's projection set,
 * and any named child path is pushed into the nested map projection.
 */
private void addSegment(NameSegment rootSegment) {
  String key = rootSegment.getPath().toLowerCase();
  projection.add(key);
  PathSegment child = rootSegment.getChild();
  // Stop when the path ends here, or when the suffix is an array index [x],
  // which carries no projection information.
  if (child == null || child.isArray()) {
    return;
  }
  // Descend into the map member, creating the nested projection set on demand.
  ProjectionSetImpl childSet = mapProjections.get(key);
  if (childSet == null) {
    childSet = new ProjectionSetImpl();
    mapProjections.put(key, childSet);
  }
  childSet.addSegment((NameSegment) child);
}
Use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto — class BsonRecordReader, method ensureAtLeastOneField.
/**
 * Guarantees the writer holds at least one field so that count-style queries
 * still return rows even when no column data was written.
 */
public void ensureAtLeastOneField(ComplexWriter writer) {
  if (atLeastOneWrite) {
    return;
  }
  // Nothing was written: materialize the first projected column as an empty
  // integer field, walking down through any intermediate (non-array) maps.
  PathSegment segment = columns.get(0).getRootSegment();
  BaseWriter.MapWriter mapWriter = writer.rootAsMap();
  while (segment.getChild() != null && !segment.getChild().isArray()) {
    mapWriter = mapWriter.map(segment.getNameSegment().getPath());
    segment = segment.getChild();
  }
  mapWriter.integer(segment.getNameSegment().getPath());
}
Aggregations