Use of org.apache.drill.common.expression.PathSegment in project drill by apache — class DrillParquetReader, method convertSelectedColumnsToMessageType.
/**
 * Builds a projection schema by converting every selected {@link SchemaPath}
 * into its corresponding Parquet type and merging the per-column schemas.
 *
 * @param schema Parquet file schema
 * @param selectedSchemaPaths columns found in schema
 * @return projection schema, or null when no columns were selected
 */
private static MessageType convertSelectedColumnsToMessageType(MessageType schema, Set<SchemaPath> selectedSchemaPaths) {
  MessageType projection = null;
  String messageName = schema.getName();
  for (SchemaPath schemaPath : selectedSchemaPaths) {
    // Flatten the path into its ordered segment names.
    List<String> segmentNames = new ArrayList<>();
    PathSegment segment = schemaPath.getRootSegment();
    do {
      segmentNames.add(segment.getNameSegment().getPath());
      segment = segment.getChild();
    } while (segment != null);
    Type segmentType = getSegmentType(segmentNames.toArray(new String[0]), 0, schema);
    MessageType columnSchema = new MessageType(messageName, segmentType);
    // Merge each single-column schema into the accumulated projection.
    projection = (projection == null) ? columnSchema : projection.union(columnSchema);
  }
  return projection;
}
Use of org.apache.drill.common.expression.PathSegment in project drill by apache — class PrelUtil, method getColumns.
/**
 * Collects the columns referenced by the given projection expressions.
 *
 * @param rowType row type of the underlying scan
 * @param projects projection expressions to inspect
 * @return push-down information for the referenced columns, or null when the
 *         row type exposes no fields
 */
public static ProjectPushInfo getColumns(RelDataType rowType, List<RexNode> projects) {
  if (rowType.getFieldNames().isEmpty()) {
    return null;
  }
  RefFieldsVisitor visitor = new RefFieldsVisitor(rowType);
  for (RexNode project : projects) {
    visitor.addColumn(project.accept(visitor));
  }
  return visitor.getInfo();
}
Use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto — class DrillPushProjectIntoScanRule, method getFieldsInformation.
/**
 * Gathers the fields referenced by the projection expressions.
 *
 * @param rowType row type of the underlying scan
 * @param projects projection expressions to inspect
 * @return push-down information for the referenced fields
 */
private ProjectPushInfo getFieldsInformation(RelDataType rowType, List<RexNode> projects) {
  ProjectFieldsVisitor visitor = new ProjectFieldsVisitor(rowType);
  for (RexNode project : projects) {
    visitor.addField(project.accept(visitor));
  }
  return visitor.getInfo();
}
Use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto — class JsonReaderUtils, method ensureAtLeastOneField.
/**
 * Guarantees that the output batch carries at least one materialized field so
 * downstream operators never see a completely empty schema.
 *
 * @param writer root complex writer for the batch
 * @param columns projected columns to materialize
 * @param allTextMode when true, dummy fields are created as varchar; otherwise as integer
 * @param emptyArrayWriters list writers that may never have been initialized
 */
public static void ensureAtLeastOneField(BaseWriter.ComplexWriter writer, Collection<SchemaPath> columns, boolean allTextMode, List<BaseWriter.ListWriter> emptyArrayWriters) {
  List<BaseWriter.MapWriter> mapWriters = Lists.newArrayList();
  List<PathSegment> leafSegments = Lists.newArrayList();
  BitSet emptyStatus = new BitSet(columns.size());
  // First pass: walk each column path down to its leaf map writer and record
  // which of those writers are still empty.
  int index = 0;
  for (SchemaPath column : columns) {
    PathSegment segment = column.getRootSegment();
    BaseWriter.MapWriter mapWriter = writer.rootAsMap();
    // Descend through named (non-array) child segments, creating maps as needed.
    while (segment.getChild() != null && !segment.getChild().isArray()) {
      mapWriter = mapWriter.map(segment.getNameSegment().getPath());
      segment = segment.getChild();
    }
    mapWriters.add(mapWriter);
    leafSegments.add(segment);
    if (mapWriter.isEmptyMap()) {
      emptyStatus.set(index, true);
    }
    if (index == 0 && !allTextMode) {
      // Stop after the first column when not in all-text mode; this
      // is necessary in order to avoid schema change exceptions by downstream operators.
      break;
    }
    index++;
  }
  // Second pass: materialize a dummy field for every writer that was empty.
  // Writers may have changed since the first pass, so we rely on the emptyStatus
  // snapshot rather than re-querying them.
  for (int j = 0; j < leafSegments.size(); j++) {
    if (!emptyStatus.get(j)) {
      continue;
    }
    String fieldName = leafSegments.get(j).getNameSegment().getPath();
    if (allTextMode) {
      mapWriters.get(j).varChar(fieldName);
    } else {
      mapWriters.get(j).integer(fieldName);
    }
  }
  // Initialize any array writers that were never written to.
  for (BaseWriter.ListWriter listWriter : emptyArrayWriters) {
    // A zero value capacity indicates the array has not been initialized.
    if (listWriter.getValueCapacity() == 0) {
      if (allTextMode) {
        listWriter.varChar();
      } else {
        listWriter.integer();
      }
    }
  }
}
Use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto — class HBaseRecordReader, method transformColumns.
/**
 * Provides the projected columns information to the Hbase Scan instance. If the
 * projected columns list contains a column family and also a column in the
 * column family, only the column family is passed to the Scan instance.
 *
 * For example, if the projection list is {cf1, cf1.col1, cf2.col1} then we only
 * pass {cf1, cf2.col1} to the Scan instance.
 *
 * Side effects: mutates {@code rowKeyOnly}, {@code completeFamilies},
 * {@code hbaseScan} and {@code hbaseScanColumnsOnly} on this reader instance.
 *
 * @param columns collection of projected columns
 * @return collection of projected column family names
 */
@Override
protected Collection<SchemaPath> transformColumns(Collection<SchemaPath> columns) {
Set<SchemaPath> transformed = Sets.newLinkedHashSet();
completeFamilies = Sets.newHashSet();
// Assume the query needs only the row key until a non-row-key column is seen.
rowKeyOnly = true;
if (!isStarQuery()) {
for (SchemaPath column : columns) {
// The row_key pseudo-column maps to the HBase row key, not to any column family.
if (column.getRootSegment().getPath().equalsIgnoreCase(ROW_KEY)) {
transformed.add(ROW_KEY_PATH);
continue;
}
rowKeyOnly = false;
NameSegment root = column.getRootSegment();
// NOTE(review): getBytes() uses the platform default charset — presumably
// family/qualifier names are ASCII here; confirm against project conventions.
byte[] family = root.getPath().getBytes();
// Project only the family name; qualifier-level filtering happens in the Scan.
transformed.add(SchemaPath.getSimplePath(root.getPath()));
PathSegment child = root.getChild();
if (child != null && child.isNamed()) {
// Qualified column (family.qualifier): always track it in the columns-only scan.
byte[] qualifier = child.getNameSegment().getPath().getBytes();
hbaseScanColumnsOnly.addColumn(family, qualifier);
// Skip the per-qualifier addColumn when the whole family is already requested.
// NOTE(review): this check is order-dependent — a qualified column processed
// before its bare family still calls addColumn; verify the later addFamily
// supersedes it in the Scan.
if (!completeFamilies.contains(root.getPath())) {
hbaseScan.addColumn(family, qualifier);
}
} else {
// Bare family reference: request the entire family.
hbaseScan.addFamily(family);
completeFamilies.add(root.getPath());
}
}
/* if only the row key was requested, add a FirstKeyOnlyFilter to the scan
 * to fetch only one KV from each row. If a filter is already part of this
 * scan, add the FirstKeyOnlyFilter as the LAST filter of a MUST_PASS_ALL
 * FilterList.
 */
if (rowKeyOnly) {
hbaseScan.setFilter(HBaseUtils.andFilterAtIndex(hbaseScan.getFilter(), HBaseUtils.LAST_FILTER, new FirstKeyOnlyFilter()));
}
} else {
// Star query: scan everything; still project the row key path.
rowKeyOnly = false;
transformed.add(ROW_KEY_PATH);
}
return transformed;
}
Aggregations