Example usage of org.apache.drill.exec.record.MaterializedField in the Apache Drill project — class ParquetRecordWriter, method newSchema().
/**
 * Rebuilds the Parquet message schema and the per-column write pipeline
 * (page store, column write store, record consumer) from the current
 * {@code batchSchema}. Drill's internal partition-comparator column is
 * excluded from the written file.
 *
 * @throws IOException if the page write store cannot be created
 */
private void newSchema() throws IOException {
  List<Type> fieldTypes = Lists.newArrayList();
  for (MaterializedField field : batchSchema) {
    // The partition comparator is Drill-internal bookkeeping; it is never
    // materialized into the Parquet output.
    if (!field.getPath().equalsIgnoreCase(WriterPrel.PARTITION_COMPARATOR_FIELD)) {
      fieldTypes.add(getType(field));
    }
  }
  schema = new MessageType("root", fieldTypes);

  // Start each column's buffer at a fraction of the block size, never below
  // the configured minimum.
  final int initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, blockSize / this.schema.getColumns().size() / 5);
  pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(this.oContext, codecFactory.getCompressor(codec), schema);
  // Allow ~10% headroom over the page size, capped by the block buffer size.
  final int initialPageBufferSize = max(MINIMUM_BUFFER_SIZE, min(pageSize + pageSize / 10, initialBlockBufferSize));
  store = new ColumnWriteStoreV1(pageStore, pageSize, initialPageBufferSize, enableDictionary, writerVersion, new ParquetDirectByteBufferAllocator(oContext));

  MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(this.schema);
  consumer = columnIO.getRecordWriter(store);
  setUp(schema, consumer);
}
Example usage of org.apache.drill.exec.record.MaterializedField in the Apache Drill project — class ParquetRecordWriter, method getType().
/**
 * Maps a Drill {@link MaterializedField} to its Parquet {@link Type}.
 * MAP fields become Parquet groups (children converted recursively, REPEATED
 * data mode mapping to a repeated group, anything else to optional); LIST is
 * not supported; all other minor types map to a primitive Parquet type.
 *
 * @throws UnsupportedOperationException for LIST fields
 */
private Type getType(MaterializedField field) {
  final MinorType minorType = field.getType().getMinorType();
  final DataMode dataMode = field.getType().getMode();

  if (minorType == MinorType.MAP) {
    List<Type> childTypes = Lists.newArrayList();
    for (MaterializedField child : field.getChildren()) {
      childTypes.add(getType(child));
    }
    Repetition repetition = (dataMode == DataMode.REPEATED) ? Repetition.REPEATED : Repetition.OPTIONAL;
    return new GroupType(repetition, field.getLastName(), childTypes);
  }
  if (minorType == MinorType.LIST) {
    throw new UnsupportedOperationException("Unsupported type " + minorType);
  }
  return getPrimitiveType(field);
}
Example usage of org.apache.drill.exec.record.MaterializedField in the Apache Drill project — class VectorUtil, method showVectorAccessibleContent().
/**
 * Prints the contents of a {@link VectorAccessible} as a fixed-width text
 * table on {@code System.out}, re-printing the column header every 50 rows,
 * and releases the underlying vectors when done.
 *
 * <p>Decomposed into header/row helpers; the output produced is unchanged.
 *
 * @param va           the record batch to display; its vectors are cleared on return
 * @param columnWidths per-column display widths, interpreted by {@code getColumnWidth}
 */
public static void showVectorAccessibleContent(VectorAccessible va, int[] columnWidths) {
  List<String> columns = Lists.newArrayList();
  List<String> formats = Lists.newArrayList();
  int width = 0;
  int columnIndex = 0;
  // First pass: build per-column format strings and header labels.
  for (VectorWrapper<?> vw : va) {
    int columnWidth = getColumnWidth(columnWidths, columnIndex);
    width += columnWidth + 2; // account for the "| " cell prefix
    formats.add("| %-" + columnWidth + "s");
    MaterializedField field = vw.getValueVector().getField();
    columns.add(field.getPath() + "<" + field.getType().getMinorType() + "(" + field.getType().getMode() + ")" + ">");
    columnIndex++;
  }

  int rows = va.getRecordCount();
  System.out.println(rows + " row(s):");
  for (int row = 0; row < rows; row++) {
    // Repeat the header every 50 rows so long output stays readable.
    if (row % 50 == 0) {
      printHeader(columns, formats, columnWidths, width);
    }
    printRow(va, formats, columnWidths, row);
  }
  if (rows > 0) {
    System.out.println(StringUtils.repeat("-", width + 1));
  }

  // Release vector buffers now that the batch has been rendered.
  for (VectorWrapper<?> vw : va) {
    vw.clear();
  }
}

/** Prints the column-name header row, framed by horizontal separator lines. */
private static void printHeader(List<String> columns, List<String> formats, int[] columnWidths, int width) {
  System.out.println(StringUtils.repeat("-", width + 1));
  int columnIndex = 0;
  for (String column : columns) {
    int columnWidth = getColumnWidth(columnWidths, columnIndex);
    System.out.printf(formats.get(columnIndex), column.length() <= columnWidth ? column : column.substring(0, columnWidth - 1));
    columnIndex++;
  }
  System.out.printf("|\n");
  System.out.println(StringUtils.repeat("-", width + 1));
}

/** Prints one data row, truncating each cell to its column width. */
private static void printRow(VectorAccessible va, List<String> formats, int[] columnWidths, int row) {
  int columnIndex = 0;
  for (VectorWrapper<?> vw : va) {
    int columnWidth = getColumnWidth(columnWidths, columnIndex);
    Object o = vw.getValueVector().getAccessor().getObject(row);
    // byte[] cells get a binary rendering; everything else goes through
    // String.valueOf with embedded newlines escaped so a row stays on one line.
    String cellString;
    if (o instanceof byte[]) {
      cellString = DrillStringUtils.toBinaryString((byte[]) o);
    } else {
      cellString = DrillStringUtils.escapeNewLines(String.valueOf(o));
    }
    System.out.printf(formats.get(columnIndex), cellString.length() <= columnWidth ? cellString : cellString.substring(0, columnWidth - 1));
    columnIndex++;
  }
  System.out.printf("|\n");
}
Example usage of org.apache.drill.exec.record.MaterializedField in the Apache Drill project — class HBaseRecordReader, method setup().
@Override
// Opens the HBase table and scanner for this reader and eagerly registers an
// output vector for every requested column, so that later record processing
// does not have to create dummy NullableIntVectors on a schema change.
// NOTE(review): assumes `connection`, `hbaseTableName`, `hbaseScan` were set by
// the constructor — confirm against the enclosing class.
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
this.operatorContext = context;
this.outputMutator = output;
familyVectorMap = new HashMap<>();
try {
hTable = connection.getTable(hbaseTableName);
// Register vectors for the projected columns up front, preserving the
// order of first appearance in the query. The reserved row-key path gets a
// dedicated VarBinary vector; anything else resolves to its family vector.
for (SchemaPath column : getColumns()) {
if (column.equals(ROW_KEY_PATH)) {
MaterializedField field = MaterializedField.create(column.getAsNamePart().getName(), ROW_KEY_TYPE);
rowKeyVector = outputMutator.addField(field, VarBinaryVector.class);
} else {
getOrCreateFamilyVector(column.getRootSegment().getPath(), false);
}
}
// Add map and child vectors for any HBase column families and/or HBase
// columns that are requested (in order to avoid later creation of dummy
// NullableIntVectors for them).
final Set<Map.Entry<byte[], NavigableSet<byte[]>>> familiesEntries = hbaseScan.getFamilyMap().entrySet();
for (Map.Entry<byte[], NavigableSet<byte[]>> familyEntry : familiesEntries) {
final String familyName = new String(familyEntry.getKey(), StandardCharsets.UTF_8);
final MapVector familyVector = getOrCreateFamilyVector(familyName, false);
// A null child set means "whole family requested" — no per-column vectors.
final Set<byte[]> children = familyEntry.getValue();
if (null != children) {
for (byte[] childNameBytes : children) {
final String childName = new String(childNameBytes, StandardCharsets.UTF_8);
getOrCreateColumnVector(familyVector, childName);
}
}
}
resultScanner = hTable.getScanner(hbaseScan);
} catch (SchemaChangeException | IOException e) {
// Wrap both vector-registration and HBase I/O failures in the checked
// setup exception expected by the operator framework.
throw new ExecutionSetupException(e);
}
}
Example usage of org.apache.drill.exec.record.MaterializedField in the Apache Drill project — class HBaseRecordReader, method getOrCreateFamilyVector().
/**
 * Returns the {@link MapVector} backing the given HBase column family,
 * creating and registering it — and recording the family in the projected
 * column list — on first use.
 *
 * @param familyName       the column-family name
 * @param allocateOnCreate whether to allocate buffers for a newly created vector
 * @throws DrillRuntimeException if the output mutator rejects the new field
 */
private MapVector getOrCreateFamilyVector(String familyName, boolean allocateOnCreate) {
  MapVector familyVector = familyVectorMap.get(familyName);
  if (familyVector != null) {
    return familyVector;
  }
  try {
    SchemaPath familyPath = SchemaPath.getSimplePath(familyName);
    MaterializedField field = MaterializedField.create(familyPath.getAsNamePart().getName(), COLUMN_FAMILY_TYPE);
    familyVector = outputMutator.addField(field, MapVector.class);
    if (allocateOnCreate) {
      familyVector.allocateNew();
    }
    getColumns().add(familyPath);
    familyVectorMap.put(familyName, familyVector);
    return familyVector;
  } catch (SchemaChangeException e) {
    // Vector registration failed; surface as Drill's unchecked runtime error.
    throw new DrillRuntimeException(e);
  }
}
Aggregations