Use of org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter in project drill by apache.
The class JsonReader, method ensureAtLeastOneField.
@SuppressWarnings("resource")
@Override
public void ensureAtLeastOneField(ComplexWriter writer) {
  List<BaseWriter.MapWriter> writerList = Lists.newArrayList();
  List<PathSegment> fieldPathList = Lists.newArrayList();
  BitSet emptyStatus = new BitSet(columns.size());
  // first pass: collect which fields are empty
  for (int i = 0; i < columns.size(); i++) {
    SchemaPath sp = columns.get(i);
    PathSegment fieldPath = sp.getRootSegment();
    BaseWriter.MapWriter fieldWriter = writer.rootAsMap();
    while (fieldPath.getChild() != null && !fieldPath.getChild().isArray()) {
      fieldWriter = fieldWriter.map(fieldPath.getNameSegment().getPath());
      fieldPath = fieldPath.getChild();
    }
    writerList.add(fieldWriter);
    fieldPathList.add(fieldPath);
    if (fieldWriter.isEmptyMap()) {
      emptyStatus.set(i, true);
    }
    if (i == 0 && !allTextMode) {
      // Without allTextMode the field types are unknown, so materializing many
      // default-typed fields would only invite type conflicts with later
      // batches; produce a single field and stop. With allTextMode every field
      // is VARCHAR, so all fields are created, which is also necessary to
      // avoid schema change exceptions by downstream operators.
      break;
    }
  }
  // second pass: create default-typed vectors for the fields recorded as
  // empty. The same fieldWriter may be shared by several fields, so checking
  // isEmptyMap() again here would be unreliable;
  // so we rely on the emptyStatus.
  for (int j = 0; j < fieldPathList.size(); j++) {
    BaseWriter.MapWriter fieldWriter = writerList.get(j);
    PathSegment fieldPath = fieldPathList.get(j);
    if (emptyStatus.get(j)) {
      if (allTextMode) {
        fieldWriter.varChar(fieldPath.getNameSegment().getPath());
      } else {
        fieldWriter.integer(fieldPath.getNameSegment().getPath());
      }
    }
  }
  for (ListWriter field : emptyArrayWriters) {
    // check that the array has not been initialized yet
    if (field.getValueCapacity() == 0) {
      if (allTextMode) {
        field.varChar();
      } else {
        field.integer();
      }
    }
  }
}
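The two-pass shape of this method generalizes: record which slots are empty in a BitSet during a first scan, then back-fill defaults in a second scan. A minimal, self-contained sketch of that pattern in plain Java (the class and method names here are illustrative, not Drill code):

import java.util.BitSet;
import java.util.List;

class DefaultFillSketch {
  // First pass: remember which values are missing.
  // Second pass: walk only the set bits and fill in a default.
  static List<String> fillDefaults(List<String> values, String defaultValue) {
    BitSet empty = new BitSet(values.size());
    for (int i = 0; i < values.size(); i++) {
      if (values.get(i) == null || values.get(i).isEmpty()) {
        empty.set(i);
      }
    }
    for (int i = empty.nextSetBit(0); i >= 0; i = empty.nextSetBit(i + 1)) {
      values.set(i, defaultValue);
    }
    return values;
  }
}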
Use of org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter in project drill by apache.
The class TestScanBatchWriters, method sanityTest.
@Test
public void sanityTest() throws Exception {
  Scan scanConfig = new AbstractSubScan("bob") {
    @Override
    public String getOperatorType() {
      return "";
    }
  };
  OperatorContext opContext = fixture.newOperatorContext(scanConfig);
  // Setup: normally done by ScanBatch
  VectorContainer container = new VectorContainer(fixture.allocator());
  OutputMutator output = new ScanBatch.Mutator(opContext, fixture.allocator(), container);
  DrillBuf buffer = opContext.getManagedBuffer();
  try (VectorContainerWriter writer = new VectorContainerWriter(output)) {
    // Per-batch
    writer.allocate();
    writer.reset();
    BaseWriter.MapWriter map = writer.rootAsMap();
    // Write one record: (10, "Fred", [100, 110, 120])
    map.integer("a").writeInt(10);
    byte[] bytes = "Fred".getBytes("UTF-8");
    buffer.setBytes(0, bytes, 0, bytes.length);
    map.varChar("b").writeVarChar(0, bytes.length, buffer);
    try (ListWriter list = map.list("c")) {
      list.startList();
      list.integer().writeInt(100);
      list.integer().writeInt(110);
      list.integer().writeInt(120);
      list.endList();
      // Write another record: (20, "Wilma", [])
      writer.setPosition(1);
      map.integer("a").writeInt(20);
      bytes = "Wilma".getBytes("UTF-8");
      buffer.setBytes(0, bytes, 0, bytes.length);
      map.varChar("b").writeVarChar(0, bytes.length, buffer);
      writer.setValueCount(2);
      // Wrap-up: normally done by ScanBatch
      container.setRecordCount(2);
      container.buildSchema(SelectionVectorMode.NONE);
      RowSet rowSet = fixture.wrap(container);
      // Expected
      TupleMetadata schema = new SchemaBuilder()
          .addNullable("a", MinorType.INT)
          .addNullable("b", MinorType.VARCHAR)
          .addArray("c", MinorType.INT)
          .buildSchema();
      RowSet expected = fixture.rowSetBuilder(schema)
          .addRow(10, "Fred", new int[] { 100, 110, 120 })
          .addRow(20, "Wilma", null)
          .build();
      new RowSetComparison(expected).verifyAndClearAll(rowSet);
    }
  } finally {
    opContext.close();
  }
}
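The per-batch lifecycle in this test (allocate, reset, write rows at successive positions, set the value count) can be distilled into a small helper. A sketch using only calls that appear in the test above; BatchWriteSketch and RowWriter are hypothetical names, not Drill API:

import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
import org.apache.drill.exec.vector.complex.writer.BaseWriter;

final class BatchWriteSketch {
  interface RowWriter {
    void writeRow(BaseWriter.MapWriter map, int row);
  }

  static void writeBatch(VectorContainerWriter writer, int rowCount, RowWriter rows) {
    writer.allocate();              // size the underlying vectors for the batch
    writer.reset();                 // position the writer at row 0
    BaseWriter.MapWriter map = writer.rootAsMap();
    for (int row = 0; row < rowCount; row++) {
      writer.setPosition(row);      // advance to the next row slot
      rows.writeRow(map, row);
    }
    writer.setValueCount(rowCount); // finalize the value count for every vector
  }
}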
Use of org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter in project drill by apache.
The class DrillParquetGroupConverter, method getConverterForType.
protected PrimitiveConverter getConverterForType(String name, PrimitiveType type) {
  switch (type.getPrimitiveTypeName()) {
    case INT32: {
      if (type.getOriginalType() == null) {
        return getIntConverter(name, type);
      }
      switch (type.getOriginalType()) {
        case UINT_8:
        case UINT_16:
        case UINT_32:
        case INT_8:
        case INT_16:
        case INT_32:
          return getIntConverter(name, type);
        case DECIMAL: {
          ParquetReaderUtility.checkDecimalTypeEnabled(options);
          return getVarDecimalConverter(name, type);
        }
        case DATE: {
          DateWriter writer = type.isRepetition(Repetition.REPEATED)
              ? getWriter(name, (m, f) -> m.list(f).date(), l -> l.list().date())
              : getWriter(name, (m, f) -> m.date(f), l -> l.date());
          switch (containsCorruptedDates) {
            case META_SHOWS_CORRUPTION:
              return new DrillCorruptedDateConverter(writer);
            case META_SHOWS_NO_CORRUPTION:
              return new DrillDateConverter(writer);
            case META_UNCLEAR_TEST_VALUES:
              return new CorruptionDetectingDateConverter(writer);
            default:
              throw new DrillRuntimeException(String.format(
                  "Issue setting up parquet reader for date type, unrecognized date corruption status %s. "
                      + "See DRILL-4203 for more info.", containsCorruptedDates));
          }
        }
        case TIME_MILLIS: {
          TimeWriter writer = type.isRepetition(Repetition.REPEATED)
              ? getWriter(name, (m, f) -> m.list(f).time(), l -> l.list().time())
              : getWriter(name, (m, f) -> m.time(f), l -> l.time());
          return new DrillTimeConverter(writer);
        }
        default:
          throw new UnsupportedOperationException("Unsupported type: " + type.getOriginalType());
      }
    }
    case INT64: {
      if (type.getOriginalType() == null) {
        return getBigIntConverter(name, type);
      }
      switch (type.getOriginalType()) {
        case UINT_64:
        case INT_64:
          return getBigIntConverter(name, type);
        case TIMESTAMP_MICROS: {
          TimeStampWriter writer = getTimeStampWriter(name, type);
          return new DrillTimeStampMicrosConverter(writer);
        }
        case TIME_MICROS: {
          TimeWriter writer = type.isRepetition(Repetition.REPEATED)
              ? getWriter(name, (m, f) -> m.list(f).time(), l -> l.list().time())
              : getWriter(name, MapWriter::time, ListWriter::time);
          return new DrillTimeMicrosConverter(writer);
        }
        case DECIMAL: {
          ParquetReaderUtility.checkDecimalTypeEnabled(options);
          return getVarDecimalConverter(name, type);
        }
        case TIMESTAMP_MILLIS: {
          TimeStampWriter writer = getTimeStampWriter(name, type);
          return new DrillTimeStampConverter(writer);
        }
        default:
          throw new UnsupportedOperationException("Unsupported type: " + type.getOriginalType());
      }
    }
    case INT96: {
      // TODO: replace null with TIMESTAMP_NANOS once parquet supports such a type annotation.
      if (type.getOriginalType() == null) {
        if (options.getOption(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP).bool_val) {
          TimeStampWriter writer = getTimeStampWriter(name, type);
          return new DrillFixedBinaryToTimeStampConverter(writer);
        } else {
          VarBinaryWriter writer = type.isRepetition(Repetition.REPEATED)
              ? getWriter(name, (m, f) -> m.list(f).varBinary(), l -> l.list().varBinary())
              : getWriter(name, (m, f) -> m.varBinary(f), l -> l.varBinary());
          return new DrillFixedBinaryToVarbinaryConverter(writer,
              ParquetColumnMetadata.getTypeLengthInBits(type.getPrimitiveTypeName()) / 8,
              mutator.getManagedBuffer());
        }
      }
      // Note: an INT96 column whose original type is non-null falls through to
      // the FLOAT case below (there is no return or break here).
    }
    case FLOAT: {
      Float4Writer writer = type.isRepetition(Repetition.REPEATED)
          ? getWriter(name, (m, f) -> m.list(f).float4(), l -> l.list().float4())
          : getWriter(name, (m, f) -> m.float4(f), l -> l.float4());
      return new DrillFloat4Converter(writer);
    }
    case DOUBLE: {
      Float8Writer writer = type.isRepetition(Repetition.REPEATED)
          ? getWriter(name, (m, f) -> m.list(f).float8(), l -> l.list().float8())
          : getWriter(name, (m, f) -> m.float8(f), l -> l.float8());
      return new DrillFloat8Converter(writer);
    }
    case BOOLEAN: {
      BitWriter writer = type.isRepetition(Repetition.REPEATED)
          ? getWriter(name, (m, f) -> m.list(f).bit(), l -> l.list().bit())
          : getWriter(name, (m, f) -> m.bit(f), l -> l.bit());
      return new DrillBoolConverter(writer);
    }
    case BINARY: {
      LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<PrimitiveConverter> typeAnnotationVisitor =
          new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<PrimitiveConverter>() {

            @Override
            public Optional<PrimitiveConverter> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
              ParquetReaderUtility.checkDecimalTypeEnabled(options);
              return Optional.of(getVarDecimalConverter(name, type));
            }

            @Override
            public Optional<PrimitiveConverter> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
              return Optional.of(getVarCharConverter(name, type));
            }

            @Override
            public Optional<PrimitiveConverter> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) {
              return Optional.of(getVarCharConverter(name, type));
            }
          };
      Supplier<PrimitiveConverter> converterSupplier = () -> {
        VarBinaryWriter writer = type.isRepetition(Repetition.REPEATED)
            ? getWriter(name, (m, f) -> m.list(f).varBinary(), l -> l.list().varBinary())
            : getWriter(name, MapWriter::varBinary, ListWriter::varBinary);
        return new DrillVarBinaryConverter(writer, mutator.getManagedBuffer());
      };
      return Optional.ofNullable(type.getLogicalTypeAnnotation())
          .map(typeAnnotation -> typeAnnotation.accept(typeAnnotationVisitor))
          .flatMap(Function.identity())
          .orElseGet(converterSupplier);
    }
    case FIXED_LEN_BYTE_ARRAY: {
      LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<PrimitiveConverter> typeAnnotationVisitor =
          new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<PrimitiveConverter>() {

            @Override
            public Optional<PrimitiveConverter> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
              ParquetReaderUtility.checkDecimalTypeEnabled(options);
              return Optional.of(getVarDecimalConverter(name, type));
            }

            @Override
            public Optional<PrimitiveConverter> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
              IntervalWriter writer = type.isRepetition(Repetition.REPEATED)
                  ? getWriter(name, (m, f) -> m.list(f).interval(), l -> l.list().interval())
                  : getWriter(name, MapWriter::interval, ListWriter::interval);
              return Optional.of(new DrillFixedLengthByteArrayToInterval(writer));
            }
          };
      Supplier<PrimitiveConverter> converterSupplier = () -> {
        VarBinaryWriter writer = type.isRepetition(Repetition.REPEATED)
            ? getWriter(name, (m, f) -> m.list(f).varBinary(), l -> l.list().varBinary())
            : getWriter(name, MapWriter::varBinary, ListWriter::varBinary);
        return new DrillFixedBinaryToVarbinaryConverter(writer, type.getTypeLength(), mutator.getManagedBuffer());
      };
      return Optional.ofNullable(type.getLogicalTypeAnnotation())
          .map(typeAnnotation -> typeAnnotation.accept(typeAnnotationVisitor))
          .flatMap(Function.identity())
          .orElseGet(converterSupplier);
    }
    default:
      throw new UnsupportedOperationException("Unsupported type: " + type.getPrimitiveTypeName());
  }
}
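Throughout this method, getWriter is handed two factories: one for when the converter sits inside a map (a named field) and one for when it sits inside a list (an anonymous element). The exact signature of getWriter is not shown in this snippet; the following is a minimal sketch of how such a dispatch could look, assuming a baseWriter field like the one used in createFieldConverter below:

import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;

// Hypothetical reconstruction, not the actual Drill implementation: pick the
// map-field or list-element writer factory based on the current context.
private <W> W getWriterSketch(String name,
    BiFunction<MapWriter, String, W> fromMap,
    Function<ListWriter, W> fromList) {
  return baseWriter instanceof MapWriter
      ? fromMap.apply((MapWriter) baseWriter, name)
      : fromList.apply((ListWriter) baseWriter);
}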
Use of org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter in project drill by apache.
The class DrillParquetGroupConverter, method createFieldConverter.
private Converter createFieldConverter(boolean skipRepeated, Type fieldType, String name, PathSegment colNextChild) {
  Converter converter;
  if (fieldType.isPrimitive()) {
    converter = getConverterForType(name, fieldType.asPrimitiveType());
  } else {
    while (colNextChild != null && !colNextChild.isNamed()) {
      colNextChild = colNextChild.getChild();
    }
    Collection<SchemaPath> columns = colNextChild == null
        ? Collections.emptyList()
        : Collections.singletonList(new SchemaPath(colNextChild.getNameSegment()));
    BaseWriter writer;
    GroupType fieldGroupType = fieldType.asGroupType();
    if (ParquetReaderUtility.isLogicalListType(fieldGroupType)) {
      writer = getWriter(name, MapWriter::list, ListWriter::list);
      converter = new DrillParquetGroupConverter(mutator, writer, fieldGroupType, columns, options,
          containsCorruptedDates, true, converterName);
    } else if (options.getOption(ExecConstants.PARQUET_READER_ENABLE_MAP_SUPPORT_VALIDATOR)
        && ParquetReaderUtility.isLogicalMapType(fieldGroupType)) {
      writer = getWriter(name, MapWriter::dict, ListWriter::dict);
      converter = new DrillParquetMapGroupConverter(mutator, (DictWriter) writer, fieldGroupType, options,
          containsCorruptedDates);
    } else if (fieldType.isRepetition(Repetition.REPEATED)) {
      if (skipRepeated) {
        converter = new DrillIntermediateParquetGroupConverter(mutator, baseWriter, fieldGroupType, columns,
            options, containsCorruptedDates, false, converterName);
      } else {
        writer = getWriter(name, (m, s) -> m.list(s).map(), l -> l.list().map());
        converter = new DrillParquetGroupConverter(mutator, writer, fieldGroupType, columns, options,
            containsCorruptedDates, false, converterName);
      }
    } else {
      writer = getWriter(name, MapWriter::map, ListWriter::map);
      converter = new DrillParquetGroupConverter(mutator, writer, fieldGroupType, columns, options,
          containsCorruptedDates, false, converterName);
    }
  }
  return converter;
}
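For orientation, the logical LIST shape that ParquetReaderUtility.isLogicalListType(...) detects corresponds to the standard three-level layout from the parquet-format specification. A schema fragment for illustration (the column name a and element type int32 are made up):

optional group a (LIST) {
  repeated group list {
    optional int32 element;
  }
}

The outer group carries the LIST annotation, the middle repeated group encodes repetition, and the inner field holds the element, which is why the converter maps such a group to a ListWriter rather than a MapWriter.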
Use of org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter in project drill by apache.
The class TestRepeated, method listOfList.
//  @Test
//  public void repeatedMap() {
//
//    /**
//     * We're going to try to create an object that looks like:
//     *
//     *  {
//     *    a: [
//     *      {x: 1, y: 2}
//     *      {x: 2, y: 1}
//     *    ]
//     *  }
//     */
//    MapVector v = new MapVector("", allocator);
//    ComplexWriter writer = new ComplexWriterImpl("col", v);
//
//    MapWriter map = writer.rootAsMap();
//
//    map.start();
//    ListWriter list = map.list("a");
//    MapWriter inner = list.map();
//
//    IntHolder holder = new IntHolder();
//    IntWriter xCol = inner.integer("x");
//    IntWriter yCol = inner.integer("y");
//
//    inner.start();
//    holder.value = 1;
//    xCol.write(holder);
//    holder.value = 2;
//    yCol.write(holder);
//    inner.end();
//
//    inner.start();
//    holder.value = 2;
//    xCol.write(holder);
//    holder.value = 1;
//    yCol.write(holder);
//    inner.end();
//
//    IntWriter numCol = map.integer("nums");
//    holder.value = 14;
//    numCol.write(holder);
//
//    map.end();
//
//    assertTrue(writer.ok());
//  }
@Test
public void listOfList() throws Exception {
  /**
   * We're going to try to create an object that looks like:
   *
   *  {
   *    a: [
   *      [1, 2, 3],
   *      [4, 5]
   *    ],
   *    nums: 14,
   *    b: [
   *      { c: 1 },
   *      { c: 2, x: 15 }
   *    ]
   *  }
   */
  final MapVector mapVector = new MapVector("", allocator, null);
  final ComplexWriterImpl writer = new ComplexWriterImpl("col", mapVector);
  writer.allocate();
  {
    final MapWriter map = writer.rootAsMap();
    final ListWriter list = map.list("a");
    list.startList();
    final ListWriter innerList = list.list();
    final IntWriter innerInt = innerList.integer();
    innerList.startList();
    final IntHolder holder = new IntHolder();
    holder.value = 1;
    innerInt.write(holder);
    holder.value = 2;
    innerInt.write(holder);
    holder.value = 3;
    innerInt.write(holder);
    innerList.endList();
    innerList.startList();
    holder.value = 4;
    innerInt.write(holder);
    holder.value = 5;
    innerInt.write(holder);
    innerList.endList();
    list.endList();
    final IntWriter numCol = map.integer("nums");
    holder.value = 14;
    numCol.write(holder);
    final MapWriter repeatedMap = map.list("b").map();
    repeatedMap.start();
    holder.value = 1;
    repeatedMap.integer("c").write(holder);
    repeatedMap.end();
    repeatedMap.start();
    holder.value = 2;
    repeatedMap.integer("c").write(holder);
    final BigIntHolder h = new BigIntHolder();
    h.value = 15;
    repeatedMap.bigInt("x").write(h);
    repeatedMap.end();
    map.end();
  }
  {
    writer.setPosition(1);
    final MapWriter map = writer.rootAsMap();
    final ListWriter list = map.list("a");
    list.startList();
    final ListWriter innerList = list.list();
    final IntWriter innerInt = innerList.integer();
    innerList.startList();
    final IntHolder holder = new IntHolder();
    holder.value = -1;
    innerInt.write(holder);
    holder.value = -2;
    innerInt.write(holder);
    holder.value = -3;
    innerInt.write(holder);
    innerList.endList();
    innerList.startList();
    holder.value = -4;
    innerInt.write(holder);
    holder.value = -5;
    innerInt.write(holder);
    innerList.endList();
    list.endList();
    final IntWriter numCol = map.integer("nums");
    holder.value = -28;
    numCol.write(holder);
    final MapWriter repeatedMap = map.list("b").map();
    repeatedMap.start();
    holder.value = -1;
    repeatedMap.integer("c").write(holder);
    repeatedMap.end();
    repeatedMap.start();
    holder.value = -2;
    repeatedMap.integer("c").write(holder);
    final BigIntHolder h = new BigIntHolder();
    h.value = -30;
    repeatedMap.bigInt("x").write(h);
    repeatedMap.end();
    map.end();
  }
  final ObjectWriter ow = new ObjectMapper().writer().withDefaultPrettyPrinter();
  final ByteArrayOutputStream stream = new ByteArrayOutputStream();
  final JsonWriter jsonWriter = new JsonWriter(stream, true, true);
  final FieldReader reader = mapVector.getChild("col", MapVector.class).getReader();
  reader.setPosition(0);
  jsonWriter.write(reader);
  reader.setPosition(1);
  jsonWriter.write(reader);
  writer.close();
}
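For reference, replaying positions 0 and 1 through JsonWriter should produce JSON equivalent to the following two objects. This is inferred from the writes above, whitespace and key ordering aside, not captured from a test run:

{ "a": [[1, 2, 3], [4, 5]], "nums": 14, "b": [{ "c": 1 }, { "c": 2, "x": 15 }] }
{ "a": [[-1, -2, -3], [-4, -5]], "nums": -28, "b": [{ "c": -1 }, { "c": -2, "x": -30 }] }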