Search in sources :

Example 1 with MapOrListWriterImpl

use of org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl in project drill by axbaretto.

The class MaprDBJsonRecordReader, method next().

/**
 * Reads up to one batch of documents from the underlying MapR-DB scanner
 * into the vector writer.
 *
 * <p>Batch size is capped at {@code BaseValueVector.INITIAL_VALUE_ALLOCATION}.
 * For skip (COUNT-style) queries only a marker bit is written per row; for
 * id-only projections just the document id is written; otherwise the whole
 * document is materialized.</p>
 *
 * @return the number of records written into the current batch; 0 signals
 *         that the scanner is exhausted
 */
@Override
public int next() {
    // createStarted() replaces the createUnstarted()/start() pair.
    Stopwatch watch = Stopwatch.createStarted();
    vectorWriter.allocate();
    vectorWriter.reset();
    int recordCount = 0;
    DBDocumentReaderBase reader = null;
    while (recordCount < BaseValueVector.INITIAL_VALUE_ALLOCATION) {
        vectorWriter.setPosition(recordCount);
        try {
            reader = nextDocumentReader();
            if (reader == null) {
                // no more documents for this scanner
                break;
            } else if (isSkipQuery()) {
                // COUNT-style query: emit only a marker bit, no column data.
                vectorWriter.rootAsMap().bit("count").writeBit(1);
            } else {
                MapOrListWriterImpl writer = new MapOrListWriterImpl(vectorWriter.rootAsMap());
                if (idOnly) {
                    writeId(writer, reader.getId());
                } else {
                    // An OJAI document stream must open with START_MAP.
                    if (reader.next() != EventType.START_MAP) {
                        throw dataReadError("The document did not start with START_MAP!");
                    }
                    writeToListOrMap(writer, reader);
                }
            }
            recordCount++;
        } catch (UserException e) {
            // Re-wrap with table/document context so the failure is actionable.
            throw UserException.unsupportedError(e)
                .addContext(String.format("Table: %s, document id: '%s'",
                    table.getPath(),
                    reader == null ? null : IdCodec.asString(reader.getId())))
                .build(logger);
        } catch (SchemaChangeException e) {
            if (ignoreSchemaChange) {
                // Best-effort mode: drop the offending row and keep scanning.
                logger.warn("{}. Dropping the row from result.", e.getMessage());
                logger.debug("Stack trace:", e);
            } else {
                throw dataReadError(e);
            }
        }
    }
    if (nonExistentColumnsProjection && recordCount > 0) {
        // Collections.emptyList() is the type-safe replacement for the raw
        // Collections.EMPTY_LIST field (avoids an unchecked assignment).
        JsonReaderUtils.ensureAtLeastOneField(vectorWriter, getColumns(), allTextMode, Collections.emptyList());
    }
    vectorWriter.setValueCount(recordCount);
    logger.debug("Took {} ms to get {} records", watch.elapsed(TimeUnit.MILLISECONDS), recordCount);
    return recordCount;
}
Also used : SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) DBDocumentReaderBase(com.mapr.db.ojai.DBDocumentReaderBase) MapOrListWriterImpl(org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl) Stopwatch(com.google.common.base.Stopwatch) UserException(org.apache.drill.common.exceptions.UserException)

Example 2 with MapOrListWriterImpl

use of org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl in project drill by axbaretto.

The class AvroRecordReader, method process().

/**
 * Recursively writes an Avro value into the Drill complex writer.
 *
 * @param value          the Avro datum (null values are skipped entirely)
 * @param schema         the Avro schema describing {@code value}
 * @param fieldName      target field name; must be non-null for non-record types
 * @param writer         destination map-or-list writer
 * @param fieldSelection projection info used to skip unselected fields
 */
private void process(final Object value, final Schema schema, final String fieldName, MapOrListWriterImpl writer, FieldSelection fieldSelection) {
    if (value == null) {
        return;
    }
    final Schema.Type type = schema.getType();
    switch(type) {
        case RECORD:
            for (final Schema.Field field : schema.getFields()) {
                // Resolve the target writer per field. The previous code declared
                // _writer once before the loop, so after the first record-typed
                // field every subsequent sibling field was incorrectly written
                // into that nested map instead of the parent.
                MapOrListWriterImpl _writer = writer;
                // Descend into a nested map writer for record fields and for
                // nullable-union-of-record fields (["null", record]).
                if (field.schema().getType() == Schema.Type.RECORD || (field.schema().getType() == Schema.Type.UNION && field.schema().getTypes().get(0).getType() == Schema.Type.NULL && field.schema().getTypes().get(1).getType() == Schema.Type.RECORD)) {
                    _writer = (MapOrListWriterImpl) writer.map(field.name());
                }
                process(((GenericRecord) value).get(field.name()), field.schema(), field.name(), _writer, fieldSelection.getChild(field.name()));
            }
            break;
        case ARRAY:
            assert fieldName != null;
            final GenericArray<?> array = (GenericArray<?>) value;
            Schema elementSchema = array.getSchema().getElementType();
            Type elementType = elementSchema.getType();
            // Arrays of maps/records need a list-of-map writer; scalar arrays a plain list.
            if (elementType == Schema.Type.RECORD || elementType == Schema.Type.MAP) {
                writer = (MapOrListWriterImpl) writer.list(fieldName).listoftmap(fieldName);
            } else {
                writer = (MapOrListWriterImpl) writer.list(fieldName);
            }
            for (final Object o : array) {
                writer.start();
                process(o, elementSchema, fieldName, writer, fieldSelection.getChild(fieldName));
                writer.end();
            }
            break;
        case UNION:
            // Currently supporting only nullable unions (optional fields) like ["null", "some-type"].
            if (schema.getTypes().get(0).getType() != Schema.Type.NULL) {
                throw new UnsupportedOperationException("Avro union type must be of the format : [\"null\", \"some-type\"]");
            }
            process(value, schema.getTypes().get(1), fieldName, writer, fieldSelection);
            break;
        case MAP:
            @SuppressWarnings("unchecked") final HashMap<Object, Object> map = (HashMap<Object, Object>) value;
            Schema valueSchema = schema.getValueType();
            writer = (MapOrListWriterImpl) writer.map(fieldName);
            writer.start();
            for (Entry<Object, Object> entry : map.entrySet()) {
                process(entry.getValue(), valueSchema, entry.getKey().toString(), writer, fieldSelection.getChild(entry.getKey().toString()));
            }
            writer.end();
            break;
        case FIXED:
            throw new UnsupportedOperationException("Unimplemented type: " + type.toString());
        // Enum symbols are strings
        case ENUM:
        // Treat null type as a primitive
        case NULL:
        default:
            assert fieldName != null;
            if (writer.isMapWriter()) {
                // Skip map fields the projection never selects.
                if (fieldSelection.isNeverValid()) {
                    break;
                }
            }
            processPrimitive(value, schema.getType(), fieldName, writer);
            break;
    }
}
Also used : MapOrListWriterImpl(org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) Type(org.apache.avro.Schema.Type) Type(org.apache.avro.Schema.Type) GenericArray(org.apache.avro.generic.GenericArray)

Example 3 with MapOrListWriterImpl

use of org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl in project drill by apache.

The class BsonRecordReader, method writeToListOrMap().

/**
 * Recursively materializes the current BSON document or array into Drill's
 * complex vector writers.
 *
 * <p>The caller must have already consumed the enclosing start event
 * (readStartDocument / readStartArray); this method consumes every value
 * event up to and including the matching end event.</p>
 *
 * @param reader    positioned BSON event reader (consumed in strict order)
 * @param writer    destination map-or-list writer
 * @param isList    true when writing array elements, false for document fields
 * @param fieldName field name to write under; inside a document it is
 *                  overwritten by each readName() call
 */
private void writeToListOrMap(BsonReader reader, final MapOrListWriterImpl writer, boolean isList, String fieldName) {
    writer.start();
    // Consume value events until the current document/array is exhausted.
    while (reader.readBsonType() != BsonType.END_OF_DOCUMENT) {
        if (!isList) {
            // Document values are preceded by a field name; list elements
            // reuse the caller-supplied name.
            fieldName = reader.readName();
        }
        BsonType currentBsonType = reader.getCurrentBsonType();
        switch(currentBsonType) {
            case INT32:
                int readInt32 = reader.readInt32();
                // readNumbersAsDouble forces a uniform FLOAT8 column type.
                if (readNumbersAsDouble) {
                    writeDouble(readInt32, writer, fieldName, isList);
                } else {
                    writeInt32(readInt32, writer, fieldName, isList);
                }
                atLeastOneWrite = true;
                break;
            case INT64:
                long readInt64 = reader.readInt64();
                if (readNumbersAsDouble) {
                    writeDouble(readInt64, writer, fieldName, isList);
                } else {
                    writeInt64(readInt64, writer, fieldName, isList);
                }
                atLeastOneWrite = true;
                break;
            case ARRAY:
                // Recurse with isList=true; elements share the array's field name.
                reader.readStartArray();
                writeToListOrMap(reader, (MapOrListWriterImpl) writer.list(fieldName), true, fieldName);
                atLeastOneWrite = true;
                break;
            case BINARY:
                // Binary subtypes are dispatched inside writeBinary.
                writeBinary(reader, writer, fieldName, isList);
                atLeastOneWrite = true;
                break;
            case BOOLEAN:
                boolean readBoolean = reader.readBoolean();
                writeBoolean(readBoolean, writer, fieldName, isList);
                atLeastOneWrite = true;
                break;
            case DATE_TIME:
                long readDateTime = reader.readDateTime();
                writeDateTime(readDateTime, writer, fieldName, isList);
                atLeastOneWrite = true;
                break;
            case DOCUMENT:
                reader.readStartDocument();
                // Nested document: descend into a map writer (or a
                // list-of-map writer when nested inside an array).
                MapOrListWriterImpl _writer = writer;
                if (!isList) {
                    _writer = (MapOrListWriterImpl) writer.map(fieldName);
                } else {
                    _writer = (MapOrListWriterImpl) writer.listoftmap(fieldName);
                }
                writeToListOrMap(reader, _writer, false, fieldName);
                atLeastOneWrite = true;
                break;
            case DOUBLE:
                double readDouble = reader.readDouble();
                writeDouble(readDouble, writer, fieldName, isList);
                atLeastOneWrite = true;
                break;
            case JAVASCRIPT:
                // JavaScript code is stored as a plain string.
                final String readJavaScript = reader.readJavaScript();
                writeString(readJavaScript, writer, fieldName, isList);
                atLeastOneWrite = true;
                break;
            case JAVASCRIPT_WITH_SCOPE:
                final String readJavaScriptWithScopeString = reader.readJavaScriptWithScope();
                writeString(readJavaScriptWithScopeString, writer, fieldName, isList);
                atLeastOneWrite = true;
                break;
            case NULL:
                // Null values are consumed but not written (no vector entry).
                reader.readNull();
                break;
            case OBJECT_ID:
                writeObjectId(reader, writer, fieldName, isList);
                atLeastOneWrite = true;
                break;
            case STRING:
                final String readString = reader.readString();
                writeString(readString, writer, fieldName, isList);
                atLeastOneWrite = true;
                break;
            case SYMBOL:
                // Deprecated BSON symbol type: treated as a string.
                final String readSymbol = reader.readSymbol();
                writeString(readSymbol, writer, fieldName, isList);
                atLeastOneWrite = true;
                break;
            case TIMESTAMP:
                int time = reader.readTimestamp().getTime();
                writeTimeStamp(time, writer, fieldName, isList);
                atLeastOneWrite = true;
                break;
            case DECIMAL128:
                BigDecimal readBigDecimalAsDecimal128 = reader.readDecimal128().bigDecimalValue();
                writeDecimal128(readBigDecimalAsDecimal128, writer, fieldName, isList);
                atLeastOneWrite = true;
                break;
            default:
                // REGULAR_EXPRESSION and DB_POINTER types are not handled.
                throw new DrillRuntimeException("UnSupported Bson type: " + currentBsonType);
        }
    }
    // Consume the matching end event for the scope we were asked to write.
    if (!isList) {
        reader.readEndDocument();
    } else {
        reader.readEndArray();
    }
}
Also used : BsonType(org.bson.BsonType) MapOrListWriterImpl(org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) BigDecimal(java.math.BigDecimal)

Example 4 with MapOrListWriterImpl

use of org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl in project drill by apache.

The class FieldTransferVectorWriter, method writeDBDocument().

@Override
protected void writeDBDocument(VectorContainerWriter vectorWriter, DBDocumentReaderBase reader) throws SchemaChangeException {
    MapOrListWriterImpl writer = new MapOrListWriterImpl(vectorWriter.rootAsMap());
    if (reader.next() != EventType.START_MAP) {
        throw dataReadError(logger, "The document did not start with START_MAP!");
    }
    valueWriter.writeToListOrMap(writer, reader);
}
Also used : MapOrListWriterImpl(org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl)

Example 5 with MapOrListWriterImpl

use of org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl in project drill by apache.

The class AvroRecordReader, method process().

/**
 * Recursively writes an Avro value into the Drill complex writer.
 *
 * @param value          the Avro datum (null values are skipped entirely)
 * @param schema         the Avro schema describing {@code value}
 * @param fieldName      target field name; must be non-null for non-record types
 * @param writer         destination map-or-list writer
 * @param fieldSelection projection info used to skip unselected fields
 */
private void process(final Object value, final Schema schema, final String fieldName, MapOrListWriterImpl writer, FieldSelection fieldSelection) {
    if (value == null) {
        return;
    }
    final Schema.Type type = schema.getType();
    switch(type) {
        case RECORD:
            for (final Schema.Field field : schema.getFields()) {
                // Resolve the target writer per field. The previous code declared
                // _writer once before the loop, so after the first record-typed
                // field every subsequent sibling field was incorrectly written
                // into that nested map instead of the parent.
                MapOrListWriterImpl _writer = writer;
                // Descend into a nested map writer for record fields and for
                // nullable-union-of-record fields (["null", record]).
                if (field.schema().getType() == Schema.Type.RECORD || (field.schema().getType() == Schema.Type.UNION && field.schema().getTypes().get(0).getType() == Schema.Type.NULL && field.schema().getTypes().get(1).getType() == Schema.Type.RECORD)) {
                    _writer = (MapOrListWriterImpl) writer.map(field.name());
                }
                process(((GenericRecord) value).get(field.name()), field.schema(), field.name(), _writer, fieldSelection.getChild(field.name()));
            }
            break;
        case ARRAY:
            assert fieldName != null;
            final GenericArray<?> array = (GenericArray<?>) value;
            Schema elementSchema = array.getSchema().getElementType();
            Type elementType = elementSchema.getType();
            // Arrays of maps/records need a list-of-map writer; scalar arrays a plain list.
            if (elementType == Schema.Type.RECORD || elementType == Schema.Type.MAP) {
                writer = (MapOrListWriterImpl) writer.list(fieldName).listoftmap(fieldName);
            } else {
                writer = (MapOrListWriterImpl) writer.list(fieldName);
            }
            for (final Object o : array) {
                writer.start();
                process(o, elementSchema, fieldName, writer, fieldSelection.getChild(fieldName));
                writer.end();
            }
            break;
        case UNION:
            // Currently supporting only nullable unions (optional fields) like ["null", "some-type"].
            if (schema.getTypes().get(0).getType() != Schema.Type.NULL) {
                throw new UnsupportedOperationException("Avro union type must be of the format : [\"null\", \"some-type\"]");
            }
            process(value, schema.getTypes().get(1), fieldName, writer, fieldSelection);
            break;
        case MAP:
            @SuppressWarnings("unchecked") final HashMap<Object, Object> map = (HashMap<Object, Object>) value;
            Schema valueSchema = schema.getValueType();
            writer = (MapOrListWriterImpl) writer.map(fieldName);
            writer.start();
            for (Entry<Object, Object> entry : map.entrySet()) {
                process(entry.getValue(), valueSchema, entry.getKey().toString(), writer, fieldSelection.getChild(entry.getKey().toString()));
            }
            writer.end();
            break;
        case FIXED:
            throw new UnsupportedOperationException("Unimplemented type: " + type.toString());
        // Enum symbols are strings
        case ENUM:
        // Treat null type as a primitive
        case NULL:
        default:
            assert fieldName != null;
            if (writer.isMapWriter()) {
                // Skip map fields the projection never selects.
                if (fieldSelection.isNeverValid()) {
                    break;
                }
            }
            processPrimitive(value, schema.getType(), fieldName, writer);
            break;
    }
}
Also used : MapOrListWriterImpl(org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) Type(org.apache.avro.Schema.Type) Type(org.apache.avro.Schema.Type) GenericArray(org.apache.avro.generic.GenericArray)

Aggregations

MapOrListWriterImpl (org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl)9 DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException)4 BsonType (org.bson.BsonType)4 HashMap (java.util.HashMap)2 Schema (org.apache.avro.Schema)2 Type (org.apache.avro.Schema.Type)2 GenericArray (org.apache.avro.generic.GenericArray)2 Stopwatch (com.google.common.base.Stopwatch)1 DBDocumentReaderBase (com.mapr.db.ojai.DBDocumentReaderBase)1 BigDecimal (java.math.BigDecimal)1 ByteBuffer (java.nio.ByteBuffer)1 UserException (org.apache.drill.common.exceptions.UserException)1 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)1 MapOrListWriter (org.apache.drill.exec.vector.complex.writer.BaseWriter.MapOrListWriter)1 DocumentReaderWithProjection (org.ojai.util.DocumentReaderWithProjection)1