
Example 1 with DBDocumentReaderBase

Use of com.mapr.db.ojai.DBDocumentReaderBase in project drill by axbaretto.

From the next() method of class MaprDBJsonRecordReader:

@Override
public int next() {
    Stopwatch watch = Stopwatch.createUnstarted();
    watch.start();
    vectorWriter.allocate();
    vectorWriter.reset();
    int recordCount = 0;
    DBDocumentReaderBase reader = null;
    while (recordCount < BaseValueVector.INITIAL_VALUE_ALLOCATION) {
        vectorWriter.setPosition(recordCount);
        try {
            reader = nextDocumentReader();
            if (reader == null) {
                // no more documents for this scanner
                break;
            } else if (isSkipQuery()) {
                vectorWriter.rootAsMap().bit("count").writeBit(1);
            } else {
                MapOrListWriterImpl writer = new MapOrListWriterImpl(vectorWriter.rootAsMap());
                if (idOnly) {
                    writeId(writer, reader.getId());
                } else {
                    if (reader.next() != EventType.START_MAP) {
                        throw dataReadError("The document did not start with START_MAP!");
                    }
                    writeToListOrMap(writer, reader);
                }
            }
            recordCount++;
        } catch (UserException e) {
            throw UserException.unsupportedError(e)
                    .addContext(String.format("Table: %s, document id: '%s'", table.getPath(),
                            reader == null ? null : IdCodec.asString(reader.getId())))
                    .build(logger);
        } catch (SchemaChangeException e) {
            if (ignoreSchemaChange) {
                logger.warn("{}. Dropping the row from result.", e.getMessage());
                logger.debug("Stack trace:", e);
            } else {
                throw dataReadError(e);
            }
        }
    }
    if (nonExistentColumnsProjection && recordCount > 0) {
        JsonReaderUtils.ensureAtLeastOneField(vectorWriter, getColumns(), allTextMode, Collections.emptyList());
    }
    vectorWriter.setValueCount(recordCount);
    logger.debug("Took {} ms to get {} records", watch.elapsed(TimeUnit.MILLISECONDS), recordCount);
    return recordCount;
}
Also used: SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), DBDocumentReaderBase (com.mapr.db.ojai.DBDocumentReaderBase), MapOrListWriterImpl (org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl), Stopwatch (com.google.common.base.Stopwatch), UserException (org.apache.drill.common.exceptions.UserException)
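
Two details of this loop are worth calling out. First, the isSkipQuery() branch: when the query projects no columns (for example SELECT COUNT(*)), the reader skips parsing the document entirely and just writes one bit per row. Second, the normal path hands the OJAI reader to writeToListOrMap, which walks the document as a stream of parse events. The following sketch shows that event-driven traversal in isolation, printing events instead of filling Drill vectors; OjaiTraversalSketch and dumpDocument are illustrative names, while next(), getFieldName() and getString() are the standard org.ojai.DocumentReader API, with next() returning null once the document is exhausted.

import org.ojai.DocumentReader;
import org.ojai.DocumentReader.EventType;

public class OjaiTraversalSketch {

    // A minimal traversal sketch, not Drill code: walk every event in an
    // OJAI document. A real record reader dispatches each event into a
    // vector writer; here we just print.
    static void dumpDocument(DocumentReader reader) {
        EventType event;
        // next() returns null once the end of the document is reached
        while ((event = reader.next()) != null) {
            switch (event) {
                case START_MAP:
                case START_ARRAY:
                    System.out.println("enter " + event);
                    break;
                case END_MAP:
                case END_ARRAY:
                    System.out.println("leave " + event);
                    break;
                case STRING:
                    System.out.println(reader.getFieldName() + " = " + reader.getString());
                    break;
                default:
                    // numeric, date and binary accessors omitted for brevity
                    System.out.println(reader.getFieldName() + " : " + event);
            }
        }
    }
}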

Example 2 with DBDocumentReaderBase

Use of com.mapr.db.ojai.DBDocumentReaderBase in project drill by apache.

From the next() method of class MaprDBJsonRecordReader:

@Override
public int next() {
    Stopwatch watch = Stopwatch.createUnstarted();
    watch.start();
    vectorWriter.allocate();
    vectorWriter.reset();
    int recordCount = 0;
    reader = null;
    document = null;
    int maxRecordsForThisBatch = this.maxRecordsToRead >= 0
            ? Math.min(BaseValueVector.INITIAL_VALUE_ALLOCATION, this.maxRecordsToRead)
            : BaseValueVector.INITIAL_VALUE_ALLOCATION;
    try {
        // If the last document caused a SchemaChange, create a new output schema for this scan batch
        if (schemaState == SchemaState.SCHEMA_CHANGE && !ignoreSchemaChange) {
            // Clear the ScanBatch vector container writer/mutator in order to be able to generate the new schema
            vectorWriterMutator.clear();
            vectorWriter = new VectorContainerWriter(vectorWriterMutator, unionEnabled);
            logger.debug("Encountered schema change earlier use new writer {}", vectorWriter.toString());
            document = lastDocument;
            setupWriter();
            if (recordCount < maxRecordsForThisBatch) {
                vectorWriter.setPosition(recordCount);
                if (document != null) {
                    reader = (DBDocumentReaderBase) document.asReader();
                    documentWriter.writeDBDocument(vectorWriter, reader);
                    recordCount++;
                }
            }
        }
    } catch (SchemaChangeException e) {
        String err_row = reader.getId().asJsonString();
        if (ignoreSchemaChange) {
            logger.warn("{}. Dropping row '{}' from result.", e.getMessage(), err_row);
            logger.debug("Stack trace:", e);
        } else {
            /* We should not encounter a SchemaChangeException here, since this is the
             * first document written against the new schema. Something is very wrong,
             * and we cannot recover from it.
             */
            throw dataReadError(logger, e, "SchemaChangeException for row '%s'.", err_row);
        }
    }
    schemaState = SchemaState.SCHEMA_INIT;
    while (recordCount < maxRecordsForThisBatch) {
        vectorWriter.setPosition(recordCount);
        try {
            document = nextDocument();
            if (document == null) {
                // no more documents for this reader
                break;
            } else {
                documentWriter.writeDBDocument(vectorWriter, (DBDocumentReaderBase) document.asReader());
            }
            recordCount++;
        } catch (UserException e) {
            throw UserException.unsupportedError(e)
                    .addContext(String.format("Table: %s, document id: '%s'", table.getPath(),
                            document.asReader() == null ? null
                                    : IdCodec.asString(((DBDocumentReaderBase) document.asReader()).getId())))
                    .build(logger);
        } catch (SchemaChangeException e) {
            String err_row = ((DBDocumentReaderBase) document.asReader()).getId().asJsonString();
            if (ignoreSchemaChange) {
                logger.warn("{}. Dropping row '{}' from result.", e.getMessage(), err_row);
                logger.debug("Stack trace:", e);
            } else {
                /* Save the current document for the next iteration. recordCount is not
                 * updated, so the next call to next() will replay this document against
                 * the new schema.
                 */
                lastDocument = document;
                schemaState = SchemaState.SCHEMA_CHANGE;
                break;
            }
        }
    }
    if (nonExistentColumnsProjection && recordCount > 0) {
        if (schema == null || schema.isEmpty()) {
            JsonReaderUtils.ensureAtLeastOneField(vectorWriter, getColumns(), allTextMode, Collections.emptyList());
        } else {
            JsonReaderUtils.writeColumnsUsingSchema(vectorWriter, getColumns(), schema, allTextMode);
        }
    }
    vectorWriter.setValueCount(recordCount);
    if (maxRecordsToRead > 0) {
        maxRecordsToRead -= recordCount;
    }
    logger.debug("Took {} ms to get {} records", watch.elapsed(TimeUnit.MILLISECONDS), recordCount);
    return recordCount;
}
Also used: SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), VectorContainerWriter (org.apache.drill.exec.vector.complex.impl.VectorContainerWriter), DBDocumentReaderBase (com.mapr.db.ojai.DBDocumentReaderBase), Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch), UserException (org.apache.drill.common.exceptions.UserException)
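
The interesting machinery here is the SchemaState handling: when a document fails with a SchemaChangeException mid-batch, the reader saves it in lastDocument, closes the batch at the old schema, and on the next call builds a fresh VectorContainerWriter and replays the saved document first, so no row is dropped or duplicated. Below is that carry-over pattern reduced to plain Java; CarryOverBatcher, SchemaMismatchException and writeDocument are hypothetical stand-ins for the Drill reader, SchemaChangeException and the vector-writing path.

import java.util.Iterator;

public class CarryOverBatcher<D> {

    static class SchemaMismatchException extends Exception { }

    private D pendingDocument;   // the document that broke the last batch

    // Fill one batch. If a document's schema no longer fits, stop the batch
    // and remember the document; the next call replays it under a new schema.
    int nextBatch(Iterator<D> docs, int batchCap) {
        int count = 0;
        if (pendingDocument != null) {
            // A fresh writer (new output schema) would be created here.
            writeUnchecked(pendingDocument);
            pendingDocument = null;
            count++;
        }
        while (count < batchCap && docs.hasNext()) {
            D doc = docs.next();
            try {
                writeDocument(doc);
                count++;
            } catch (SchemaMismatchException e) {
                pendingDocument = doc;   // not counted: no row lost or duplicated
                break;                   // close the batch at the old schema
            }
        }
        return count;
    }

    private void writeDocument(D doc) throws SchemaMismatchException {
        // vector writing elided; throws when the document's shape diverges
    }

    private void writeUnchecked(D doc) {
        // the first document of a new schema fits by construction
    }
}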

Example 3 with DBDocumentReaderBase

Use of com.mapr.db.ojai.DBDocumentReaderBase in project drill by apache.

From the next() method of class RestrictedJsonRecordReader:

@Override
public int next() {
    Stopwatch watch = Stopwatch.createUnstarted();
    watch.start();
    RestrictedMapRDBSubScanSpec rss = ((RestrictedMapRDBSubScanSpec) this.subScanSpec);
    vectorWriter.allocate();
    vectorWriter.reset();
    if (!rss.readyToGetRowKey()) {
        // when we are in the build schema phase
        if (rss.isBuildSchemaPhase()) {
            readToInitSchema();
        }
        return 0;
    }
    Table table = super.formatPlugin.getJsonTableCache().getTable(subScanSpec.getTableName(), subScanSpec.getUserName());
    final MultiGet multiGet = new MultiGet((BaseJsonTable) table, condition, false, projections);
    int recordCount = 0;
    DBDocumentReaderBase reader = null;
    int maxRecordsForThisBatch = this.maxRecordsToRead > 0
            ? Math.min(rss.getMaxRowKeysToBeRead(), this.maxRecordsToRead)
            : this.maxRecordsToRead == -1 ? rss.getMaxRowKeysToBeRead() : 0;
    Stopwatch timer = Stopwatch.createUnstarted();
    while (recordCount < maxRecordsForThisBatch) {
        ByteBuffer[] rowKeyIds = rss.getRowKeyIdsToRead(batchSize);
        if (rowKeyIds == null) {
            break;
        }
        try {
            timer.start();
            final List<Document> docList = multiGet.doGet(rowKeyIds);
            int index = 0;
            long docsToRead = docList.size();
            // If a limit was pushed down, stop once the multi-get has produced `limit` rows, i.e. maxRecordsForThisBatch
            if (this.maxRecordsToRead != -1) {
                docsToRead = Math.min(docsToRead, maxRecordsForThisBatch);
            }
            while (index < docsToRead) {
                vectorWriter.setPosition(recordCount);
                reader = (DBDocumentReaderBase) docList.get(index).asReader();
                documentWriter.writeDBDocument(vectorWriter, reader);
                recordCount++;
                index++;
            }
            timer.stop();
        } catch (UserException e) {
            throw UserException.unsupportedError(e)
                    .addContext(String.format("Table: %s, document id: '%s'", getTable().getPath(),
                            reader == null ? null : IdCodec.asString(reader.getId())))
                    .build(logger);
        } catch (SchemaChangeException e) {
            if (getIgnoreSchemaChange()) {
                logger.warn("{}. Dropping the row from result.", e.getMessage());
                logger.debug("Stack trace:", e);
            } else {
                throw dataReadError(logger, e);
            }
        }
    }
    vectorWriter.setValueCount(recordCount);
    if (maxRecordsToRead > 0) {
        if (maxRecordsToRead - recordCount >= 0) {
            maxRecordsToRead -= recordCount;
        } else {
            maxRecordsToRead = 0;
        }
    }
    logger.debug("Took {} ms to get {} records, getrowkey {}", watch.elapsed(TimeUnit.MILLISECONDS), recordCount, timer.elapsed(TimeUnit.MILLISECONDS));
    return recordCount;
}
Also used: BaseJsonTable (com.mapr.db.impl.BaseJsonTable), Table (com.mapr.db.Table), RestrictedMapRDBSubScanSpec (org.apache.drill.exec.store.mapr.db.RestrictedMapRDBSubScanSpec), Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch), Document (org.ojai.Document), ByteBuffer (java.nio.ByteBuffer), MultiGet (com.mapr.db.impl.MultiGet), SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), DBDocumentReaderBase (com.mapr.db.ojai.DBDocumentReaderBase), UserException (org.apache.drill.common.exceptions.UserException)
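
This restricted reader is the probe side of a row-key join: it drains row keys from the sub-scan spec in chunks of batchSize, resolves each chunk with one MultiGet round trip (MultiGet is a MapR-internal helper; its constructor and doGet(ByteBuffer[]) signature are taken from the example above, not from public documentation), and stops as soon as the pushed-down limit is satisfied. The sketch below shows the same fetch-in-chunks-under-a-limit loop in plain Java; fetchKeys and lookup are hypothetical stand-ins for rss.getRowKeyIdsToRead(batchSize) and multiGet.doGet(rowKeyIds).

import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
import java.util.function.Supplier;

public class BatchedLookupSketch {

    // Pull keys in chunks, resolve each chunk in one round trip, and stop
    // as soon as `limit` results are collected (limit pushdown).
    static <K, V> List<V> readWithLimit(Supplier<List<K>> fetchKeys,
                                        Function<List<K>, List<V>> lookup,
                                        int limit) {
        List<V> out = new ArrayList<>();
        while (out.size() < limit) {
            List<K> keys = fetchKeys.get();
            if (keys == null || keys.isEmpty()) {
                break;                 // no more row keys from the driving side
            }
            for (V doc : lookup.apply(keys)) {
                if (out.size() >= limit) {
                    break;             // never keep more rows than the limit allows
                }
                out.add(doc);
            }
        }
        return out;
    }
}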

Example 4 with DBDocumentReaderBase

Use of com.mapr.db.ojai.DBDocumentReaderBase in project drill by apache.

From the readToInitSchema() method of class RestrictedJsonRecordReader:

public void readToInitSchema() {
    DBDocumentReaderBase reader = null;
    vectorWriter.setPosition(0);
    // Probe the table for one document to seed the output schema.
    // Note: iterator().next() assumes the table is not empty.
    try (DocumentStream dstream = table.find()) {
        reader = (DBDocumentReaderBase) dstream.iterator().next().asReader();
        documentWriter.writeDBDocument(vectorWriter, reader);
    } catch (UserException e) {
        throw UserException.unsupportedError(e)
                .addContext(String.format("Table: %s, document id: '%s'", getTable().getPath(),
                        reader == null ? null : IdCodec.asString(reader.getId())))
                .build(logger);
    } catch (SchemaChangeException e) {
        if (getIgnoreSchemaChange()) {
            logger.warn("{}. Dropping the row from result.", e.getMessage());
            logger.debug("Stack trace:", e);
        } else {
            throw dataReadError(logger, e);
        }
    } finally {
        vectorWriter.setPosition(0);
    }
}
Also used: SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), DBDocumentReaderBase (com.mapr.db.ojai.DBDocumentReaderBase), UserException (org.apache.drill.common.exceptions.UserException), DocumentStream (org.ojai.DocumentStream)
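
The same probe-one-document trick works outside Drill with only the public OJAI and MapR-DB client API. A minimal stand-alone sketch, assuming a MapR cluster and an illustrative table path; MapRDB.getTable, Table.find() and Document.asJsonString() are the standard entry points, and the hasNext() guard avoids the NoSuchElementException an empty table would cause in the code above.

import java.util.Iterator;

import org.ojai.Document;
import org.ojai.DocumentStream;

import com.mapr.db.MapRDB;
import com.mapr.db.Table;

public class SchemaProbeSketch {

    public static void main(String[] args) {
        // "/apps/my_table" is an illustrative path, not from the example above
        Table table = MapRDB.getTable("/apps/my_table");
        try (DocumentStream dstream = table.find()) {
            Iterator<Document> it = dstream.iterator();
            if (it.hasNext()) {
                // One document is enough to discover the field layout
                System.out.println(it.next().asJsonString());
            }
        } finally {
            table.close();
        }
    }
}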

Aggregations

DBDocumentReaderBase (com.mapr.db.ojai.DBDocumentReaderBase): 4 uses
UserException (org.apache.drill.common.exceptions.UserException): 4 uses
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 4 uses
Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch): 2 uses
Stopwatch (com.google.common.base.Stopwatch): 1 use
Table (com.mapr.db.Table): 1 use
BaseJsonTable (com.mapr.db.impl.BaseJsonTable): 1 use
MultiGet (com.mapr.db.impl.MultiGet): 1 use
ByteBuffer (java.nio.ByteBuffer): 1 use
RestrictedMapRDBSubScanSpec (org.apache.drill.exec.store.mapr.db.RestrictedMapRDBSubScanSpec): 1 use
MapOrListWriterImpl (org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl): 1 use
VectorContainerWriter (org.apache.drill.exec.vector.complex.impl.VectorContainerWriter): 1 use
Document (org.ojai.Document): 1 use
DocumentStream (org.ojai.DocumentStream): 1 use