Use of com.mapr.db.ojai.DBDocumentReaderBase in project drill by axbaretto.
Class MaprDBJsonRecordReader, method next.
/**
 * Fills the vector writer with the next batch of documents from this scanner.
 *
 * <p>At most {@code BaseValueVector.INITIAL_VALUE_ALLOCATION} records are written per call.
 * For a skip query only a {@code count} bit is written per document; when {@code idOnly} is
 * set only the document id is written; otherwise the whole document is written.
 *
 * <p>A {@link SchemaChangeException} drops the offending row (with a warning) when
 * {@code ignoreSchemaChange} is set and is otherwise rethrown as a data-read error. A
 * {@link UserException} is rethrown with the table path and document id added as context.
 *
 * @return the number of records written in this batch; {@code 0} when no documents remain
 */
@Override
public int next() {
  Stopwatch watch = Stopwatch.createStarted();
  vectorWriter.allocate();
  vectorWriter.reset();

  int recordCount = 0;
  DBDocumentReaderBase reader = null;
  while (recordCount < BaseValueVector.INITIAL_VALUE_ALLOCATION) {
    vectorWriter.setPosition(recordCount);
    try {
      reader = nextDocumentReader();
      if (reader == null) {
        // no more documents for this scanner
        break;
      }
      if (isSkipQuery()) {
        vectorWriter.rootAsMap().bit("count").writeBit(1);
      } else {
        MapOrListWriterImpl writer = new MapOrListWriterImpl(vectorWriter.rootAsMap());
        if (idOnly) {
          writeId(writer, reader.getId());
        } else {
          if (reader.next() != EventType.START_MAP) {
            throw dataReadError("The document did not start with START_MAP!");
          }
          writeToListOrMap(writer, reader);
        }
      }
      recordCount++;
    } catch (UserException e) {
      String documentId = reader == null ? null : IdCodec.asString(reader.getId());
      throw UserException.unsupportedError(e)
          .addContext(String.format("Table: %s, document id: '%s'", table.getPath(), documentId))
          .build(logger);
    } catch (SchemaChangeException e) {
      if (!ignoreSchemaChange) {
        throw dataReadError(e);
      }
      // recordCount is not advanced, so the next document overwrites this position.
      logger.warn("{}. Dropping the row from result.", e.getMessage());
      logger.debug("Stack trace:", e);
    }
  }

  if (nonExistentColumnsProjection && recordCount > 0) {
    // Type-safe empty list instead of the raw Collections.EMPTY_LIST constant.
    JsonReaderUtils.ensureAtLeastOneField(vectorWriter, getColumns(), allTextMode, Collections.emptyList());
  }
  vectorWriter.setValueCount(recordCount);
  logger.debug("Took {} ms to get {} records", watch.elapsed(TimeUnit.MILLISECONDS), recordCount);
  return recordCount;
}
Use of com.mapr.db.ojai.DBDocumentReaderBase in project drill by apache.
Class MaprDBJsonRecordReader, method next.
/**
 * Fills the vector writer with the next batch of documents from this reader.
 *
 * <p>The batch holds at most {@code BaseValueVector.INITIAL_VALUE_ALLOCATION} records, further
 * capped by {@code maxRecordsToRead} when that value is non-negative.
 *
 * <p>If the previous call stopped on a schema change (and schema changes are not ignored), the
 * writer is rebuilt first and the document saved by that call is written as the first record
 * of this batch. Documents are then pulled with {@code nextDocument()} until the batch is
 * full, the source is exhausted, or another schema change is hit. In the latter case the
 * offending document is saved in {@code lastDocument} and the batch ends early.
 *
 * @return the number of records written in this batch
 */
@Override
public int next() {
  Stopwatch watch = Stopwatch.createStarted();
  vectorWriter.allocate();
  vectorWriter.reset();
  int recordCount = 0;
  reader = null;
  document = null;
  int maxRecordsForThisBatch = this.maxRecordsToRead >= 0
      ? Math.min(BaseValueVector.INITIAL_VALUE_ALLOCATION, this.maxRecordsToRead)
      : BaseValueVector.INITIAL_VALUE_ALLOCATION;

  try {
    // If the last document caused a SchemaChange create a new output schema for this scan batch
    if (schemaState == SchemaState.SCHEMA_CHANGE && !ignoreSchemaChange) {
      // Clear the ScanBatch vector container writer/mutator in order to be able to generate the new schema
      vectorWriterMutator.clear();
      vectorWriter = new VectorContainerWriter(vectorWriterMutator, unionEnabled);
      logger.debug("Encountered schema change earlier use new writer {}", vectorWriter.toString());
      document = lastDocument;
      setupWriter();
      if (recordCount < maxRecordsForThisBatch) {
        vectorWriter.setPosition(recordCount);
        if (document != null) {
          reader = (DBDocumentReaderBase) document.asReader();
          documentWriter.writeDBDocument(vectorWriter, reader);
          recordCount++;
        }
      }
    }
  } catch (SchemaChangeException e) {
    // reader is still null if the failure happened before the saved document was opened;
    // guard so the original exception is reported instead of a NullPointerException.
    String errRow = reader == null ? null : reader.getId().asJsonString();
    if (ignoreSchemaChange) {
      logger.warn("{}. Dropping row '{}' from result.", e.getMessage(), errRow);
      logger.debug("Stack trace:", e);
    } else {
      /* We should not encounter a SchemaChangeException here since this is the first document for this
       * new schema. Something is very wrong - cannot handle any further!
       */
      throw dataReadError(logger, e, "SchemaChangeException for row '%s'.", errRow);
    }
  }

  schemaState = SchemaState.SCHEMA_INIT;
  while (recordCount < maxRecordsForThisBatch) {
    vectorWriter.setPosition(recordCount);
    try {
      document = nextDocument();
      if (document == null) {
        // no more documents for this reader
        break;
      }
      documentWriter.writeDBDocument(vectorWriter, (DBDocumentReaderBase) document.asReader());
      recordCount++;
    } catch (UserException e) {
      // document may be null here (e.g. nextDocument() itself failed on the first fetch);
      // check the document, not just its reader, so the UserException is not masked by an NPE.
      DBDocumentReaderBase failedReader =
          document == null ? null : (DBDocumentReaderBase) document.asReader();
      String documentId = failedReader == null ? null : IdCodec.asString(failedReader.getId());
      throw UserException.unsupportedError(e)
          .addContext(String.format("Table: %s, document id: '%s'", table.getPath(), documentId))
          .build(logger);
    } catch (SchemaChangeException e) {
      String errRow = document == null
          ? null
          : ((DBDocumentReaderBase) document.asReader()).getId().asJsonString();
      if (ignoreSchemaChange) {
        logger.warn("{}. Dropping row '{}' from result.", e.getMessage(), errRow);
        logger.debug("Stack trace:", e);
      } else {
        /* Save the current document reader for next iteration. The recordCount is not updated so we
         * would start from this reader on the next next() call
         */
        lastDocument = document;
        schemaState = SchemaState.SCHEMA_CHANGE;
        break;
      }
    }
  }

  if (nonExistentColumnsProjection && recordCount > 0) {
    if (schema == null || schema.isEmpty()) {
      JsonReaderUtils.ensureAtLeastOneField(vectorWriter, getColumns(), allTextMode, Collections.emptyList());
    } else {
      JsonReaderUtils.writeColumnsUsingSchema(vectorWriter, getColumns(), schema, allTextMode);
    }
  }
  vectorWriter.setValueCount(recordCount);
  // Consume the remaining row budget; recordCount never exceeds maxRecordsForThisBatch here.
  if (maxRecordsToRead > 0) {
    maxRecordsToRead -= recordCount;
  }
  logger.debug("Took {} ms to get {} records", watch.elapsed(TimeUnit.MILLISECONDS), recordCount);
  return recordCount;
}
Use of com.mapr.db.ojai.DBDocumentReaderBase in project drill by apache.
Class RestrictedJsonRecordReader, method next.
/**
 * Fetches the next batch of documents by row key and writes them to the vector writer.
 *
 * <p>Returns {@code 0} immediately while the sub-scan spec is not ready to supply row keys;
 * during the build-schema phase it first calls {@link #readToInitSchema()}. Otherwise row keys
 * are pulled from the sub-scan spec in groups of {@code batchSize}, fetched with a
 * {@code MultiGet}, and each returned document is written as one record.
 *
 * <p>A {@link SchemaChangeException} is logged and skipped when schema changes are ignored and
 * is otherwise rethrown as a data-read error. A {@link UserException} is rethrown with the
 * table path and document id added as context.
 *
 * @return the number of records written in this batch
 */
@Override
public int next() {
  Stopwatch watch = Stopwatch.createStarted();
  RestrictedMapRDBSubScanSpec rss = ((RestrictedMapRDBSubScanSpec) this.subScanSpec);
  vectorWriter.allocate();
  vectorWriter.reset();

  if (!rss.readyToGetRowKey()) {
    // when we are in the build schema phase
    if (rss.isBuildSchemaPhase()) {
      readToInitSchema();
    }
    return 0;
  }

  Table table = super.formatPlugin.getJsonTableCache().getTable(subScanSpec.getTableName(), subScanSpec.getUserName());
  final MultiGet multiGet = new MultiGet((BaseJsonTable) table, condition, false, projections);
  int recordCount = 0;
  DBDocumentReaderBase reader = null;
  int maxRecordsForThisBatch = this.maxRecordsToRead > 0
      ? Math.min(rss.getMaxRowKeysToBeRead(), this.maxRecordsToRead)
      : this.maxRecordsToRead == -1 ? rss.getMaxRowKeysToBeRead() : 0;
  // Accumulates the time spent fetching and writing documents across all multi-gets.
  Stopwatch timer = Stopwatch.createUnstarted();

  while (recordCount < maxRecordsForThisBatch) {
    ByteBuffer[] rowKeyIds = rss.getRowKeyIdsToRead(batchSize);
    if (rowKeyIds == null) {
      break;
    }
    try {
      timer.start();
      final List<Document> docList = multiGet.doGet(rowKeyIds);
      int index = 0;
      long docsToRead = docList.size();
      // If limit pushdown then stop once we have `limit` rows from multiget i.e. maxRecordsForThisBatch
      // NOTE(review): the cap is the whole batch limit, not the remaining
      // (maxRecordsForThisBatch - recordCount), so recordCount can overshoot the limit across
      // iterations; the clamp on maxRecordsToRead below absorbs that - confirm this is intended.
      if (this.maxRecordsToRead != -1) {
        docsToRead = Math.min(docsToRead, maxRecordsForThisBatch);
      }
      while (index < docsToRead) {
        vectorWriter.setPosition(recordCount);
        reader = (DBDocumentReaderBase) docList.get(index).asReader();
        documentWriter.writeDBDocument(vectorWriter, reader);
        recordCount++;
        index++;
      }
    } catch (UserException e) {
      String documentId = reader == null ? null : IdCodec.asString(reader.getId());
      throw UserException.unsupportedError(e)
          .addContext(String.format("Table: %s, document id: '%s'", getTable().getPath(), documentId))
          .build(logger);
    } catch (SchemaChangeException e) {
      if (getIgnoreSchemaChange()) {
        logger.warn("{}. Dropping the row from result.", e.getMessage());
        logger.debug("Stack trace:", e);
      } else {
        throw dataReadError(logger, e);
      }
    } finally {
      // Always stop the timer: when a schema change is swallowed above the loop continues,
      // and calling start() on a still-running Stopwatch throws IllegalStateException.
      if (timer.isRunning()) {
        timer.stop();
      }
    }
  }

  vectorWriter.setValueCount(recordCount);
  if (maxRecordsToRead > 0) {
    // Clamp at zero so an overshoot never turns the remaining limit negative.
    if (maxRecordsToRead - recordCount >= 0) {
      maxRecordsToRead -= recordCount;
    } else {
      maxRecordsToRead = 0;
    }
  }
  logger.debug("Took {} ms to get {} records, getrowkey {}", watch.elapsed(TimeUnit.MILLISECONDS), recordCount, timer.elapsed(TimeUnit.MILLISECONDS));
  return recordCount;
}
Use of com.mapr.db.ojai.DBDocumentReaderBase in project drill by apache.
Class RestrictedJsonRecordReader, method readToInitSchema.
/**
 * Writes the first document returned by {@code table.find()} at writer position 0 so that the
 * vector writer picks up that document's fields, then resets the writer position to 0.
 *
 * <p>If the table returns no documents nothing is written. A {@link SchemaChangeException} is
 * logged and ignored when schema changes are ignored and is otherwise rethrown as a data-read
 * error. A {@link UserException} is rethrown with the table path and document id as context.
 */
public void readToInitSchema() {
  DBDocumentReaderBase reader = null;
  vectorWriter.setPosition(0);
  try (DocumentStream dstream = table.find()) {
    // Only the first document is needed. Iterating (instead of calling iterator().next()
    // directly) makes an empty table a no-op rather than a NoSuchElementException.
    for (Document firstDocument : dstream) {
      reader = (DBDocumentReaderBase) firstDocument.asReader();
      documentWriter.writeDBDocument(vectorWriter, reader);
      break;
    }
  } catch (UserException e) {
    String documentId = reader == null ? null : IdCodec.asString(reader.getId());
    throw UserException.unsupportedError(e)
        .addContext(String.format("Table: %s, document id: '%s'", getTable().getPath(), documentId))
        .build(logger);
  } catch (SchemaChangeException e) {
    if (getIgnoreSchemaChange()) {
      logger.warn("{}. Dropping the row from result.", e.getMessage());
      logger.debug("Stack trace:", e);
    } else {
      throw dataReadError(logger, e);
    }
  } finally {
    // Rewind so the next real batch starts writing at the first position.
    vectorWriter.setPosition(0);
  }
}
Aggregations