Use of org.apache.avro.file.DataFileStream in project nifi by apache.
The class TestExecuteSQL, method invokeOnTrigger:
public void invokeOnTrigger(final Integer queryTimeout, final String query, final boolean incomingFlowFile,
        final Map<String, String> attrs, final boolean setQueryProperty)
        throws InitializationException, ClassNotFoundException, SQLException, IOException {
    if (queryTimeout != null) {
        runner.setProperty(ExecuteSQL.QUERY_TIMEOUT, queryTimeout.toString() + " secs");
    }

    // remove the previous test database, if any
    final File dbLocation = new File(DB_LOCATION);
    dbLocation.delete();

    // load test data into the database
    final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
    TestJdbcHugeStream.loadTestData2Database(con, 100, 200, 100);
    LOGGER.info("test data loaded");

    // The ResultSet size will be 1 x 200 x 100 = 20,000 rows
    // because of the WHERE PER.ID = ${person.id} clause
    final int nrOfRows = 20000;

    if (incomingFlowFile) {
        // the incoming FlowFile content is not used, but its attributes are
        final Map<String, String> attributes = (attrs == null) ? new HashMap<>() : attrs;
        attributes.put("person.id", "10");
        if (!setQueryProperty) {
            runner.enqueue(query.getBytes(), attributes);
        } else {
            runner.enqueue("Hello".getBytes(), attributes);
        }
    }

    if (setQueryProperty) {
        runner.setProperty(ExecuteSQL.SQL_SELECT_QUERY, query);
    }

    runner.run();
    runner.assertAllFlowFilesTransferred(ExecuteSQL.REL_SUCCESS, 1);
    runner.assertAllFlowFilesContainAttribute(ExecuteSQL.REL_SUCCESS, ExecuteSQL.RESULT_QUERY_DURATION);
    runner.assertAllFlowFilesContainAttribute(ExecuteSQL.REL_SUCCESS, ExecuteSQL.RESULT_ROW_COUNT);

    final List<MockFlowFile> flowfiles = runner.getFlowFilesForRelationship(ExecuteSQL.REL_SUCCESS);
    final InputStream in = new ByteArrayInputStream(flowfiles.get(0).toByteArray());
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        GenericRecord record = null;
        long recordsFromStream = 0;
        while (dataFileReader.hasNext()) {
            // Reuse the record object by passing it to next(). This saves us from
            // allocating and garbage-collecting many objects for files with many items.
            record = dataFileReader.next(record);
            recordsFromStream += 1;
        }
        LOGGER.info("total nr of records from stream: " + recordsFromStream);
        assertEquals(nrOfRows, recordsFromStream);
    }
}
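Outside the NiFi test harness, the same read loop is just a DataFileStream over any InputStream. A minimal standalone sketch of the record-reuse idiom, assuming a local file named records.avro (the path is a placeholder, not part of the NiFi tests):

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class CountAvroRecords {
    public static void main(String[] args) throws Exception {
        // "records.avro" is a placeholder path for illustration
        try (InputStream in = new BufferedInputStream(new FileInputStream("records.avro"));
             DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
            GenericRecord record = null; // reused across iterations to avoid per-record allocations
            long count = 0;
            while (reader.hasNext()) {
                record = reader.next(record);
                count++;
            }
            System.out.println("records: " + count);
        }
    }
}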
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
The class ExtractAvroMetadata, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final Map<String, String> avroMetadata = new HashMap<>();
    final Set<String> requestedMetadataKeys = new HashSet<>();

    final boolean countRecords = context.getProperty(COUNT_ITEMS).asBoolean();
    final String fingerprintAlgorithm = context.getProperty(FINGERPRINT_ALGORITHM).getValue();
    final String metadataKeysValue = context.getProperty(METADATA_KEYS).getValue();

    if (!StringUtils.isEmpty(metadataKeysValue)) {
        final String[] keys = metadataKeysValue.split("\\s*,\\s*");
        for (final String key : keys) {
            requestedMetadataKeys.add(key.trim());
        }
    }

    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream rawIn) throws IOException {
                try (final InputStream in = new BufferedInputStream(rawIn);
                     final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {

                    final Schema schema = reader.getSchema();
                    if (schema == null) {
                        throw new ProcessException("Avro schema was null");
                    }

                    for (String key : reader.getMetaKeys()) {
                        if (requestedMetadataKeys.contains(key)) {
                            avroMetadata.put(key, reader.getMetaString(key));
                        }
                    }

                    try {
                        final byte[] rawFingerprint = SchemaNormalization.parsingFingerprint(fingerprintAlgorithm, schema);
                        avroMetadata.put(SCHEMA_FINGERPRINT_ATTR, Hex.encodeHexString(rawFingerprint));
                        avroMetadata.put(SCHEMA_TYPE_ATTR, schema.getType().getName());
                        avroMetadata.put(SCHEMA_NAME_ATTR, schema.getName());
                    } catch (NoSuchAlgorithmException e) {
                        // shouldn't happen since the allowable values are valid algorithms
                        throw new ProcessException(e);
                    }

                    if (countRecords) {
                        long recordCount = reader.getBlockCount();
                        try {
                            while (reader.nextBlock() != null) {
                                recordCount += reader.getBlockCount();
                            }
                        } catch (NoSuchElementException e) {
                            // happens at end of file
                        }
                        avroMetadata.put(ITEM_COUNT_ATTR, String.valueOf(recordCount));
                    }
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to extract Avro metadata for {} due to {}; transferring to failure", new Object[] { flowFile, pe });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    flowFile = session.putAllAttributes(flowFile, avroMetadata);
    session.transfer(flowFile, REL_SUCCESS);
}
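For reference, the fingerprinting step above in isolation: SchemaNormalization reduces the schema to its parsing canonical form and hashes it with the named MessageDigest algorithm. A minimal sketch, with a made-up User schema for illustration:

import java.security.NoSuchAlgorithmException;
import org.apache.avro.Schema;
import org.apache.avro.SchemaNormalization;
import org.apache.commons.codec.binary.Hex;

public class FingerprintDemo {
    public static void main(String[] args) throws NoSuchAlgorithmException {
        // illustrative schema, not one of the processor's fixtures
        final Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
              + "{\"name\":\"name\",\"type\":\"string\"}]}");
        // same call the processor makes: canonicalize the schema, then hash it
        final byte[] rawFingerprint = SchemaNormalization.parsingFingerprint("MD5", schema);
        System.out.println(Hex.encodeHexString(rawFingerprint));
    }
}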
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
The class TestSplitAvro, method checkDataFileSplitSize:
private void checkDataFileSplitSize(List<MockFlowFile> flowFiles, int expectedRecordsPerSplit, boolean checkMetadata) throws IOException {
    for (final MockFlowFile flowFile : flowFiles) {
        try (final ByteArrayInputStream in = new ByteArrayInputStream(flowFile.toByteArray());
             final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {

            int count = 0;
            GenericRecord record = null;
            while (reader.hasNext()) {
                record = reader.next(record);
                Assert.assertNotNull(record.get("name"));
                Assert.assertNotNull(record.get("favorite_number"));
                count++;
            }
            assertEquals(expectedRecordsPerSplit, count);

            if (checkMetadata) {
                assertEquals(META_VALUE1, reader.getMetaString(META_KEY1));
                assertEquals(META_VALUE2, reader.getMetaLong(META_KEY2));
                assertEquals(META_VALUE3, new String(reader.getMeta(META_KEY3), "UTF-8"));
            }
        }
    }
}
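These metadata assertions check that the split files carry the original file's metadata forward. On the writer side, such metadata is attached with DataFileWriter.setMeta, which must be called before create(). A writer-side sketch with an illustrative schema and made-up keys (not the test's actual META_KEY/META_VALUE fixtures):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class WriteAvroWithMetadata {
    public static byte[] write() throws IOException {
        final Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
              + "{\"name\":\"name\",\"type\":\"string\"},"
              + "{\"name\":\"favorite_number\",\"type\":\"int\"}]}");

        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
            // metadata must be set before create(); keys and values here are illustrative
            writer.setMeta("metaKey1", "metaValue1");
            writer.setMeta("metaKey2", 1234L);
            writer.create(schema, out);

            final GenericRecord record = new GenericData.Record(schema);
            record.put("name", "alice");
            record.put("favorite_number", 7);
            writer.append(record);
        }
        return out.toByteArray();
    }
}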
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
The class EmbeddedAvroSchemaAccessStrategy, method getSchema:
@Override
public RecordSchema getSchema(Map<String, String> variables, final InputStream contentStream, final RecordSchema readSchema) throws SchemaNotFoundException, IOException {
    // Note: the DataFileStream is not closed here, since closing it would also
    // close the caller-supplied contentStream.
    final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(contentStream, new GenericDatumReader<GenericRecord>());
    final Schema avroSchema = dataFileStream.getSchema();
    return AvroTypeUtil.createSchema(avroSchema);
}
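DataFileStream reads the writer schema from the container header during construction, so the schema is available without consuming any records. The same access pattern in a standalone sketch (class and method names are illustrative):

import java.io.IOException;
import java.io.InputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public final class AvroHeaderSchema {
    // Returns the writer schema embedded in an Avro container stream.
    // The caller keeps ownership of (and responsibility for closing) the stream.
    public static Schema read(final InputStream in) throws IOException {
        final DataFileStream<GenericRecord> stream = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>());
        return stream.getSchema();
    }
}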
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
The class TestWriteAvroResultWithSchema, method readRecord:
@Override
protected GenericRecord readRecord(final InputStream in, final Schema schema) throws IOException {
    final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>());
    final Schema avroSchema = dataFileStream.getSchema();
    GenericData.setStringType(avroSchema, StringType.String);
    final GenericRecord avroRecord = dataFileStream.next();
    return avroRecord;
}
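GenericData.setStringType works by tagging the schema with Avro's avro.java.string property, which tells readers to materialize java.lang.String values instead of Avro's Utf8 wrapper. A small sketch of the mechanism on a plain string schema (the demo class is illustrative):

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericData.StringType;

public class StringTypeDemo {
    public static void main(String[] args) {
        final Schema stringSchema = Schema.create(Schema.Type.STRING);
        // adds the "avro.java.string" property, so readers decode values
        // as java.lang.String rather than org.apache.avro.util.Utf8
        GenericData.setStringType(stringSchema, StringType.String);
        System.out.println(stringSchema.getProp("avro.java.string")); // prints: String
    }
}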