use of org.apache.hop.beam.core.HopRow in project hop by apache.
the class BQSchemaAndRecordToHopFn method apply.
/**
 * Converts one BigQuery Avro record into a {@link HopRow}.
 *
 * <p>The first call (detected by {@code rowMeta == null}) performs a one-time initialization:
 * the metrics counters are created, Hop is initialized, the row metadata is parsed from
 * {@code rowMetaJson}, every row metadata field is checked against the BigQuery table schema
 * and the date formats are created. If that initialization fails half-way, {@code rowMeta} is
 * reset so that the next call starts the initialization again instead of running with
 * partially initialized state.
 *
 * <p>For every record the values are looked up by field name and converted according to the
 * Hop value type of the corresponding row metadata entry. Null source values are skipped, so
 * the matching slot keeps whatever {@code RowDataUtil.allocateRowData} put there.
 *
 * @param schemaAndRecord the BigQuery record together with its table schema
 * @return the converted row
 * @throws RuntimeException wrapping any failure during initialization or conversion
 */
@Override
public HopRow apply(SchemaAndRecord schemaAndRecord) {
  try {
    GenericRecord record = schemaAndRecord.getRecord();
    TableSchema tableSchema = schemaAndRecord.getTableSchema();
    if (rowMeta == null) {
      try {
        inputCounter = Metrics.counter(Pipeline.METRIC_NAME_INPUT, transformName);
        writtenCounter = Metrics.counter(Pipeline.METRIC_NAME_WRITTEN, transformName);
        errorCounter = Metrics.counter(Pipeline.METRIC_NAME_ERROR, transformName);
        // Initialize Hop
        //
        BeamHop.init(transformPluginClasses, xpPluginClasses);
        rowMeta = JsonRowMeta.fromJson(rowMetaJson);
        // Hop type found in the table schema for each row metadata field; 0 means "not found"
        //
        int[] valueTypes = new int[rowMeta.size()];
        List<TableFieldSchema> fields = tableSchema.getFields();
        for (int i = 0; i < fields.size(); i++) {
          TableFieldSchema fieldSchema = fields.get(i);
          String name = fieldSchema.getName();
          int index = rowMeta.indexOfValue(name);
          // Schema fields that are not part of the row metadata are ignored
          //
          if (index >= 0) {
            String avroTypeString = fieldSchema.getType();
            try {
              AvroType avroType = AvroType.valueOf(avroTypeString);
              valueTypes[index] = avroType.getHopType();
            } catch (IllegalArgumentException e) {
              throw new RuntimeException("Unable to recognize data type '" + avroTypeString + "'", e);
            }
          }
        }
        // Every requested field must have been matched to a schema field
        //
        for (int i = 0; i < rowMeta.size(); i++) {
          if (valueTypes[i] == 0) {
            IValueMeta valueMeta = rowMeta.getValueMeta(i);
            throw new RuntimeException("Unable to find field '" + valueMeta.getName() + "'");
          }
        }
        simpleDateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
        simpleDateTimeFormat.setLenient(true);
        simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
        simpleDateFormat.setLenient(true);
        Metrics.counter(Pipeline.METRIC_NAME_INIT, transformName).inc();
      } catch (Exception initFailure) {
        // Forget the half-finished state: rowMeta doubles as the "initialized" flag, so leaving
        // it set would make later calls skip the initialization and hit null date formats.
        //
        rowMeta = null;
        throw initFailure;
      }
    }
    inputCounter.inc();
    // Convert to the requested Hop Data types
    //
    Object[] row = RowDataUtil.allocateRowData(rowMeta.size());
    for (int index = 0; index < rowMeta.size(); index++) {
      IValueMeta valueMeta = rowMeta.getValueMeta(index);
      Object srcData = record.get(valueMeta.getName());
      if (srcData != null) {
        switch (valueMeta.getType()) {
          case IValueMeta.TYPE_STRING:
            row[index] = srcData.toString();
            break;
          case IValueMeta.TYPE_INTEGER:
            row[index] = (Long) srcData;
            break;
          case IValueMeta.TYPE_NUMBER:
            row[index] = (Double) srcData;
            break;
          case IValueMeta.TYPE_BOOLEAN:
            row[index] = (Boolean) srcData;
            break;
          case IValueMeta.TYPE_DATE:
            // The value is read as Avro Utf8 text: exactly 10 characters is parsed as a
            // date (yyyy-MM-dd), anything else as a date-time (yyyy-MM-dd'T'HH:mm:ss).
            //
            String datetimeString = ((Utf8) srcData).toString();
            if (datetimeString.length() == 10) {
              row[index] = simpleDateFormat.parse(datetimeString);
            } else {
              row[index] = simpleDateTimeFormat.parse(datetimeString);
            }
            break;
          default:
            throw new RuntimeException("Conversion from Avro JSON to Hop is not yet supported for Hop data type '" + valueMeta.getTypeDesc() + "'");
        }
      }
    }
    // Pass the row to the process context
    //
    writtenCounter.inc();
    return new HopRow(row);
  } catch (Exception e) {
    // The counter is still null when the failure happened before it was created; guard the
    // increment so that a secondary NullPointerException cannot mask the real error.
    //
    if (errorCounter != null) {
      errorCounter.inc();
    }
    LOG.error("Error converting BQ Avro data into Hop rows : " + e.getMessage(), e);
    throw new RuntimeException("Error converting BQ Avro data into Hop rows", e);
  }
}
use of org.apache.hop.beam.core.HopRow in project hop by apache.
the class GroupByFn method processElement.
/**
 * Aggregates all rows of one group and outputs a single result row.
 *
 * <p>The input element is a key/value pair: the key holds the group fields, the value holds
 * the subject rows of that group. For every position {@code i} the aggregation
 * {@code aggregationTypes[i]} is applied to column {@code i} of the subject rows, described by
 * {@code subjectRowMeta.getValueMeta(i)}. The output row is the group row followed by the
 * aggregation results.
 *
 * <p>Null subject values are skipped by SUM and AVERAGE. An AVERAGE over a group without any
 * non-null value stays null.
 *
 * @param processContext the Beam process context providing the grouped element and the output
 * @throws RuntimeException wrapping any failure while aggregating
 */
@ProcessElement
public void processElement(ProcessContext processContext) {
  try {
    // Get a KV
    //
    KV<HopRow, Iterable<HopRow>> inputElement = processContext.element();
    // Get the key row
    //
    HopRow groupHopRow = inputElement.getKey();
    Object[] groupRow = groupHopRow.getRow();
    // Aggregation state for this group: fresh arrays start out as null / 0.
    // counts[i] is the number of non-null values for AVERAGE and a "first value seen"
    // marker for FIRST and FIRST_INCL_NULL.
    //
    Object[] results = new Object[aggregationTypes.length];
    long[] counts = new long[aggregationTypes.length];
    Iterable<HopRow> subjectHopRows = inputElement.getValue();
    for (HopRow subjectHopRow : subjectHopRows) {
      Object[] subjectRow = subjectHopRow.getRow();
      readCounter.inc();
      //
      for (int i = 0; i < aggregationTypes.length; i++) {
        IValueMeta subjectValueMeta = subjectRowMeta.getValueMeta(i);
        Object subject = subjectRow[i];
        Object result = results[i];
        switch (aggregationTypes[i]) {
          case AVERAGE:
            // Count the non-null values, the division happens after all rows were seen
            //
            if (!subjectValueMeta.isNull(subject)) {
              counts[i]++;
            }
            // Intentional fall-through: an average is a sum that gets divided afterwards
          case SUM:
            // A null subject contributes nothing to the sum
            //
            if (subject != null) {
              if (result == null) {
                result = subject;
              } else {
                switch (subjectValueMeta.getType()) {
                  case IValueMeta.TYPE_INTEGER:
                    result = (Long) result + (Long) subject;
                    break;
                  case IValueMeta.TYPE_NUMBER:
                    result = (Double) result + (Double) subject;
                    break;
                  case IValueMeta.TYPE_BIGNUMBER:
                    result = ((BigDecimal) result).add((BigDecimal) subject);
                    break;
                  default:
                    throw new HopException("SUM aggregation not yet implemented for field and data type : " + subjectValueMeta.toString());
                }
              }
            }
            break;
          case COUNT_ALL:
            if (subject != null) {
              if (result == null) {
                result = Long.valueOf(1L);
              } else {
                result = (Long) result + 1L;
              }
            }
            break;
          case MIN:
            if (subjectValueMeta.isNull(result)) {
              // Previous result was null? Then take the subject
              result = subject;
            } else {
              if (subjectValueMeta.compare(subject, result) < 0) {
                result = subject;
              }
            }
            break;
          case MAX:
            if (subjectValueMeta.isNull(result)) {
              // Previous result was null? Then take the subject
              result = subject;
            } else {
              if (subjectValueMeta.compare(subject, result) > 0) {
                result = subject;
              }
            }
            break;
          case FIRST_INCL_NULL:
            if (counts[i] == 0) {
              counts[i]++;
              result = subject;
            }
            break;
          case LAST_INCL_NULL:
            result = subject;
            break;
          case FIRST:
            if (!subjectValueMeta.isNull(subject) && counts[i] == 0) {
              counts[i]++;
              result = subject;
            }
            break;
          case LAST:
            if (!subjectValueMeta.isNull(subject)) {
              result = subject;
            }
            break;
          default:
            throw new HopException("Sorry, aggregation type yet: " + aggregationTypes[i].name() + " isn't implemented yet");
        }
        results[i] = result;
      }
    }
    // Turn the sums collected for AVERAGE into averages
    //
    for (int i = 0; i < results.length; i++) {
      IValueMeta subjectValueMeta = subjectRowMeta.getValueMeta(i);
      switch (aggregationTypes[i]) {
        case AVERAGE:
          // A null sum (no non-null values in the group) is left as null.
          // A zero count leaves the value undivided.
          //
          switch (subjectValueMeta.getType()) {
            case IValueMeta.TYPE_NUMBER:
              if (results[i] != null && counts[i] != 0) {
                results[i] = (Double) results[i] / counts[i];
              }
              break;
            case IValueMeta.TYPE_INTEGER:
              if (results[i] != null && counts[i] != 0) {
                results[i] = (Long) results[i] / counts[i];
              }
              break;
            case IValueMeta.TYPE_BIGNUMBER:
              if (results[i] != null && counts[i] != 0) {
                // A bounded precision is required: an exact divide throws an
                // ArithmeticException for non-terminating quotients such as 1/3.
                //
                results[i] = ((BigDecimal) results[i]).divide(BigDecimal.valueOf(counts[i]), java.math.MathContext.DECIMAL128);
              }
              break;
            default:
              throw new HopException("Unable to calculate average on data type : " + subjectValueMeta.getTypeDesc());
          }
          break;
        default:
          // All other aggregation types are already final
          break;
      }
    }
    // Now we have the results
    // Concatenate both group and result...
    //
    Object[] resultRow = RowDataUtil.allocateRowData(groupRowMeta.size() + subjectRowMeta.size());
    int index = 0;
    for (int i = 0; i < groupRowMeta.size(); i++) {
      resultRow[index++] = groupRow[i];
    }
    for (int i = 0; i < subjectRowMeta.size(); i++) {
      resultRow[index++] = results[i];
    }
    // Send it on its way
    //
    processContext.output(new HopRow(resultRow));
    writtenCounter.inc();
  } catch (Exception e) {
    errorCounter.inc();
    LOG.error("Error grouping by ", e);
    throw new RuntimeException("Unable to split row into group and subject ", e);
  }
}
use of org.apache.hop.beam.core.HopRow in project hop by apache.
the class HopKeyValueFn method processElement.
/**
 * Splits the incoming row into a key row and a value row and outputs them as one {@code KV}.
 *
 * <p>The key row is built from the input columns listed in {@code keyIndexes}, the value row
 * from the input columns listed in {@code valueIndexes}, both in the order of those arrays.
 *
 * @param processContext the Beam process context providing the input row and the output
 * @throws RuntimeException wrapping any failure; the error counter is incremented first
 */
@ProcessElement
public void processElement(ProcessContext processContext) {
  try {
    final HopRow element = processContext.element();
    readCounter.inc();
    final Object[] source = element.getRow();

    // Project the key columns out of the source row
    final Object[] keys = RowDataUtil.allocateRowData(keyIndexes.length);
    int keyTarget = 0;
    for (int sourceIndex : keyIndexes) {
      keys[keyTarget++] = source[sourceIndex];
    }

    // Project the value columns out of the source row
    final Object[] values = RowDataUtil.allocateRowData(valueIndexes.length);
    int valueTarget = 0;
    for (int sourceIndex : valueIndexes) {
      values[valueTarget++] = source[sourceIndex];
    }

    final HopRow keyPart = new HopRow(keys);
    final HopRow valuePart = new HopRow(values);
    final KV<HopRow, HopRow> pair = KV.of(keyPart, valuePart);
    processContext.output(pair);
  } catch (Exception failure) {
    errorCounter.inc();
    LOG.error("Error splitting row into key and value", failure);
    throw new RuntimeException("Unable to split row into key and value", failure);
  }
}
use of org.apache.hop.beam.core.HopRow in project hop by apache.
the class PublishMessagesFn method processElement.
/**
 * Reads the binary value of field {@code fieldIndex} from the incoming row and outputs it as
 * the payload of a {@code PubsubMessage} with an empty attribute map.
 *
 * @param processContext the Beam process context providing the input row and the output
 * @throws RuntimeException wrapping any failure; the error counter is incremented first
 */
@ProcessElement
public void processElement(ProcessContext processContext) {
  try {
    final HopRow input = processContext.element();
    readCounter.inc();
    try {
      final Object[] rowData = input.getRow();
      final byte[] payload = rowMeta.getBinary(rowData, fieldIndex);
      processContext.output(new PubsubMessage(payload, new HashMap<>()));
      outputCounter.inc();
    } catch (Exception cause) {
      // Re-thrown so that the outer handler does the counting and logging
      throw new RuntimeException("Unable to pass message", cause);
    }
  } catch (Exception failure) {
    numErrors.inc();
    LOG.error("Error in pub/sub publish messages function", failure);
    throw new RuntimeException("Error in pub/sub publish messages function", failure);
  }
}
use of org.apache.hop.beam.core.HopRow in project hop by apache.
the class PublishStringsFn method processElement.
/**
 * Reads the string value of field {@code fieldIndex} from the incoming row and outputs it.
 *
 * @param processContext the Beam process context providing the input row and the output
 * @throws RuntimeException wrapping any failure; the error counter is incremented first
 */
@ProcessElement
public void processElement(ProcessContext processContext) {
  try {
    final HopRow input = processContext.element();
    readCounter.inc();
    try {
      final Object[] rowData = input.getRow();
      final String text = rowMeta.getString(rowData, fieldIndex);
      processContext.output(text);
      outputCounter.inc();
    } catch (Exception cause) {
      // Re-thrown so that the outer handler does the counting and logging
      throw new RuntimeException("Unable to pass string", cause);
    }
  } catch (Exception failure) {
    numErrors.inc();
    LOG.error("Error in pub/sub publish messages function", failure);
    throw new RuntimeException("Error in pub/sub publish messages function", failure);
  }
}
Aggregations