Search in sources :

Example 1 with HopRow

use of org.apache.hop.beam.core.HopRow in project hop by apache.

the class BQSchemaAndRecordToHopFn method apply.

/**
 * Converts one BigQuery record (Avro {@link GenericRecord} plus its {@link TableSchema}) into a
 * {@link HopRow} laid out according to the row metadata described by {@code rowMetaJson}.
 *
 * <p>The first successful call performs a one-time setup: it creates the metrics counters,
 * initializes Hop, parses the row metadata, checks that every requested field exists in the
 * BigQuery schema with a recognizable type, and creates the date parsers. If that setup fails,
 * the function is left uninitialized so that a later call repeats the whole setup instead of
 * running half-initialized.
 *
 * @param schemaAndRecord the BigQuery record together with its table schema
 * @return the converted row; fields that are null in the record stay null in the row
 * @throws RuntimeException wrapping any failure during setup or conversion
 */
@Override
public HopRow apply(SchemaAndRecord schemaAndRecord) {
    try {
        GenericRecord record = schemaAndRecord.getRecord();
        TableSchema tableSchema = schemaAndRecord.getTableSchema();
        if (rowMeta == null) {
            try {
                inputCounter = Metrics.counter(Pipeline.METRIC_NAME_INPUT, transformName);
                writtenCounter = Metrics.counter(Pipeline.METRIC_NAME_WRITTEN, transformName);
                errorCounter = Metrics.counter(Pipeline.METRIC_NAME_ERROR, transformName);
                // Initialize Hop
                //
                BeamHop.init(transformPluginClasses, xpPluginClasses);
                rowMeta = JsonRowMeta.fromJson(rowMetaJson);
                // Record, per requested field, the Hop type that the BigQuery schema maps to.
                // Entries that are never written keep the array default of 0.
                //
                int[] valueTypes = new int[rowMeta.size()];
                List<TableFieldSchema> fields = tableSchema.getFields();
                for (int i = 0; i < fields.size(); i++) {
                    TableFieldSchema fieldSchema = fields.get(i);
                    String name = fieldSchema.getName();
                    int index = rowMeta.indexOfValue(name);
                    // Schema fields that were not requested are simply ignored
                    //
                    if (index >= 0) {
                        String avroTypeString = fieldSchema.getType();
                        try {
                            AvroType avroType = AvroType.valueOf(avroTypeString);
                            valueTypes[index] = avroType.getHopType();
                        } catch (IllegalArgumentException e) {
                            throw new RuntimeException("Unable to recognize data type '" + avroTypeString + "'", e);
                        }
                    }
                }
                // A type of 0 means no schema field set it: treat the requested field as missing.
                // NOTE(review): assumes AvroType.getHopType() never returns 0 - confirm.
                //
                for (int i = 0; i < rowMeta.size(); i++) {
                    if (valueTypes[i] == 0) {
                        IValueMeta valueMeta = rowMeta.getValueMeta(i);
                        throw new RuntimeException("Unable to find field '" + valueMeta.getName() + "'");
                    }
                }
                simpleDateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
                simpleDateTimeFormat.setLenient(true);
                simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
                simpleDateFormat.setLenient(true);
                Metrics.counter(Pipeline.METRIC_NAME_INIT, transformName).inc();
            } catch (Exception initError) {
                // rowMeta doubles as the "initialized" flag: clear it so that the next call redoes
                // the setup rather than skipping validation and hitting null date parsers.
                //
                rowMeta = null;
                throw initError;
            }
        }
        inputCounter.inc();
        // Convert to the requested Hop Data types
        //
        Object[] row = RowDataUtil.allocateRowData(rowMeta.size());
        for (int index = 0; index < rowMeta.size(); index++) {
            IValueMeta valueMeta = rowMeta.getValueMeta(index);
            Object srcData = record.get(valueMeta.getName());
            if (srcData != null) {
                switch(valueMeta.getType()) {
                    case IValueMeta.TYPE_STRING:
                        row[index] = srcData.toString();
                        break;
                    case IValueMeta.TYPE_INTEGER:
                        row[index] = (Long) srcData;
                        break;
                    case IValueMeta.TYPE_NUMBER:
                        row[index] = (Double) srcData;
                        break;
                    case IValueMeta.TYPE_BOOLEAN:
                        row[index] = (Boolean) srcData;
                        break;
                    case IValueMeta.TYPE_DATE:
                        // The value arrives as text (an Avro Utf8 or a plain String), not as a number.
                        // A 10 character value is a plain date, anything else is parsed as a date-time.
                        //
                        String datetimeString = srcData.toString();
                        if (datetimeString.length() == 10) {
                            row[index] = simpleDateFormat.parse(datetimeString);
                        } else {
                            row[index] = simpleDateTimeFormat.parse(datetimeString);
                        }
                        break;
                    default:
                        throw new RuntimeException("Conversion from Avro JSON to Hop is not yet supported for Hop data type '" + valueMeta.getTypeDesc() + "'");
                }
            }
        }
        // Pass the row to the process context
        //
        writtenCounter.inc();
        return new HopRow(row);
    } catch (Exception e) {
        // The counter does not exist yet when the failure happened before or during its creation
        //
        if (errorCounter != null) {
            errorCounter.inc();
        }
        // Hand the exception itself to the logger so the stack trace is not lost
        //
        LOG.error("Error converting BQ Avro data into Hop rows : " + e.getMessage(), e);
        throw new RuntimeException("Error converting BQ Avro data into Hop rows", e);
    }
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) IValueMeta(org.apache.hop.core.row.IValueMeta) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) HopRow(org.apache.hop.beam.core.HopRow) SimpleDateFormat(java.text.SimpleDateFormat)

Example 2 with HopRow

use of org.apache.hop.beam.core.HopRow in project hop by apache.

the class GroupByFn method processElement.

/**
 * Aggregates all subject rows of one group and outputs a single row holding the group key
 * values followed by one aggregation result per entry of {@code aggregationTypes}.
 *
 * <p>SUM and AVERAGE are supported for Integer, Number and BigNumber fields. Null subject values
 * are skipped by SUM and AVERAGE, and an aggregate that never saw a non-null value stays null.
 * The average of an Integer field uses integer division.
 *
 * @param processContext supplies the {@code KV} of group key row and subject rows, and receives
 *     the aggregated output row
 * @throws RuntimeException wrapping any failure while aggregating
 */
@ProcessElement
public void processElement(ProcessContext processContext) {
    try {
        // Get a KV
        //
        KV<HopRow, Iterable<HopRow>> inputElement = processContext.element();
        // Get the key row
        //
        HopRow groupHopRow = inputElement.getKey();
        Object[] groupRow = groupHopRow.getRow();
        // The aggregation results for this window; Java already initializes these to null and 0
        //
        Object[] results = new Object[aggregationTypes.length];
        long[] counts = new long[aggregationTypes.length];
        Iterable<HopRow> subjectHopRows = inputElement.getValue();
        for (HopRow subjectHopRow : subjectHopRows) {
            Object[] subjectRow = subjectHopRow.getRow();
            readCounter.inc();
            // Fold this subject row into every aggregate
            //
            for (int i = 0; i < aggregationTypes.length; i++) {
                IValueMeta subjectValueMeta = subjectRowMeta.getValueMeta(i);
                Object subject = subjectRow[i];
                Object result = results[i];
                switch(aggregationTypes[i]) {
                    case AVERAGE:
                        // Count the non-null values, then accumulate exactly like SUM
                        //
                        if (!subjectValueMeta.isNull(subject)) {
                            counts[i]++;
                        }
                    // intentional fall-through: AVERAGE shares the accumulation of SUM
                    case SUM:
                        // A null subject adds nothing; skipping it also avoids unboxing null
                        //
                        if (subject != null) {
                            if (result == null) {
                                result = subject;
                            } else {
                                switch(subjectValueMeta.getType()) {
                                    case IValueMeta.TYPE_INTEGER:
                                        result = (Long) result + (Long) subject;
                                        break;
                                    case IValueMeta.TYPE_NUMBER:
                                        result = (Double) result + (Double) subject;
                                        break;
                                    case IValueMeta.TYPE_BIGNUMBER:
                                        result = ((BigDecimal) result).add((BigDecimal) subject);
                                        break;
                                    default:
                                        throw new HopException("SUM aggregation not yet implemented for field and data type : " + subjectValueMeta.toString());
                                }
                            }
                        }
                        break;
                    case COUNT_ALL:
                        if (subject != null) {
                            if (result == null) {
                                result = Long.valueOf(1L);
                            } else {
                                result = (Long) result + 1L;
                            }
                        }
                        break;
                    case MIN:
                        if (subjectValueMeta.isNull(result)) {
                            // Previous result was null?  Then take the subject
                            result = subject;
                        } else {
                            if (subjectValueMeta.compare(subject, result) < 0) {
                                result = subject;
                            }
                        }
                        break;
                    case MAX:
                        if (subjectValueMeta.isNull(result)) {
                            // Previous result was null?  Then take the subject
                            result = subject;
                        } else {
                            if (subjectValueMeta.compare(subject, result) > 0) {
                                result = subject;
                            }
                        }
                        break;
                    case FIRST_INCL_NULL:
                        // counts[i] is used here as a "value already taken" marker
                        if (counts[i] == 0) {
                            counts[i]++;
                            result = subject;
                        }
                        break;
                    case LAST_INCL_NULL:
                        result = subject;
                        break;
                    case FIRST:
                        if (!subjectValueMeta.isNull(subject) && counts[i] == 0) {
                            counts[i]++;
                            result = subject;
                        }
                        break;
                    case LAST:
                        if (!subjectValueMeta.isNull(subject)) {
                            result = subject;
                        }
                        break;
                    default:
                        throw new HopException("Sorry, aggregation type yet: " + aggregationTypes[i].name() + " isn't implemented yet");
                }
                results[i] = result;
            }
        }
        // Turn the accumulated sums of the AVERAGE aggregates into averages
        //
        for (int i = 0; i < results.length; i++) {
            switch(aggregationTypes[i]) {
                case AVERAGE:
                    if (results[i] == null) {
                        // No non-null value was seen: the average stays null
                        break;
                    }
                    IValueMeta subjectValueMeta = subjectRowMeta.getValueMeta(i);
                    switch(subjectValueMeta.getType()) {
                        case IValueMeta.TYPE_NUMBER:
                            double dbl = (Double) results[i];
                            if (counts[i] != 0) {
                                dbl /= counts[i];
                            }
                            results[i] = dbl;
                            break;
                        case IValueMeta.TYPE_INTEGER:
                            long lng = (Long) results[i];
                            if (counts[i] != 0) {
                                lng /= counts[i];
                            }
                            results[i] = lng;
                            break;
                        case IValueMeta.TYPE_BIGNUMBER:
                            BigDecimal bd = (BigDecimal) results[i];
                            if (counts[i] != 0) {
                                // A MathContext is required: an exact divide throws on quotients
                                // with a non-terminating decimal expansion such as 1/3
                                bd = bd.divide(BigDecimal.valueOf(counts[i]), java.math.MathContext.DECIMAL128);
                            }
                            results[i] = bd;
                            break;
                        default:
                            throw new HopException("Unable to calculate average on data type : " + subjectValueMeta.getTypeDesc());
                    }
                    break;
                default:
                    // Every other aggregation type already holds its final value
                    break;
            }
        }
        // Now we have the results
        // Concatenate both group and result...
        //
        Object[] resultRow = RowDataUtil.allocateRowData(groupRowMeta.size() + subjectRowMeta.size());
        int index = 0;
        for (int i = 0; i < groupRowMeta.size(); i++) {
            resultRow[index++] = groupRow[i];
        }
        for (int i = 0; i < subjectRowMeta.size(); i++) {
            resultRow[index++] = results[i];
        }
        // Send it on its way
        //
        processContext.output(new HopRow(resultRow));
        writtenCounter.inc();
    } catch (Exception e) {
        errorCounter.inc();
        LOG.error("Error grouping by ", e);
        throw new RuntimeException("Unable to split row into group and subject ", e);
    }
}
Also used : HopException(org.apache.hop.core.exception.HopException) BigDecimal(java.math.BigDecimal) HopException(org.apache.hop.core.exception.HopException) IValueMeta(org.apache.hop.core.row.IValueMeta) HopRow(org.apache.hop.beam.core.HopRow)

Example 3 with HopRow

use of org.apache.hop.beam.core.HopRow in project hop by apache.

the class HopKeyValueFn method processElement.

/**
 * Splits the incoming row into a key row and a value row and outputs the two as one {@code KV}.
 *
 * <p>The key row holds the input values at the positions listed in {@code keyIndexes}, the value
 * row those at the positions listed in {@code valueIndexes}, each in the listed order.
 *
 * @param processContext supplies the input row and receives the key/value pair
 * @throws RuntimeException wrapping any failure while splitting the row
 */
@ProcessElement
public void processElement(ProcessContext processContext) {
    try {
        // Fetch the element and count it as read
        //
        HopRow element = processContext.element();
        readCounter.inc();
        Object[] source = element.getRow();

        // Collect the key fields
        //
        Object[] keys = RowDataUtil.allocateRowData(keyIndexes.length);
        int keyPosition = 0;
        for (int sourceIndex : keyIndexes) {
            keys[keyPosition++] = source[sourceIndex];
        }

        // Collect the value fields
        //
        Object[] values = RowDataUtil.allocateRowData(valueIndexes.length);
        int valuePosition = 0;
        for (int sourceIndex : valueIndexes) {
            values[valuePosition++] = source[sourceIndex];
        }

        KV<HopRow, HopRow> pair = KV.of(new HopRow(keys), new HopRow(values));
        processContext.output(pair);
    } catch (Exception e) {
        errorCounter.inc();
        LOG.error("Error splitting row into key and value", e);
        throw new RuntimeException("Unable to split row into key and value", e);
    }
}
Also used : HopRow(org.apache.hop.beam.core.HopRow) HopException(org.apache.hop.core.exception.HopException)

Example 4 with HopRow

use of org.apache.hop.beam.core.HopRow in project hop by apache.

the class PublishMessagesFn method processElement.

/**
 * Reads the binary value of the configured field from the incoming row and outputs it as the
 * payload of a {@link PubsubMessage} with an empty attribute map.
 *
 * @param processContext supplies the input row and receives the message
 * @throws RuntimeException wrapping any failure; a failure while building or emitting the
 *     message is first wrapped as "Unable to pass message"
 */
@ProcessElement
public void processElement(ProcessContext processContext) {
    try {
        final HopRow inputRow = processContext.element();
        readCounter.inc();
        try {
            // The payload is the binary value of the configured field
            //
            final byte[] payload = rowMeta.getBinary(inputRow.getRow(), fieldIndex);
            processContext.output(new PubsubMessage(payload, new HashMap<>()));
            outputCounter.inc();
        } catch (Exception messageError) {
            throw new RuntimeException("Unable to pass message", messageError);
        }
    } catch (Exception failure) {
        // Inner failures end up here as well, so every error is counted and logged exactly once
        //
        numErrors.inc();
        LOG.error("Error in pub/sub publish messages function", failure);
        throw new RuntimeException("Error in pub/sub publish messages function", failure);
    }
}
Also used : HopRow(org.apache.hop.beam.core.HopRow) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage)

Example 5 with HopRow

use of org.apache.hop.beam.core.HopRow in project hop by apache.

the class PublishStringsFn method processElement.

/**
 * Reads the string value of the configured field from the incoming row and outputs it.
 *
 * @param processContext supplies the input row and receives the string
 * @throws RuntimeException wrapping any failure; a failure while reading or emitting the string
 *     is first wrapped as "Unable to pass string"
 */
@ProcessElement
public void processElement(ProcessContext processContext) {
    try {
        final HopRow inputRow = processContext.element();
        readCounter.inc();
        try {
            // The output is the string value of the configured field
            //
            final String text = rowMeta.getString(inputRow.getRow(), fieldIndex);
            processContext.output(text);
            outputCounter.inc();
        } catch (Exception stringError) {
            throw new RuntimeException("Unable to pass string", stringError);
        }
    } catch (Exception failure) {
        // Inner failures end up here as well, so every error is counted and logged exactly once
        //
        numErrors.inc();
        LOG.error("Error in pub/sub publish messages function", failure);
        throw new RuntimeException("Error in pub/sub publish messages function", failure);
    }
}
Also used : HopRow(org.apache.hop.beam.core.HopRow)

Aggregations

HopRow (org.apache.hop.beam.core.HopRow)35 HopException (org.apache.hop.core.exception.HopException)14 IRowMeta (org.apache.hop.core.row.IRowMeta)10 ArrayList (java.util.ArrayList)6 KV (org.apache.beam.sdk.values.KV)5 PCollection (org.apache.beam.sdk.values.PCollection)5 IValueMeta (org.apache.hop.core.row.IValueMeta)5 RowMeta (org.apache.hop.core.row.RowMeta)5 JsonRowMeta (org.apache.hop.beam.core.util.JsonRowMeta)4 TransformMeta (org.apache.hop.pipeline.transform.TransformMeta)4 Date (java.util.Date)3 GenericRecord (org.apache.avro.generic.GenericRecord)3 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)2 TableReference (com.google.api.services.bigquery.model.TableReference)2 TableSchema (com.google.api.services.bigquery.model.TableSchema)2 ByteArrayInputStream (java.io.ByteArrayInputStream)2 Utf8 (org.apache.avro.util.Utf8)2 BigQueryIO (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO)2 PubsubMessage (org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage)2 HopKeyValueFn (org.apache.hop.beam.core.fn.HopKeyValueFn)2