Search in sources :

Example 1 with MemoryGroupByMeta

use of org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta in project hop by apache.

the class BeamPipelineMetaUtil method generateStreamLookupPipelineMeta.

/**
 * Assembles a test pipeline that feeds the customers input into both a Memory Group By
 * (count per "stateCode") and a Stream Lookup, where the Stream Lookup uses the group-by
 * output as its info stream, and finally writes the looked-up rows with a Beam output
 * transform.
 *
 * @param transname name assigned to the generated pipeline
 * @param inputTransformName name of the Beam input transform
 * @param outputTransformName name of the Beam output transform
 * @param metadataProvider provider in which the customers file definition is saved and
 *     which is attached to the pipeline
 * @return the fully wired pipeline metadata
 * @throws Exception if any of the metadata or pipeline construction calls fails
 */
public static final PipelineMeta generateStreamLookupPipelineMeta(String transname, String inputTransformName, String outputTransformName, IHopMetadataProvider metadataProvider) throws Exception {
    // Persist the customers file layout so the input transform can reference it by name.
    IHopMetadataSerializer<FileDefinition> definitionStore = metadataProvider.getSerializer(FileDefinition.class);
    FileDefinition customersDefinition = createCustomersInputFileDefinition();
    definitionStore.save(customersDefinition);

    PipelineMeta pipeline = new PipelineMeta();
    pipeline.setName(transname);
    pipeline.setMetadataProvider(metadataProvider);

    // --- Input: the customers file. It feeds both the group-by and the lookup's main stream.
    BeamInputMeta customersInput = new BeamInputMeta();
    customersInput.setInputLocation(PipelineTestBase.INPUT_CUSTOMERS_FILE);
    customersInput.setFileDefinitionName(customersDefinition.getName());
    TransformMeta inputTransform = new TransformMeta(inputTransformName, customersInput);
    inputTransform.setTransformPluginId(BeamConst.STRING_BEAM_INPUT_PLUGIN_ID);
    pipeline.addTransform(inputTransform);

    // --- Memory Group By: one group field, one aggregate ("rowsPerState" over "id").
    MemoryGroupByMeta countPerState = new MemoryGroupByMeta();
    countPerState.allocate(1, 1);
    countPerState.getGroupField()[0] = "stateCode";
    countPerState.getAggregateType()[0] = MemoryGroupByMeta.TYPE_GROUP_COUNT_ALL;
    countPerState.getAggregateField()[0] = "rowsPerState";
    countPerState.getSubjectField()[0] = "id";
    TransformMeta countTransform = new TransformMeta("rowsPerState", countPerState);
    pipeline.addTransform(countTransform);
    PipelineHopMeta inputToCount = new PipelineHopMeta(inputTransform, countTransform);
    pipeline.addPipelineHop(inputToCount);

    // --- Stream Lookup: match on "stateCode" and fetch "rowsPerState" as "nrPerState".
    StreamLookupMeta lookup = new StreamLookupMeta();
    lookup.allocate(1, 1);
    lookup.getKeystream()[0] = "stateCode";
    lookup.getKeylookup()[0] = "stateCode";
    lookup.getValue()[0] = "rowsPerState";
    lookup.getValueName()[0] = "nrPerState";
    lookup.getValueDefault()[0] = null;
    lookup.getValueDefaultType()[0] = IValueMeta.TYPE_INTEGER;
    lookup.setMemoryPreservationActive(false);
    // The first info stream is served by the Memory Group By transform.
    lookup.getTransformIOMeta().getInfoStreams().get(0).setTransformMeta(countTransform);
    TransformMeta lookupTransform = new TransformMeta("Stream Lookup", lookup);
    pipeline.addTransform(lookupTransform);
    // Main stream hop first, then the info stream hop.
    PipelineHopMeta inputToLookup = new PipelineHopMeta(inputTransform, lookupTransform);
    pipeline.addPipelineHop(inputToLookup);
    PipelineHopMeta countToLookup = new PipelineHopMeta(countTransform, lookupTransform);
    pipeline.addPipelineHop(countToLookup);

    // --- Output: write the looked-up rows as stream-lookup*.csv files.
    BeamOutputMeta resultOutput = new BeamOutputMeta();
    resultOutput.setOutputLocation("/tmp/customers/output/");
    resultOutput.setFileDefinitionName(null);
    resultOutput.setFilePrefix("stream-lookup");
    resultOutput.setFileSuffix(".csv");
    // Windowed output is not yet supported.
    resultOutput.setWindowed(false);
    TransformMeta outputTransform = new TransformMeta(outputTransformName, resultOutput);
    outputTransform.setTransformPluginId("BeamOutput");
    pipeline.addTransform(outputTransform);
    PipelineHopMeta lookupToOutput = new PipelineHopMeta(lookupTransform, outputTransform);
    pipeline.addPipelineHop(lookupToOutput);

    return pipeline;
}
Also used : MemoryGroupByMeta(org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta) FileDefinition(org.apache.hop.beam.metadata.FileDefinition) BeamOutputMeta(org.apache.hop.beam.transforms.io.BeamOutputMeta) BeamInputMeta(org.apache.hop.beam.transforms.io.BeamInputMeta) TransformMeta(org.apache.hop.pipeline.transform.TransformMeta) StreamLookupMeta(org.apache.hop.pipeline.transforms.streamlookup.StreamLookupMeta) PipelineHopMeta(org.apache.hop.pipeline.PipelineHopMeta) PipelineMeta(org.apache.hop.pipeline.PipelineMeta)

Example 2 with MemoryGroupByMeta

use of org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta in project hop by apache.

the class BeamPipelineMetaUtil method generateBeamGroupByPipelineMeta.

/**
 * Assembles a test pipeline that reads the customers input, groups it by "state" with a
 * Memory Group By transform producing two aggregates over "id" ("nrIds" and "sumIds"),
 * and writes the grouped rows with a Beam output transform.
 *
 * @param transname name assigned to the generated pipeline
 * @param inputTransformName name of the Beam input transform
 * @param outputTransformName name of the Beam output transform
 * @param metadataProvider provider in which the customers file definition is saved and
 *     which is attached to the pipeline
 * @return the fully wired pipeline metadata
 * @throws Exception if any of the metadata or pipeline construction calls fails
 */
public static final PipelineMeta generateBeamGroupByPipelineMeta(String transname, String inputTransformName, String outputTransformName, IHopMetadataProvider metadataProvider) throws Exception {
    // Persist the customers file layout so the input transform can reference it by name.
    IHopMetadataSerializer<FileDefinition> definitionStore = metadataProvider.getSerializer(FileDefinition.class);
    FileDefinition customersDefinition = createCustomersInputFileDefinition();
    definitionStore.save(customersDefinition);

    PipelineMeta pipeline = new PipelineMeta();
    pipeline.setName(transname);
    pipeline.setMetadataProvider(metadataProvider);

    // --- Input: the customers file.
    BeamInputMeta customersInput = new BeamInputMeta();
    customersInput.setInputLocation(PipelineTestBase.INPUT_CUSTOMERS_FILE);
    customersInput.setFileDefinitionName(customersDefinition.getName());
    TransformMeta inputTransform = new TransformMeta(inputTransformName, customersInput);
    inputTransform.setTransformPluginId(BeamConst.STRING_BEAM_INPUT_PLUGIN_ID);
    pipeline.addTransform(inputTransform);

    // --- Memory Group By: one group field ("state") and two aggregates over "id".
    MemoryGroupByMeta groupBy = new MemoryGroupByMeta();
    groupBy.allocate(1, 2);
    groupBy.getGroupField()[0] = "state";
    // Aggregate 0: count(id) -> "nrIds"
    groupBy.getAggregateField()[0] = "nrIds";
    groupBy.getSubjectField()[0] = "id";
    groupBy.getAggregateType()[0] = MemoryGroupByMeta.TYPE_GROUP_COUNT_ALL;
    // Aggregate 1: sum(id) -> "sumIds"
    groupBy.getAggregateField()[1] = "sumIds";
    groupBy.getSubjectField()[1] = "id";
    groupBy.getAggregateType()[1] = MemoryGroupByMeta.TYPE_GROUP_SUM;
    TransformMeta groupByTransform = new TransformMeta("Group By", groupBy);
    pipeline.addTransform(groupByTransform);
    PipelineHopMeta inputToGroupBy = new PipelineHopMeta(inputTransform, groupByTransform);
    pipeline.addPipelineHop(inputToGroupBy);

    // --- Output: write the grouped rows as grouped*.csv files.
    BeamOutputMeta groupedOutput = new BeamOutputMeta();
    groupedOutput.setOutputLocation("/tmp/customers/output/");
    groupedOutput.setFileDefinitionName(null);
    groupedOutput.setFilePrefix("grouped");
    groupedOutput.setFileSuffix(".csv");
    // Windowed output is not yet supported.
    groupedOutput.setWindowed(false);
    TransformMeta outputTransform = new TransformMeta(outputTransformName, groupedOutput);
    outputTransform.setTransformPluginId("BeamOutput");
    pipeline.addTransform(outputTransform);
    PipelineHopMeta groupByToOutput = new PipelineHopMeta(groupByTransform, outputTransform);
    pipeline.addPipelineHop(groupByToOutput);

    return pipeline;
}
Also used : MemoryGroupByMeta(org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta) FileDefinition(org.apache.hop.beam.metadata.FileDefinition) BeamOutputMeta(org.apache.hop.beam.transforms.io.BeamOutputMeta) BeamInputMeta(org.apache.hop.beam.transforms.io.BeamInputMeta) TransformMeta(org.apache.hop.pipeline.transform.TransformMeta) PipelineHopMeta(org.apache.hop.pipeline.PipelineHopMeta) PipelineMeta(org.apache.hop.pipeline.PipelineMeta)

Example 3 with MemoryGroupByMeta

use of org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta in project hop by apache.

the class MemoryGroupByMetaGetFieldsTest method setup.

/**
 * Prepares the fixtures used by each test: a mocked variable space, spied row metadata and
 * transform metadata, and a statically mocked {@code ValueMetaFactory} with a handful of
 * stubbed lookups.
 *
 * <p>Statement order matters here: the static mock must be installed before any of the
 * {@code when(...)} stubs on {@code ValueMetaFactory} are registered.
 *
 * @throws HopPluginException declared because the stubbed factory calls declare it
 *     (presumably — confirm against ValueMetaFactory's signatures)
 */
@Before
public void setup() throws HopPluginException {
    // Variable space mock: every two-argument getVariable(...) call answers "N".
    mockSpace = mock(IVariables.class);
    doReturn("N").when(mockSpace).getVariable(any(), anyString());
    // Spies wrap real instances so individual methods can be verified or stubbed by tests.
    rowMeta = spy(new RowMeta());
    memoryGroupByMeta = spy(new MemoryGroupByMeta());
    // Replace ValueMetaFactory's static methods with mocks
    // (NOTE(review): mockStatic is presumably PowerMockito's — confirm against the imports).
    mockStatic(ValueMetaFactory.class);
    // The generic one- and two-argument factory overloads fall through to the real code.
    when(ValueMetaFactory.createValueMeta(anyInt())).thenCallRealMethod();
    when(ValueMetaFactory.createValueMeta(anyString(), anyInt())).thenCallRealMethod();
    // Specific four-argument lookups return fixed value metas: type id 3 yields a date meta,
    // type id 5 an integer meta (the ids match the getValueMetaName stubs below).
    when(ValueMetaFactory.createValueMeta("maxDate", 3, -1, -1)).thenReturn(new ValueMetaDate("maxDate"));
    when(ValueMetaFactory.createValueMeta("minDate", 3, -1, -1)).thenReturn(new ValueMetaDate("minDate"));
    when(ValueMetaFactory.createValueMeta("countDate", 5, -1, -1)).thenReturn(new ValueMetaInteger("countDate"));
    // Type-id-to-name lookups used for ids 3 and 5.
    when(ValueMetaFactory.getValueMetaName(3)).thenReturn("Date");
    when(ValueMetaFactory.getValueMetaName(5)).thenReturn("Integer");
}
Also used : MemoryGroupByMeta(org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta) IRowMeta(org.apache.hop.core.row.IRowMeta) RowMeta(org.apache.hop.core.row.RowMeta) IVariables(org.apache.hop.core.variables.IVariables) ValueMetaInteger(org.apache.hop.core.row.value.ValueMetaInteger) ValueMetaDate(org.apache.hop.core.row.value.ValueMetaDate) Before(org.junit.Before)

Example 4 with MemoryGroupByMeta

use of org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta in project hop by apache.

the class BeamGroupByTransformHandler method handleTransform.

/**
 * Converts a Memory Group By transform into a {@code GroupByTransform}, applies it to the
 * incoming collection, and registers the resulting collection under the transform's name.
 *
 * <p>The aggregate type codes held by the transform metadata are turned into their
 * descriptions via {@code MemoryGroupByMeta.getTypeDesc} before being handed to the
 * {@code GroupByTransform}.
 *
 * @param transformMeta the transform being handled; its name is used as the Beam step name
 *     and as the key in {@code transformCollectionMap}
 * @param transformCollectionMap receives the output collection of this transform
 * @param rowMeta input row layout, passed on in JSON form
 * @param previousTransforms only its size is used, for the log line
 * @param input the collection the group-by is applied to
 * @throws HopException if loading the metadata or building the transform fails
 */
@Override
public void handleTransform(ILogChannel log, IVariables variables, IBeamPipelineEngineRunConfiguration runConfiguration, IHopMetadataProvider metadataProvider, PipelineMeta pipelineMeta, List<String> transformPluginClasses, List<String> xpPluginClasses, TransformMeta transformMeta, Map<String, PCollection<HopRow>> transformCollectionMap, Pipeline pipeline, IRowMeta rowMeta, List<TransformMeta> previousTransforms, PCollection<HopRow> input) throws HopException {
    // Load this transform's settings into a fresh metadata object.
    MemoryGroupByMeta groupByMeta = new MemoryGroupByMeta();
    loadTransformMetadata(groupByMeta, transformMeta, metadataProvider, pipelineMeta);

    // Translate each aggregate type code into its description.
    int[] aggregateTypes = groupByMeta.getAggregateType();
    String[] aggregateTypeNames = new String[aggregateTypes.length];
    int slot = 0;
    for (int aggregateType : aggregateTypes) {
        aggregateTypeNames[slot++] = MemoryGroupByMeta.getTypeDesc(aggregateType);
    }

    String transformName = transformMeta.getName();

    // Build the Beam transform from the input row layout and the group-by settings.
    PTransform<PCollection<HopRow>, PCollection<HopRow>> groupByTransform =
        new GroupByTransform(
            transformName,
            JsonRowMeta.toJson(rowMeta),
            transformPluginClasses,
            xpPluginClasses,
            groupByMeta.getGroupField(),
            groupByMeta.getSubjectField(),
            aggregateTypeNames,
            groupByMeta.getAggregateField());

    // Apply it to the incoming collection and publish the result under this transform's name.
    PCollection<HopRow> groupedRows = input.apply(transformName, groupByTransform);
    transformCollectionMap.put(transformName, groupedRows);

    log.logBasic("Handled Group By (TRANSFORM) : " + transformName + ", gets data from " + previousTransforms.size() + " previous transform(s)");
}
Also used : MemoryGroupByMeta(org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta) PCollection(org.apache.beam.sdk.values.PCollection) HopRow(org.apache.hop.beam.core.HopRow) GroupByTransform(org.apache.hop.beam.core.transform.GroupByTransform)

Aggregations

MemoryGroupByMeta (org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta): 4, FileDefinition (org.apache.hop.beam.metadata.FileDefinition): 2, BeamInputMeta (org.apache.hop.beam.transforms.io.BeamInputMeta): 2, BeamOutputMeta (org.apache.hop.beam.transforms.io.BeamOutputMeta): 2, PipelineHopMeta (org.apache.hop.pipeline.PipelineHopMeta): 2, PipelineMeta (org.apache.hop.pipeline.PipelineMeta): 2, TransformMeta (org.apache.hop.pipeline.transform.TransformMeta): 2, PCollection (org.apache.beam.sdk.values.PCollection): 1, HopRow (org.apache.hop.beam.core.HopRow): 1, GroupByTransform (org.apache.hop.beam.core.transform.GroupByTransform): 1, IRowMeta (org.apache.hop.core.row.IRowMeta): 1, RowMeta (org.apache.hop.core.row.RowMeta): 1, ValueMetaDate (org.apache.hop.core.row.value.ValueMetaDate): 1, ValueMetaInteger (org.apache.hop.core.row.value.ValueMetaInteger): 1, IVariables (org.apache.hop.core.variables.IVariables): 1, StreamLookupMeta (org.apache.hop.pipeline.transforms.streamlookup.StreamLookupMeta): 1, Before (org.junit.Before): 1