use of org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta in project hop by apache.
the class BeamPipelineMetaUtil method generateStreamLookupPipelineMeta.
/**
 * Builds a pipeline that adds, to every customer row, the number of rows sharing its state code.
 *
 * <p>The Beam input transform feeds two branches: a Memory Group By that counts rows per
 * {@code stateCode} into the field {@code rowsPerState}, and the main stream of a Stream Lookup.
 * The Stream Lookup uses the Memory Group By as its info stream, matches on {@code stateCode}
 * and exposes the looked-up count as {@code nrPerState}. Its output goes to a Beam output
 * transform.
 *
 * @param transname name assigned to the generated pipeline
 * @param inputTransformName name of the Beam input transform
 * @param outputTransformName name of the Beam output transform
 * @param metadataProvider provider used to save the customers file definition; it is also set on
 *     the generated pipeline
 * @return the assembled pipeline metadata
 * @throws Exception propagated from creating or saving the customers file definition
 */
public static final PipelineMeta generateStreamLookupPipelineMeta(String transname, String inputTransformName, String outputTransformName, IHopMetadataProvider metadataProvider) throws Exception {
  // Save the customers file definition so the input transform can refer to it by name.
  IHopMetadataSerializer<FileDefinition> definitionSerializer = metadataProvider.getSerializer(FileDefinition.class);
  FileDefinition customersDefinition = createCustomersInputFileDefinition();
  definitionSerializer.save(customersDefinition);

  PipelineMeta result = new PipelineMeta();
  result.setName(transname);
  result.setMetadataProvider(metadataProvider);

  // Beam input: the customers file. It serves both the main stream and the lookup branch.
  BeamInputMeta inputMeta = new BeamInputMeta();
  inputMeta.setInputLocation(PipelineTestBase.INPUT_CUSTOMERS_FILE);
  inputMeta.setFileDefinitionName(customersDefinition.getName());
  TransformMeta inputTransform = new TransformMeta(inputTransformName, inputMeta);
  inputTransform.setTransformPluginId(BeamConst.STRING_BEAM_INPUT_PLUGIN_ID);
  result.addTransform(inputTransform);

  // Memory Group By: groups on stateCode and produces the count field rowsPerState.
  MemoryGroupByMeta countPerStateMeta = new MemoryGroupByMeta();
  countPerStateMeta.allocate(1, 1);
  countPerStateMeta.getGroupField()[0] = "stateCode";
  countPerStateMeta.getAggregateType()[0] = MemoryGroupByMeta.TYPE_GROUP_COUNT_ALL;
  countPerStateMeta.getAggregateField()[0] = "rowsPerState";
  countPerStateMeta.getSubjectField()[0] = "id";
  TransformMeta countPerStateTransform = new TransformMeta("rowsPerState", countPerStateMeta);
  result.addTransform(countPerStateTransform);
  result.addPipelineHop(new PipelineHopMeta(inputTransform, countPerStateTransform));

  // Stream Lookup: match on stateCode and fetch rowsPerState as nrPerState.
  StreamLookupMeta lookupMeta = new StreamLookupMeta();
  lookupMeta.allocate(1, 1);
  lookupMeta.getKeystream()[0] = "stateCode";
  lookupMeta.getKeylookup()[0] = "stateCode";
  lookupMeta.getValue()[0] = "rowsPerState";
  lookupMeta.getValueName()[0] = "nrPerState";
  lookupMeta.getValueDefault()[0] = null;
  lookupMeta.getValueDefaultType()[0] = IValueMeta.TYPE_INTEGER;
  lookupMeta.setMemoryPreservationActive(false);
  // The first info stream reads from the Memory Group By transform.
  lookupMeta.getTransformIOMeta().getInfoStreams().get(0).setTransformMeta(countPerStateTransform);
  TransformMeta lookupTransform = new TransformMeta("Stream Lookup", lookupMeta);
  result.addTransform(lookupTransform);
  // Main stream: straight from the input.
  result.addPipelineHop(new PipelineHopMeta(inputTransform, lookupTransform));
  // Info stream: from the Memory Group By.
  result.addPipelineHop(new PipelineHopMeta(countPerStateTransform, lookupTransform));

  // Beam output: receives the rows leaving the Stream Lookup.
  BeamOutputMeta outputMeta = new BeamOutputMeta();
  outputMeta.setOutputLocation("/tmp/customers/output/");
  outputMeta.setFileDefinitionName(null);
  outputMeta.setFilePrefix("stream-lookup");
  outputMeta.setFileSuffix(".csv");
  // Windowed output is not yet supported.
  outputMeta.setWindowed(false);
  TransformMeta outputTransform = new TransformMeta(outputTransformName, outputMeta);
  outputTransform.setTransformPluginId("BeamOutput");
  result.addTransform(outputTransform);
  result.addPipelineHop(new PipelineHopMeta(lookupTransform, outputTransform));

  return result;
}
use of org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta in project hop by apache.
the class BeamPipelineMetaUtil method generateBeamGroupByPipelineMeta.
/**
 * Builds a three-transform pipeline: Beam input, Memory Group By, Beam output.
 *
 * <p>The Memory Group By groups on {@code state} and defines two aggregates over the {@code id}
 * field: a count stored in {@code nrIds} and a sum stored in {@code sumIds}.
 *
 * @param transname name assigned to the generated pipeline
 * @param inputTransformName name of the Beam input transform
 * @param outputTransformName name of the Beam output transform
 * @param metadataProvider provider used to save the customers file definition; it is also set on
 *     the generated pipeline
 * @return the assembled pipeline metadata
 * @throws Exception propagated from creating or saving the customers file definition
 */
public static final PipelineMeta generateBeamGroupByPipelineMeta(String transname, String inputTransformName, String outputTransformName, IHopMetadataProvider metadataProvider) throws Exception {
  // Save the customers file definition so the input transform can refer to it by name.
  IHopMetadataSerializer<FileDefinition> definitionSerializer = metadataProvider.getSerializer(FileDefinition.class);
  FileDefinition customersDefinition = createCustomersInputFileDefinition();
  definitionSerializer.save(customersDefinition);

  PipelineMeta result = new PipelineMeta();
  result.setName(transname);
  result.setMetadataProvider(metadataProvider);

  // Beam input: the customers file.
  BeamInputMeta inputMeta = new BeamInputMeta();
  inputMeta.setInputLocation(PipelineTestBase.INPUT_CUSTOMERS_FILE);
  inputMeta.setFileDefinitionName(customersDefinition.getName());
  TransformMeta inputTransform = new TransformMeta(inputTransformName, inputMeta);
  inputTransform.setTransformPluginId(BeamConst.STRING_BEAM_INPUT_PLUGIN_ID);
  result.addTransform(inputTransform);

  // Memory Group By: one group field and two aggregates.
  MemoryGroupByMeta groupByMeta = new MemoryGroupByMeta();
  groupByMeta.allocate(1, 2);
  groupByMeta.getGroupField()[0] = "state";
  // Aggregate 0: count(id) -> nrIds
  groupByMeta.getAggregateField()[0] = "nrIds";
  groupByMeta.getSubjectField()[0] = "id";
  groupByMeta.getAggregateType()[0] = MemoryGroupByMeta.TYPE_GROUP_COUNT_ALL;
  // Aggregate 1: sum(id) -> sumIds
  groupByMeta.getAggregateField()[1] = "sumIds";
  groupByMeta.getSubjectField()[1] = "id";
  groupByMeta.getAggregateType()[1] = MemoryGroupByMeta.TYPE_GROUP_SUM;
  TransformMeta groupByTransform = new TransformMeta("Group By", groupByMeta);
  result.addTransform(groupByTransform);
  result.addPipelineHop(new PipelineHopMeta(inputTransform, groupByTransform));

  // Beam output: receives the grouped rows.
  BeamOutputMeta outputMeta = new BeamOutputMeta();
  outputMeta.setOutputLocation("/tmp/customers/output/");
  outputMeta.setFileDefinitionName(null);
  outputMeta.setFilePrefix("grouped");
  outputMeta.setFileSuffix(".csv");
  // Windowed output is not yet supported.
  outputMeta.setWindowed(false);
  TransformMeta outputTransform = new TransformMeta(outputTransformName, outputMeta);
  outputTransform.setTransformPluginId("BeamOutput");
  result.addTransform(outputTransform);
  result.addPipelineHop(new PipelineHopMeta(groupByTransform, outputTransform));

  return result;
}
use of org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta in project hop by apache.
the class MemoryGroupByMetaGetFieldsTest method setup.
/**
 * Prepares the fixtures for each test: a mocked {@code IVariables} whose two-argument
 * {@code getVariable} returns "N", spied {@code RowMeta} and {@code MemoryGroupByMeta} instances,
 * and a statically mocked {@code ValueMetaFactory} with a handful of stubbed calls.
 *
 * <p>The stubbing statements are kept in a fixed order on purpose; do not reorder them.
 */
@Before
public void setup() throws HopPluginException {
  // Type ids used in the stubs below; the factory is stubbed to name them "Date" and "Integer".
  final int dateTypeId = 3;
  final int integerTypeId = 5;

  mockSpace = mock(IVariables.class);
  doReturn("N").when(mockSpace).getVariable(any(), anyString());

  rowMeta = spy(new RowMeta());
  memoryGroupByMeta = spy(new MemoryGroupByMeta());

  mockStatic(ValueMetaFactory.class);
  // The one- and two-argument factory overloads fall through to the real implementation.
  when(ValueMetaFactory.createValueMeta(anyInt())).thenCallRealMethod();
  when(ValueMetaFactory.createValueMeta(anyString(), anyInt())).thenCallRealMethod();
  // Fixed results for the specific four-argument calls.
  when(ValueMetaFactory.createValueMeta("maxDate", dateTypeId, -1, -1))
      .thenReturn(new ValueMetaDate("maxDate"));
  when(ValueMetaFactory.createValueMeta("minDate", dateTypeId, -1, -1))
      .thenReturn(new ValueMetaDate("minDate"));
  when(ValueMetaFactory.createValueMeta("countDate", integerTypeId, -1, -1))
      .thenReturn(new ValueMetaInteger("countDate"));
  // Type-id to type-name lookups.
  when(ValueMetaFactory.getValueMetaName(dateTypeId)).thenReturn("Date");
  when(ValueMetaFactory.getValueMetaName(integerTypeId)).thenReturn("Integer");
}
use of org.apache.hop.pipeline.transforms.memgroupby.MemoryGroupByMeta in project hop by apache.
the class BeamGroupByTransformHandler method handleTransform.
/**
 * Translates a Memory Group By transform into a Beam {@code GroupByTransform}, applies it to the
 * input collection and registers the resulting collection under the transform's name.
 *
 * @param log channel that receives a basic-level message once the transform is handled
 * @param variables not used by this handler
 * @param runConfiguration not used by this handler
 * @param metadataProvider passed on when loading the transform metadata
 * @param pipelineMeta passed on when loading the transform metadata
 * @param transformPluginClasses transform plugin class names handed to the Beam transform
 * @param xpPluginClasses extension point plugin class names handed to the Beam transform
 * @param transformMeta the transform being handled; its name labels the Beam transform and keys
 *     the collection map entry
 * @param transformCollectionMap map that receives the output collection of this transform
 * @param pipeline not used by this handler
 * @param rowMeta input row metadata; serialized to JSON for the Beam transform
 * @param previousTransforms preceding transforms; only their count is used, in the log message
 * @param input collection the group-by is applied to
 * @throws HopException declared by the handler contract; propagated from the calls made here
 */
@Override
public void handleTransform(ILogChannel log, IVariables variables, IBeamPipelineEngineRunConfiguration runConfiguration, IHopMetadataProvider metadataProvider, PipelineMeta pipelineMeta, List<String> transformPluginClasses, List<String> xpPluginClasses, TransformMeta transformMeta, Map<String, PCollection<HopRow>> transformCollectionMap, Pipeline pipeline, IRowMeta rowMeta, List<TransformMeta> previousTransforms, PCollection<HopRow> input) throws HopException {
  MemoryGroupByMeta groupByMeta = new MemoryGroupByMeta();
  loadTransformMetadata(groupByMeta, transformMeta, metadataProvider, pipelineMeta);

  // Convert each aggregate type into its description string for the Beam transform.
  String[] aggregateDescriptions = new String[groupByMeta.getAggregateType().length];
  for (int idx = 0; idx < aggregateDescriptions.length; idx++) {
    aggregateDescriptions[idx] = MemoryGroupByMeta.getTypeDesc(groupByMeta.getAggregateType()[idx]);
  }

  final String transformName = transformMeta.getName();
  // The input row metadata is passed in JSON form.
  final String rowMetaJson = JsonRowMeta.toJson(rowMeta);

  PTransform<PCollection<HopRow>, PCollection<HopRow>> groupByTransform =
      new GroupByTransform(
          transformName,
          rowMetaJson,
          transformPluginClasses,
          xpPluginClasses,
          groupByMeta.getGroupField(),
          groupByMeta.getSubjectField(),
          aggregateDescriptions,
          groupByMeta.getAggregateField());

  // Apply the group-by to the incoming collection.
  PCollection<HopRow> grouped = input.apply(transformName, groupByTransform);

  // Make the result available under this transform's name.
  transformCollectionMap.put(transformName, grouped);

  log.logBasic("Handled Group By (TRANSFORM) : " + transformName + ", gets data from " + previousTransforms.size() + " previous transform(s)");
}
Aggregations