Search in sources :

Example 1 with GroupByMeta

Use of org.pentaho.di.trans.steps.groupby.GroupByMeta in the project pentaho-metaverse by pentaho.

From the class MetaverseValidationIT, method testGroupByStep:

/**
 * Checks that the number of "uses" stream-field links on the group-by step node
 * equals the number of subject fields plus the number of group fields declared
 * in the step's {@code GroupByMeta}.
 */
@Test
public void testGroupByStep() throws Exception {
    final GroupByStepNode node = root.getGroupByStepNode();
    assertNotNull(node);

    final GroupByMeta groupByMeta = (GroupByMeta) getStepMeta(node);

    // Actual link count as reported by the graph node.
    final int actualUsesLinks = getIterableSize(node.getStreamFieldNodesUses());

    // Expected: one link per aggregated (subject) field and one per grouping field.
    final int subjectFieldCount = groupByMeta.getSubjectField().length;
    final int groupFieldCount = groupByMeta.getGroupField().length;

    assertEquals(subjectFieldCount + groupFieldCount, actualUsesLinks);
}
Also used : GroupByMeta(org.pentaho.di.trans.steps.groupby.GroupByMeta) GroupByStepNode(org.pentaho.metaverse.frames.GroupByStepNode) Test(org.junit.Test)

Example 2 with GroupByMeta

Use of org.pentaho.di.trans.steps.groupby.GroupByMeta in the project pentaho-kettle by pentaho.

From the class TransProfileFactory, method generateTransformation:

/**
 * Builds a three-step profiling transformation:
 * <ol>
 *   <li>a table input step ("Read data") that selects every row of {@code schemaTable},</li>
 *   <li>a "group by" step ("Calc") with no grouping fields that computes a fixed set of
 *       aggregates for every numeric, string, date and boolean field, and</li>
 *   <li>a dummy step named {@code RESULT_STEP_NAME} that receives the aggregated row.</li>
 * </ol>
 * As a side effect the {@code tableLayout} field is refreshed from {@code getTableFields}.
 *
 * @param parentLoggingInterface forwarded to {@code getTableFields} when reading the table layout
 * @return the assembled transformation metadata
 * @throws KettleException propagated from the calls made while building the transformation
 */
public TransMeta generateTransformation(LoggingObjectInterface parentLoggingInterface) throws KettleException {
    PluginRegistry registry = PluginRegistry.getInstance();
    // Get the list of fields from the table...
    //
    tableLayout = getTableFields(parentLoggingInterface);
    // Now start building the transformation...
    //
    TransMeta transMeta = new TransMeta(databaseMeta);
    transMeta.addDatabase(databaseMeta);
    // Create a step to read the content of the table
    // Read the data from the database table...
    // For now we read it all, later we add options to only read the first X rows
    //
    TableInputMeta readMeta = new TableInputMeta();
    readMeta.setSQL("SELECT * FROM " + schemaTable);
    readMeta.setDatabaseMeta(databaseMeta);
    StepMeta read = new StepMeta(registry.getPluginId(StepPluginType.class, readMeta), "Read data", readMeta);
    read.setLocation(50, 50);
    read.setDraw(true);
    transMeta.addStep(read);
    // Now calculate the requested statistics for all fields...
    // TODO: create configuration possibility
    // For now, just do : min, max, sum, count, avg, std dev. (6 for numeric fields;
    // only min, max and count for strings, dates and booleans)
    //
    int[] numericCalculations = new int[] { GroupByMeta.TYPE_GROUP_MIN, GroupByMeta.TYPE_GROUP_MAX, GroupByMeta.TYPE_GROUP_SUM, GroupByMeta.TYPE_GROUP_COUNT_ALL, GroupByMeta.TYPE_GROUP_AVERAGE, GroupByMeta.TYPE_GROUP_STANDARD_DEVIATION };
    int[] stringCalculations = new int[] { GroupByMeta.TYPE_GROUP_MIN, GroupByMeta.TYPE_GROUP_MAX, GroupByMeta.TYPE_GROUP_COUNT_ALL };
    int[] dateCalculations = new int[] { GroupByMeta.TYPE_GROUP_MIN, GroupByMeta.TYPE_GROUP_MAX, GroupByMeta.TYPE_GROUP_COUNT_ALL };
    int[] booleanCalculations = new int[] { GroupByMeta.TYPE_GROUP_MIN, GroupByMeta.TYPE_GROUP_MAX, GroupByMeta.TYPE_GROUP_COUNT_ALL };
    // Run it through the "group by" step without a grouping.
    // Later, we can use the UnivariateStats plugin/step perhaps.
    //
    GroupByMeta statsMeta = new GroupByMeta();
    // First pass: count fields per type so the aggregate arrays can be sized up front.
    int nrNumeric = 0;
    int nrDates = 0;
    int nrStrings = 0;
    int nrBooleans = 0;
    for (ValueMetaInterface valueMeta : tableLayout.getValueMetaList()) {
        if (valueMeta.isNumeric()) {
            nrNumeric++;
        }
        if (valueMeta.isDate()) {
            nrDates++;
        }
        if (valueMeta.isString()) {
            nrStrings++;
        }
        if (valueMeta.isBoolean()) {
            nrBooleans++;
        }
    }
    int nrCalculations = nrNumeric * numericCalculations.length + nrDates * dateCalculations.length + nrStrings * stringCalculations.length + nrBooleans * booleanCalculations.length;
    // Zero group fields, nrCalculations aggregate slots.
    statsMeta.allocate(0, nrCalculations);
    // Second pass: fill the aggregate slots field by field. The type checks are kept as
    // independent tests (not else-if) to mirror the counting pass above exactly.
    int calcIndex = 0;
    for (int i = 0; i < tableLayout.size(); i++) {
        ValueMetaInterface valueMeta = tableLayout.getValueMeta(i);
        if (valueMeta.isNumeric()) {
            calcIndex = fillAggregateSlots(statsMeta, valueMeta, numericCalculations, calcIndex);
        }
        if (valueMeta.isString()) {
            calcIndex = fillAggregateSlots(statsMeta, valueMeta, stringCalculations, calcIndex);
        }
        if (valueMeta.isDate()) {
            calcIndex = fillAggregateSlots(statsMeta, valueMeta, dateCalculations, calcIndex);
        }
        if (valueMeta.isBoolean()) {
            calcIndex = fillAggregateSlots(statsMeta, valueMeta, booleanCalculations, calcIndex);
        }
    }
    StepMeta calc = new StepMeta(registry.getPluginId(StepPluginType.class, statsMeta), "Calc", statsMeta);
    calc.setLocation(250, 50);
    calc.setDraw(true);
    transMeta.addStep(calc);
    TransHopMeta hop = new TransHopMeta(read, calc);
    transMeta.addTransHop(hop);
    DummyTransMeta dummyMeta = new DummyTransMeta();
    StepMeta result = new StepMeta(registry.getPluginId(StepPluginType.class, dummyMeta), RESULT_STEP_NAME, dummyMeta);
    result.setLocation(450, 50);
    result.setDraw(true);
    transMeta.addStep(result);
    TransHopMeta hop2 = new TransHopMeta(calc, result);
    transMeta.addTransHop(hop2);
    return transMeta;
}

/**
 * Writes one aggregate definition per entry of {@code calculations} for the given field
 * into the aggregate arrays of {@code statsMeta}, starting at {@code startIndex}.
 * Each aggregate is named {@code <field name>(<type description>)}.
 *
 * @param statsMeta    group-by metadata whose aggregate arrays are already allocated
 * @param valueMeta    the field the aggregates are computed on
 * @param calculations aggregate type codes to register for the field
 * @param startIndex   first free slot in the aggregate arrays
 * @return the index of the next free slot after the ones written here
 */
private int fillAggregateSlots(GroupByMeta statsMeta, ValueMetaInterface valueMeta, int[] calculations, int startIndex) {
    int index = startIndex;
    for (int calculation : calculations) {
        statsMeta.getAggregateField()[index] = valueMeta.getName() + "(" + GroupByMeta.getTypeDesc(calculation) + ")";
        statsMeta.getSubjectField()[index] = valueMeta.getName();
        statsMeta.getAggregateType()[index] = calculation;
        index++;
    }
    return index;
}
Also used : GroupByMeta(org.pentaho.di.trans.steps.groupby.GroupByMeta) StepPluginType(org.pentaho.di.core.plugins.StepPluginType) PluginRegistry(org.pentaho.di.core.plugins.PluginRegistry) DummyTransMeta(org.pentaho.di.trans.steps.dummytrans.DummyTransMeta) StepMeta(org.pentaho.di.trans.step.StepMeta) TableInputMeta(org.pentaho.di.trans.steps.tableinput.TableInputMeta) ValueMetaInterface(org.pentaho.di.core.row.ValueMetaInterface) DummyTransMeta(org.pentaho.di.trans.steps.dummytrans.DummyTransMeta)

Aggregations

GroupByMeta (org.pentaho.di.trans.steps.groupby.GroupByMeta): 2, Test (org.junit.Test): 1, PluginRegistry (org.pentaho.di.core.plugins.PluginRegistry): 1, StepPluginType (org.pentaho.di.core.plugins.StepPluginType): 1, ValueMetaInterface (org.pentaho.di.core.row.ValueMetaInterface): 1, StepMeta (org.pentaho.di.trans.step.StepMeta): 1, DummyTransMeta (org.pentaho.di.trans.steps.dummytrans.DummyTransMeta): 1, TableInputMeta (org.pentaho.di.trans.steps.tableinput.TableInputMeta): 1, GroupByStepNode (org.pentaho.metaverse.frames.GroupByStepNode): 1