Use of org.pentaho.di.trans.steps.groupby.GroupByMeta in the project pentaho-metaverse by pentaho.
Class MetaverseValidationIT, method testGroupByStep.
/**
 * Checks that the group-by step node exposes one "uses" stream-field link for every
 * subject field and every group field declared in the step's {@link GroupByMeta}.
 */
@Test
public void testGroupByStep() throws Exception {
    GroupByStepNode stepNode = root.getGroupByStepNode();
    assertNotNull(stepNode);

    GroupByMeta groupByMeta = (GroupByMeta) getStepMeta(stepNode);

    // Actual number of "uses" links found on the node.
    int actualUses = getIterableSize(stepNode.getStreamFieldNodesUses());

    // Expected: one link per aggregated (subject) field plus one per grouping field.
    int subjectFieldCount = groupByMeta.getSubjectField().length;
    int groupFieldCount = groupByMeta.getGroupField().length;
    int expectedUses = subjectFieldCount + groupFieldCount;

    assertEquals(expectedUses, actualUses);
}
Use of org.pentaho.di.trans.steps.groupby.GroupByMeta in the project pentaho-kettle by pentaho.
Class TransProfileFactory, method generateTransformation.
/**
 * Builds a transformation that profiles every supported column of the configured table.
 *
 * <p>The generated transformation chains three steps:
 * <ol>
 *   <li>"Read data" - a table input running {@code SELECT * FROM <schemaTable>};</li>
 *   <li>"Calc" - a group-by step without grouping fields that computes the statistics;</li>
 *   <li>{@code RESULT_STEP_NAME} - a dummy step that receives the calculated row.</li>
 * </ol>
 *
 * <p>Numeric fields get min, max, sum, count, average and standard deviation; string, date
 * and boolean fields get min, max and count. Fields of any other type are skipped.
 *
 * <p>Side effect: the {@code tableLayout} field is replaced with the result of
 * {@code getTableFields(parentLoggingInterface)}.
 *
 * @param parentLoggingInterface logging parent handed to {@code getTableFields}
 * @return the assembled transformation metadata
 * @throws KettleException declared by this method; presumably raised while reading the table
 *         layout or resolving plugins - confirm against the callees
 */
public TransMeta generateTransformation(LoggingObjectInterface parentLoggingInterface) throws KettleException {
    PluginRegistry registry = PluginRegistry.getInstance();

    // Get the list of fields from the table...
    //
    tableLayout = getTableFields(parentLoggingInterface);

    // Now start building the transformation...
    //
    TransMeta transMeta = new TransMeta(databaseMeta);
    transMeta.addDatabase(databaseMeta);

    // Create a step to read the content of the table.
    // For now we read it all, later we add options to only read the first X rows.
    // NOTE(review): schemaTable is concatenated into the SQL as-is; presumably it is already
    // a quoted schema/table combination - confirm where the field is assigned.
    //
    TableInputMeta readMeta = new TableInputMeta();
    readMeta.setSQL("SELECT * FROM " + schemaTable);
    readMeta.setDatabaseMeta(databaseMeta);
    StepMeta read = new StepMeta(registry.getPluginId(StepPluginType.class, readMeta), "Read data", readMeta);
    read.setLocation(50, 50);
    read.setDraw(true);
    transMeta.addStep(read);

    // Now calculate the requested statistics for all fields...
    // TODO: create configuration possibility
    // For now, just do : min, max, sum, count, avg, std dev. (6)
    //
    int[] numericCalculations = new int[] { GroupByMeta.TYPE_GROUP_MIN, GroupByMeta.TYPE_GROUP_MAX, GroupByMeta.TYPE_GROUP_SUM, GroupByMeta.TYPE_GROUP_COUNT_ALL, GroupByMeta.TYPE_GROUP_AVERAGE, GroupByMeta.TYPE_GROUP_STANDARD_DEVIATION };
    int[] stringCalculations = new int[] { GroupByMeta.TYPE_GROUP_MIN, GroupByMeta.TYPE_GROUP_MAX, GroupByMeta.TYPE_GROUP_COUNT_ALL };
    int[] dateCalculations = new int[] { GroupByMeta.TYPE_GROUP_MIN, GroupByMeta.TYPE_GROUP_MAX, GroupByMeta.TYPE_GROUP_COUNT_ALL };
    int[] booleanCalculations = new int[] { GroupByMeta.TYPE_GROUP_MIN, GroupByMeta.TYPE_GROUP_MAX, GroupByMeta.TYPE_GROUP_COUNT_ALL };

    // Run it through the "group by" step without a grouping.
    // Later, we can use the UnivariateStats plugin/step perhaps.
    //
    GroupByMeta statsMeta = new GroupByMeta();

    // First pass: count the aggregate slots needed so the arrays can be allocated up front.
    int nrCalculations = 0;
    for (ValueMetaInterface valueMeta : tableLayout.getValueMetaList()) {
        if (valueMeta.isNumeric()) {
            nrCalculations += numericCalculations.length;
        }
        if (valueMeta.isDate()) {
            nrCalculations += dateCalculations.length;
        }
        if (valueMeta.isString()) {
            nrCalculations += stringCalculations.length;
        }
        if (valueMeta.isBoolean()) {
            nrCalculations += booleanCalculations.length;
        }
    }
    statsMeta.allocate(0, nrCalculations);

    // Second pass: fill the slots, field by field, in the order numeric, string, date, boolean.
    // CHECKSTYLE:LineLength:OFF
    int calcIndex = 0;
    for (ValueMetaInterface valueMeta : tableLayout.getValueMetaList()) {
        if (valueMeta.isNumeric()) {
            calcIndex = appendCalculations(statsMeta, valueMeta, numericCalculations, calcIndex);
        }
        if (valueMeta.isString()) {
            calcIndex = appendCalculations(statsMeta, valueMeta, stringCalculations, calcIndex);
        }
        if (valueMeta.isDate()) {
            calcIndex = appendCalculations(statsMeta, valueMeta, dateCalculations, calcIndex);
        }
        if (valueMeta.isBoolean()) {
            calcIndex = appendCalculations(statsMeta, valueMeta, booleanCalculations, calcIndex);
        }
    }

    StepMeta calc = new StepMeta(registry.getPluginId(StepPluginType.class, statsMeta), "Calc", statsMeta);
    calc.setLocation(250, 50);
    calc.setDraw(true);
    transMeta.addStep(calc);
    transMeta.addTransHop(new TransHopMeta(read, calc));

    DummyTransMeta dummyMeta = new DummyTransMeta();
    StepMeta result = new StepMeta(registry.getPluginId(StepPluginType.class, dummyMeta), RESULT_STEP_NAME, dummyMeta);
    result.setLocation(450, 50);
    result.setDraw(true);
    transMeta.addStep(result);
    transMeta.addTransHop(new TransHopMeta(calc, result));

    return transMeta;
}

/**
 * Writes one aggregate definition per entry of {@code calculations} into consecutive slots of
 * {@code statsMeta}, starting at {@code calcIndex}.
 *
 * <p>Each slot gets the output name {@code <field>(<type description>)}, the field itself as
 * subject, and the aggregate type.
 *
 * @param statsMeta group-by metadata whose aggregate arrays are already allocated
 * @param valueMeta field the aggregates are computed on
 * @param calculations aggregate type constants to apply to the field
 * @param calcIndex first free slot in the aggregate arrays
 * @return the index of the next free slot after the ones written here
 */
private static int appendCalculations(GroupByMeta statsMeta, ValueMetaInterface valueMeta, int[] calculations, int calcIndex) {
    String fieldName = valueMeta.getName();
    for (int calculation : calculations) {
        statsMeta.getAggregateField()[calcIndex] = fieldName + "(" + GroupByMeta.getTypeDesc(calculation) + ")";
        statsMeta.getSubjectField()[calcIndex] = fieldName;
        statsMeta.getAggregateType()[calcIndex] = calculation;
        calcIndex++;
    }
    return calcIndex;
}
Aggregations