Use of org.pentaho.di.trans.BasePartitioner in project pentaho-kettle by pentaho.
The class BaseStep, method specialPartitioning.
private void specialPartitioning(RowMetaInterface rowMeta, Object[] row) throws KettleStepException {
  if (nextStepPartitioningMeta == null) {
    // Look up the partitioning of the next step.
    // This is the case for non-clustered partitioning...
    //
    List<StepMeta> nextSteps = transMeta.findNextSteps(stepMeta);
    if (nextSteps.size() > 0) {
      nextStepPartitioningMeta = nextSteps.get(0).getStepPartitioningMeta();
    }

    // TODO: throw an exception if we're not partitioning yet.
    // For now this throws a NullPointerException.
  }

  int partitionNr;
  try {
    partitionNr = nextStepPartitioningMeta.getPartition(rowMeta, row);
  } catch (KettleException e) {
    throw new KettleStepException("Unable to convert a value to integer while calculating the partition number", e);
  }

  RowSet selectedRowSet = null;

  if (clusteredPartitioningFirst) {
    clusteredPartitioningFirst = false;

    // We are only running remotely if the distribution is there AND if that distribution actually
    // contains something.
    //
    clusteredPartitioning = transMeta.getSlaveStepCopyPartitionDistribution() != null
      && !transMeta.getSlaveStepCopyPartitionDistribution().getDistribution().isEmpty();
  }

  //
  if (clusteredPartitioning) {
    //
    if (partitionNrRowSetList == null) {
      partitionNrRowSetList = new RowSet[outputRowSets.size()];

      // The distribution is calculated during the transformation split.
      // The slave-step-copy distribution is passed on to the slave transformation.
      //
      SlaveStepCopyPartitionDistribution distribution = transMeta.getSlaveStepCopyPartitionDistribution();
      String nextPartitionSchemaName =
        TransSplitter.createPartitionSchemaNameFromTarget(nextStepPartitioningMeta.getPartitionSchema().getName());

      for (RowSet outputRowSet : outputRowSets) {
        try {
          // Look at the pre-determined distribution, decided at "transformation split" time.
          //
          int partNr = distribution.getPartition(outputRowSet.getRemoteSlaveServerName(),
            nextPartitionSchemaName, outputRowSet.getDestinationStepCopy());
          if (partNr < 0) {
            throw new KettleStepException("Unable to find partition using rowset data, slave="
              + outputRowSet.getRemoteSlaveServerName()
              + ", partition schema=" + nextStepPartitioningMeta.getPartitionSchema().getName()
              + ", copy=" + outputRowSet.getDestinationStepCopy());
          }
          partitionNrRowSetList[partNr] = outputRowSet;
        } catch (NullPointerException e) {
          throw (e);
        }
      }
    }

    //
    if (partitionNr < partitionNrRowSetList.length) {
      selectedRowSet = partitionNrRowSetList[partitionNr];
    } else {
      String rowsets = "";
      for (RowSet rowSet : partitionNrRowSetList) {
        rowsets += "[" + rowSet.toString() + "] ";
      }
      throw new KettleStepException("Internal error: the referenced partition nr '" + partitionNr
        + "' is higher than the maximum of '" + (partitionNrRowSetList.length - 1)
        + "'. The available row sets are: {" + rowsets + "}");
    }

    if (selectedRowSet == null) {
      logBasic(BaseMessages.getString(PKG, "BaseStep.TargetRowsetIsNotAvailable", partitionNr));
    } else {
      // Wait
      putRowToRowSet(selectedRowSet, rowMeta, row);
      incrementLinesWritten();

      if (log.isRowLevel()) {
        try {
          logRowlevel("Partitioned #" + partitionNr + " to " + selectedRowSet + ", row=" + rowMeta.getString(row));
        } catch (KettleValueException e) {
          throw new KettleStepException(e);
        }
      }
    }
  } else {
    // Local partitioning...
    // Put the row forward to the next step according to the partition rule.
    //

    // Number of partitions (i.e. partitioned copies) per next step.
    int partCount = ((BasePartitioner) nextStepPartitioningMeta.getPartitioner()).getNrPartitions();

    // Note: nextSteps here is the array of next steps kept as a field of BaseStep,
    // not the local List declared at the top of this method.
    for (int i = 0; i < nextSteps.length; i++) {
      selectedRowSet = outputRowSets.get(partitionNr + i * partCount);

      if (selectedRowSet == null) {
        logBasic(BaseMessages.getString(PKG, "BaseStep.TargetRowsetIsNotAvailable", partitionNr));
      } else {
        // Wait
        putRowToRowSet(selectedRowSet, rowMeta, row);
        incrementLinesWritten();

        if (log.isRowLevel()) {
          try {
            logRowlevel(BaseMessages.getString(PKG, "BaseStep.PartitionedToRow", partitionNr,
              selectedRowSet, rowMeta.getString(row)));
          } catch (KettleValueException e) {
            throw new KettleStepException(e);
          }
        }
      }
    }
  }
}
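
In the local (non-clustered) branch above, the output row sets are laid out per next step, one row set per partition copy, so the target index is simply partitionNr + i * partCount. The standalone sketch below (plain Java, not Kettle code; the class name is made up, and the counts of 2 next steps and 2 partitions are assumptions borrowed from the unit test further down) just prints that mapping.

// Standalone illustration of the local-partitioning index arithmetic used above.
// Assumed layout of outputRowSets (matching the test below):
// [step0-partition0, step0-partition1, step1-partition0, step1-partition1]
public class LocalPartitionIndexSketch {
  public static void main(String[] args) {
    int nrNextSteps = 2; // plays the role of nextSteps.length in BaseStep
    int partCount = 2;   // plays the role of BasePartitioner.getNrPartitions()
    for (int partitionNr = 0; partitionNr < partCount; partitionNr++) {
      for (int i = 0; i < nrNextSteps; i++) {
        int rowSetIndex = partitionNr + i * partCount;
        System.out.println("partition " + partitionNr + " -> output row set " + rowSetIndex);
      }
    }
    // Prints: partition 0 -> row sets 0 and 2, partition 1 -> row sets 1 and 3,
    // which is the distribution the test below asserts on.
  }
}
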
Use of org.pentaho.di.trans.BasePartitioner in project pentaho-kettle by pentaho.
The class BaseStepTest, method testBaseStepPutRowLocalSpecialPartitioning.
/**
 * Verifies that rows coming from one non-partitioned step are distributed correctly over 2 partitioned
 * copies of the next step.
 *
 * @throws KettleException
 * @see <a href="http://jira.pentaho.com/browse/PDI-12211">http://jira.pentaho.com/browse/PDI-12211</a>
 */
@Test
public void testBaseStepPutRowLocalSpecialPartitioning() throws KettleException {
  List<StepMeta> stepMetas = new ArrayList<StepMeta>();
  stepMetas.add(mockHelper.stepMeta);
  stepMetas.add(mockHelper.stepMeta);
  StepPartitioningMeta stepPartitioningMeta = spy(new StepPartitioningMeta());
  BasePartitioner partitioner = mock(BasePartitioner.class);

  when(mockHelper.logChannelInterfaceFactory.create(any(), any(LoggingObjectInterface.class)))
    .thenAnswer(new Answer<LogChannelInterface>() {
      @Override
      public LogChannelInterface answer(InvocationOnMock invocation) throws Throwable {
        ((BaseStep) invocation.getArguments()[0]).getLogLevel();
        return mockHelper.logChannelInterface;
      }
    });
  when(mockHelper.trans.isRunning()).thenReturn(true);
  when(mockHelper.transMeta.findNextSteps(any(StepMeta.class))).thenReturn(stepMetas);
  when(mockHelper.stepMeta.getStepPartitioningMeta()).thenReturn(stepPartitioningMeta);
  when(stepPartitioningMeta.getPartitioner()).thenReturn(partitioner);
  when(partitioner.getNrPartitions()).thenReturn(2);

  Object object0 = "name0";
  ValueMetaInterface meta0 = new ValueMetaString(object0.toString());
  Object object1 = "name1";
  ValueMetaInterface meta2 = new ValueMetaString(object1.toString());

  RowMetaInterface rowMeta0 = new RowMeta();
  rowMeta0.addValueMeta(meta0);
  Object[] objects0 = { object0 };

  RowMetaInterface rowMeta1 = new RowMeta();
  rowMeta1.addValueMeta(meta2);
  Object[] objects1 = { object1 };

  when(stepPartitioningMeta.getPartition(rowMeta0, objects0)).thenReturn(0);
  when(stepPartitioningMeta.getPartition(rowMeta1, objects1)).thenReturn(1);

  BlockingRowSet[] rowSet =
    { new BlockingRowSet(2), new BlockingRowSet(2), new BlockingRowSet(2), new BlockingRowSet(2) };
  List<RowSet> outputRowSets = new ArrayList<RowSet>();
  outputRowSets.addAll(Arrays.asList(rowSet));

  BaseStep baseStep =
    new BaseStep(mockHelper.stepMeta, mockHelper.stepDataInterface, 0, mockHelper.transMeta, mockHelper.trans);
  baseStep.setStopped(false);
  baseStep.setRepartitioning(StepPartitioningMeta.PARTITIONING_METHOD_SPECIAL);
  baseStep.setOutputRowSets(outputRowSets);

  baseStep.putRow(rowMeta0, objects0);
  baseStep.putRow(rowMeta1, objects1);

  // Partition 0 (object0) should land in row sets 0 and 2; partition 1 (object1) in row sets 1 and 3.
  assertEquals(object0, baseStep.getOutputRowSets().get(0).getRow()[0]);
  assertEquals(object1, baseStep.getOutputRowSets().get(1).getRow()[0]);
  assertEquals(object0, baseStep.getOutputRowSets().get(2).getRow()[0]);
  assertEquals(object1, baseStep.getOutputRowSets().get(3).getRow()[0]);
}
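
For context on where the partition number comes from: the value returned by StepPartitioningMeta.getPartition() is produced by the configured Partitioner, and custom partitioners typically extend BasePartitioner, which supplies getNrPartitions() and init(). The sketch below is only an illustration, not code from the project: the class name and the partition-on-the-first-field rule are invented, and the exact set of methods a concrete partitioner must implement (modeled here on the stock mod partitioner) should be checked against the Partitioner interface of the Kettle version in use.

import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.trans.BasePartitioner;
import org.pentaho.di.trans.Partitioner;

// Hypothetical partitioner: routes each row by the hash of its first field.
public class FirstFieldHashPartitioner extends BasePartitioner {

  @Override
  public Partitioner getInstance() {
    return new FirstFieldHashPartitioner();
  }

  @Override
  public int getPartition(RowMetaInterface rowMeta, Object[] row) throws KettleException {
    // BasePartitioner.init() resolves the number of partitions from the partition schema.
    init(rowMeta);
    Object value = row[0]; // assumption: partition on the first field of the row
    int hash = value == null ? 0 : value.hashCode();
    return Math.abs(hash % getNrPartitions());
  }

  @Override
  public String getDialogClassName() {
    // No configuration dialog for this sketch.
    return null;
  }
}
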