use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.
the class TransSingleThreadIT method testSingleThreadedTrans.
/**
 * Drives a transformation in single-threaded mode for a fixed number of iterations, feeding the
 * same input rows into the "INPUT" step each time and asserting that the "OUTPUT" step writes
 * exactly six rows per iteration. Timing figures are printed periodically and once at the end.
 */
public void testSingleThreadedTrans() throws Exception {
  KettleEnvironment.init();

  // Load the transformation definition and switch it to the single-threaded engine.
  TransMeta meta = new TransMeta("src/it/resources/SingleThreadedTest - Stream Lookup.ktr");
  meta.setTransformationType(TransformationType.SingleThreaded);
  long startedAt = System.currentTimeMillis();

  // Prepare the transformation, collect what OUTPUT writes and obtain a producer for INPUT.
  Trans trans = new Trans(meta);
  trans.setLogLevel(LogLevel.MINIMAL);
  trans.prepareExecution(null);
  StepInterface outputStep = trans.getStepInterface("OUTPUT", 0);
  RowStepCollector collector = new RowStepCollector();
  outputStep.addRowListener(collector);
  RowProducer producer = trans.addRowProducer("INPUT", 0);
  trans.startThreads();

  // With the single-threaded transformation type the caller has to run the steps itself,
  // which is what the executor is for. init() initializes all steps.
  SingleThreadedTransExecutor executor = new SingleThreadedTransExecutor(trans);
  executor.init();

  final int iterations = 1000000;
  long cumulativeDelay = 0;
  List<RowMetaAndData> sourceRows = createData();

  for (int iteration = 0; iteration < iterations; iteration++) {
    // Hand a fresh copy of every input row to the producer.
    for (RowMetaAndData source : sourceRows) {
      Object[] clonedRow = source.getRowMeta().cloneRow(source.getData());
      producer.putRow(source.getRowMeta(), clonedRow);
    }

    // Time a single pass over the steps.
    long iterationStart = System.currentTimeMillis();
    boolean keepGoing = executor.oneIteration();
    if (!keepGoing) {
      fail("We don't expect any step or the transformation to be done before the end of all iterations.");
    }
    long iterationEnd = System.currentTimeMillis();
    long delay = iterationEnd - iterationStart;
    cumulativeDelay += delay;

    // Every 100000 iterations (but not on the very first one) print a progress line.
    boolean reportProgress = iteration > 0 && (iteration % 100000) == 0;
    if (reportProgress) {
      long rowsRead = trans.findRunThread("bottles").getLinesRead();
      double elapsedSeconds = (double) (iterationEnd - startedAt) / 1000;
      double speed = Const.round(rowsRead / elapsedSeconds, 1);

      // Sum the sizes of all input and output row sets of every step.
      int bufferedRows = 0;
      for (StepMetaDataCombi combi : trans.getSteps()) {
        for (RowSet inputSet : combi.step.getInputRowSets()) {
          bufferedRows += inputSet.size();
        }
        for (RowSet outputSet : combi.step.getOutputRowSets()) {
          bufferedRows += outputSet.size();
        }
      }

      double averageDelay = Const.round(((double) cumulativeDelay / (iteration + 1)), 1);
      System.out.println("#" + iteration + " : Finished processing one iteration in " + delay + "ms, average is: " + averageDelay + ", speed=" + speed + " row/s, total rows buffered: " + bufferedRows);
    }

    // Result has one row less because we filter out one.
    // We also join with 3 identical rows in a data grid, giving 9 rows of which 3 are filtered out.
    List<RowMetaAndData> written = collector.getRowsWritten();
    assertEquals("Error found in iteration " + iteration, 6, written.size());
    collector.clear();
  }

  producer.finished();

  // Dispose all steps.
  executor.dispose();

  // Final timing summary.
  long totalRowsRead = trans.findRunThread("bottles").getLinesRead();
  long finishedAt = System.currentTimeMillis();
  long elapsedMillis = finishedAt - startedAt;

  System.out.println("Average delay before idle : " + Const.round(((double) cumulativeDelay / iterations), 1));

  double elapsedSeconds = Const.round(((double) elapsedMillis / 1000), 1);
  System.out.println("Total transformation runtime for " + iterations + " iterations :" + elapsedSeconds + " seconds");

  double millisPerIteration = Const.round(((double) elapsedMillis / iterations), 2);
  System.out.println("Runtime per iteration: " + millisPerIteration + " miliseconds");

  double rowsPerSecond = Const.round(totalRowsRead / ((double) elapsedMillis / 1000), 1);
  System.out.println("Average speed: " + rowsPerSecond + " rows/second");
}
use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.
the class CsvProcessRowInParallelTest method createAndRunOneStep.
/**
 * Builds a step combination for the shared file, configures its CSV input data for the given
 * step number out of the total number of steps, and runs it.
 *
 * @return the value reported by {@code processRows} for the configured step
 */
private int createAndRunOneStep(File sharedFile, int stepNr, int totalNumberOfSteps, boolean headersPresent, String delimiter) throws Exception {
  StepMetaDataCombi combi = createBaseCombi(sharedFile, headersPresent, delimiter);
  CsvInputData csvData = (CsvInputData) combi.data;
  configureData(csvData, stepNr, totalNumberOfSteps);
  return processRows(combi);
}
use of org.pentaho.di.trans.step.StepMetaDataCombi in project pdi-platform-plugin by pentaho.
the class PdiAction method registerAsStepListener.
/**
 * Registers this component as a row listener on the named step of a transformation. This allows
 * this component to receive rows of data from the transformation when it executes. These rows are
 * made available to other components in the action sequence as a result set.
 *
 * <p>When the step is found, the output and error result sets are re-created with metadata built
 * from the step's field names before the listener is attached. When {@code trans} is {@code null}
 * or no step carries the given name, nothing is registered and no result sets are created.
 *
 * @param stepName name of the step whose rows should be captured
 * @param trans the transformation that contains the step; may be {@code null}
 * @throws KettleStepException presumably when the transformation metadata cannot provide the
 *         fields of the step
 */
protected void registerAsStepListener(String stepName, Trans trans) throws KettleStepException {
  if (trans == null) {
    return;
  }
  for (StepMetaDataCombi step : trans.getSteps()) {
    if (!step.stepname.equals(stepName)) {
      continue;
    }
    // This is the step we are looking for.
    if (log.isDebugEnabled()) {
      log.debug(Messages.getInstance().getString("Kettle.DEBUG_FOUND_STEP_IMPORTER")); //$NON-NLS-1$
      log.debug(Messages.getInstance().getString("Kettle.DEBUG_GETTING_STEP_METADATA")); //$NON-NLS-1$
    }
    RowMetaInterface row = trans.getTransMeta().getStepFields(stepName);

    // Create the metadata that the Pentaho result sets need: a single header row holding a
    // private copy of the step's field names.
    String[][] columns = { row.getFieldNames().clone() };
    if (log.isDebugEnabled()) {
      log.debug(Messages.getInstance().getString("Kettle.DEBUG_CREATING_RESULTSET_METADATA")); //$NON-NLS-1$
    }
    MemoryMetaData metaData = new MemoryMetaData(columns, null);
    transformationOutputRows = new MemoryResultSet(metaData);
    transformationOutputErrorRows = new MemoryResultSet(metaData);

    // Add ourself as a row listener; only the first matching step is used.
    step.step.addRowListener(this);
    break;
  }
}
use of org.pentaho.di.trans.step.StepMetaDataCombi in project pdi-dataservice-server-plugin by pentaho.
the class CachedServiceTest method testReplayPartialCache.
/**
 * Replays a complete cache for a query limited to 20 rows. Checks that the service
 * transformation is stopped rather than started, that copies of the first 20 cached rows are
 * offered to the row producer, and that the replay future reports 20 rows.
 */
@Test
public void testReplayPartialCache() throws Exception {
DataServiceExecutor executor = dataServiceExecutor(BASE_QUERY + " LIMIT 20");
CachedService cachedService = CachedService.complete(testData);
RowProducer rowProducer = genTrans.addRowProducer(INJECTOR_STEP, 0);
// Activate cachedServiceLoader
Executor mockExecutor = mock(Executor.class);
final CachedServiceLoader cachedServiceLoader = new CachedServiceLoader(cachedService, mockExecutor);
ListenableFuture<Integer> replay = cachedServiceLoader.replay(executor);
// Capture the task handed to the mocked executor so it can be run synchronously further down
ArgumentCaptor<Runnable> replayRunnable = ArgumentCaptor.forClass(Runnable.class);
verify(mockExecutor).execute(replayRunnable.capture());
// Make serviceTrans report exactly one step, backed by the input step fixtures
stepMetaDataCombi.step = inputStep;
stepMetaDataCombi.meta = inputStepMetaInterface;
stepMetaDataCombi.data = inputStepDataInterface;
List<StepMetaDataCombi> stepMetaDataCombis = new ArrayList<>();
stepMetaDataCombis.add(stepMetaDataCombi);
when(serviceTrans.getSteps()).thenReturn(stepMetaDataCombis);
// Simulate executing data service
executor.executeListeners(DataServiceExecutor.ExecutionPoint.READY);
executor.executeListeners(DataServiceExecutor.ExecutionPoint.START);
// Verify that serviceTrans never started, genTrans is accepting rows
verify(serviceTrans).stopAll();
verify(inputStep).setOutputDone();
verify(inputStep).dispose(inputStepMetaInterface, inputStepDataInterface);
verify(inputStep).markStop();
verify(serviceTrans, never()).startThreads();
verify(genTrans).startThreads();
// Count every row offered to the producer and always report it as accepted
final AtomicInteger rowsProduced = new AtomicInteger(0);
when(rowProducer.putRowWait(any(RowMetaInterface.class), any(Object[].class), anyInt(), any(TimeUnit.class))).then(new Answer<Boolean>() {
@Override
public Boolean answer(InvocationOnMock invocation) throws Throwable {
rowsProduced.getAndIncrement();
return true;
}
});
// Report genTrans as running only while fewer than 20 rows have been produced
when(genTrans.isRunning()).then(new Answer<Boolean>() {
@Override
public Boolean answer(InvocationOnMock invocation) throws Throwable {
return rowsProduced.get() < 20;
}
});
// Run cache loader (would be asynchronous)
replayRunnable.getValue().run();
verify(rowProducer).finished();
// The future must already be complete; the one second timeout only guards against a hang
assertThat(replay.get(1, TimeUnit.SECONDS), equalTo(20));
assertThat(rowsProduced.get(), equalTo(20));
// Each of the first 20 cached rows must have been offered with equal content but as a
// different array instance, i.e. as a copy of the cached data
for (RowMetaAndData metaAndData : Iterables.limit(testData, 20)) {
Object[] data = metaAndData.getData();
verify(rowProducer).putRowWait(eq(metaAndData.getRowMeta()), and(eq(data), AdditionalMatchers.not(same(data))), anyInt(), any(TimeUnit.class));
}
}
use of org.pentaho.di.trans.step.StepMetaDataCombi in project pdi-dataservice-server-plugin by pentaho.
the class CachedServiceLoader method replay.
/**
 * Rewires the given data service execution so that rows come from {@code rowSupplier} instead of
 * the service transformation, and schedules the task that feeds those rows to the generated
 * transformation.
 *
 * <p>The READY listeners lose any {@code DefaultTransWiring} and gain a listener that stops the
 * service transformation and, for each of its steps, marks output done, disposes the step and
 * marks it stopped. The START listeners lose the {@code TransStarter} of the service
 * transformation and gain a listener that releases the replay task.
 *
 * @param dataServiceExecutor the execution whose listeners and row producer are used
 * @return a future yielding the number of rows accepted by the row producer
 * @throws KettleException declared by this method; the visible code does not throw it directly
 */
ListenableFuture<Integer> replay(DataServiceExecutor dataServiceExecutor) throws KettleException {
  final Trans serviceTrans = dataServiceExecutor.getServiceTrans();
  final Trans genTrans = dataServiceExecutor.getGenTrans();
  final CountDownLatch startReplay = new CountDownLatch(1);
  final RowProducer rowProducer = dataServiceExecutor.addRowProducer();

  List<Runnable> startListeners = dataServiceExecutor.getListenerMap().get(DataServiceExecutor.ExecutionPoint.START);
  List<Runnable> readyListeners = dataServiceExecutor.getListenerMap().get(DataServiceExecutor.ExecutionPoint.READY);

  // The default wiring between the two transformations is not wanted during a replay.
  Iterables.removeIf(readyListeners, Predicates.instanceOf(DefaultTransWiring.class));

  // Neither is the listener that would start the service transformation.
  Iterables.removeIf(startListeners, new Predicate<Runnable>() {
    @Override
    public boolean apply(Runnable candidate) {
      if (!(candidate instanceof TransStarter)) {
        return false;
      }
      return ((TransStarter) candidate).getTrans().equals(serviceTrans);
    }
  });

  // At READY: shut the service transformation down step by step.
  readyListeners.add(new Runnable() {
    @Override
    public void run() {
      serviceTrans.stopAll();
      for (StepMetaDataCombi combi : serviceTrans.getSteps()) {
        combi.step.setOutputDone();
        combi.step.dispose(combi.meta, combi.data);
        combi.step.markStop();
      }
    }
  });

  // At START: let the replay task proceed.
  startListeners.add(new Runnable() {
    @Override
    public void run() {
      startReplay.countDown();
    }
  });

  ListenableFutureTask<Integer> replayTask = ListenableFutureTask.create(new Callable<Integer>() {
    @Override
    public Integer call() throws Exception {
      // Wait up to 30 seconds for the START listener; fail the task if it never fires.
      boolean started = startReplay.await(30, TimeUnit.SECONDS);
      Preconditions.checkState(started, "Cache replay did not start");

      int accepted = 0;
      Iterator<RowMetaAndData> rows = rowSupplier.get();
      while (rows.hasNext() && genTrans.isRunning()) {
        RowMetaAndData current = rows.next();
        RowMetaInterface rowMeta = current.getRowMeta();
        // Offer a copy of the row data rather than the supplied array itself.
        Object[] rowCopy = rowMeta.cloneRow(current.getData());

        // Keep offering the row in 10 second slices until it is taken or genTrans stops.
        boolean taken = false;
        while (!taken && genTrans.isRunning()) {
          taken = rowProducer.putRowWait(rowMeta, rowCopy, 10, TimeUnit.SECONDS);
        }
        if (taken) {
          accepted++;
        }
      }
      rowProducer.finished();
      return accepted;
    }
  });
  executor.execute(replayTask);
  return replayTask;
}
Aggregations