Search in sources :

Example 51 with StepMetaDataCombi

use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.

the class TransSingleThreadIT method testSingleThreadedTrans.

/**
 * Runs the "SingleThreadedTest - Stream Lookup" transformation with the single threaded engine.
 * On every iteration the same input rows are pushed into the INPUT step, one executor iteration
 * is performed, and the test checks that exactly 6 rows were written by the OUTPUT step. Timing
 * and throughput figures are printed to standard out along the way and at the end.
 *
 * @throws Exception if initialising, preparing or running the transformation fails
 */
public void testSingleThreadedTrans() throws Exception {
    KettleEnvironment.init();

    // Load the transformation and switch it to the single threaded engine.
    TransMeta meta = new TransMeta("src/it/resources/SingleThreadedTest - Stream Lookup.ktr");
    meta.setTransformationType(TransformationType.SingleThreaded);
    long startedAt = System.currentTimeMillis();

    // Prepare the transformation, attach a collector to the output step and a producer to the input step.
    Trans trans = new Trans(meta);
    trans.setLogLevel(LogLevel.MINIMAL);
    trans.prepareExecution(null);
    StepInterface outputStep = trans.getStepInterface("OUTPUT", 0);
    RowStepCollector collector = new RowStepCollector();
    outputStep.addRowListener(collector);
    RowProducer producer = trans.addRowProducer("INPUT", 0);
    trans.startThreads();

    // With the single threaded transformation type the steps are driven by this test,
    // through the executor, one iteration at a time.
    SingleThreadedTransExecutor executor = new SingleThreadedTransExecutor(trans);
    executor.init();

    int iterations = 1000000;
    long totalWait = 0;
    List<RowMetaAndData> inputRows = createData();
    for (int iteration = 0; iteration < iterations; iteration++) {
        // Feed a fresh copy of every input row for this pass.
        for (RowMetaAndData input : inputRows) {
            Object[] rowCopy = input.getRowMeta().cloneRow(input.getData());
            producer.putRow(input.getRowMeta(), rowCopy);
        }

        long iterationStart = System.currentTimeMillis();
        if (!executor.oneIteration()) {
            fail("We don't expect any step or the transformation to be done before the end of all iterations.");
        }
        long iterationEnd = System.currentTimeMillis();
        long elapsed = iterationEnd - iterationStart;
        totalWait += elapsed;

        // Print a progress line every 100000 iterations (but not on the very first one).
        boolean reportProgress = iteration > 0 && iteration % 100000 == 0;
        if (reportProgress) {
            long linesRead = trans.findRunThread("bottles").getLinesRead();
            double speed = Const.round(linesRead / ((double) (iterationEnd - startedAt) / 1000), 1);

            // Sum the sizes of all input and output row sets of all steps.
            int buffered = 0;
            for (StepMetaDataCombi combi : trans.getSteps()) {
                for (RowSet in : combi.step.getInputRowSets()) {
                    buffered += in.size();
                }
                for (RowSet out : combi.step.getOutputRowSets()) {
                    buffered += out.size();
                }
            }

            double average = Const.round((double) totalWait / (iteration + 1), 1);
            System.out.println("#" + iteration + " : Finished processing one iteration in " + elapsed + "ms, average is: " + average + ", speed=" + speed + " row/s, total rows buffered: " + buffered);
        }

        // Per the original author's note: the result has one row less because one is filtered
        // out; the join with 3 identical data grid rows gives 9 rows, of which 3 are filtered out.
        int written = collector.getRowsWritten().size();
        assertEquals("Error found in iteration " + iteration, 6, written);
        collector.clear();
    }

    // No more input; dispose all steps.
    producer.finished();
    executor.dispose();

    // Final statistics.
    long totalLinesRead = trans.findRunThread("bottles").getLinesRead();
    long finishedAt = System.currentTimeMillis();
    long runtimeMillis = finishedAt - startedAt;
    System.out.println("Average delay before idle : " + Const.round((double) totalWait / iterations, 1));
    double runtimeSeconds = Const.round((double) runtimeMillis / 1000, 1);
    System.out.println("Total transformation runtime for " + iterations + " iterations :" + runtimeSeconds + " seconds");
    double millisPerIteration = Const.round((double) runtimeMillis / iterations, 2);
    System.out.println("Runtime per iteration: " + millisPerIteration + " miliseconds");
    double rowsPerSecond = Const.round(totalLinesRead / ((double) runtimeMillis / 1000), 1);
    System.out.println("Average speed: " + rowsPerSecond + " rows/second");
}
Also used : RowSet(org.pentaho.di.core.RowSet) StepInterface(org.pentaho.di.trans.step.StepInterface) RowMetaAndData(org.pentaho.di.core.RowMetaAndData) StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi)

Example 52 with StepMetaDataCombi

use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.

the class CsvProcessRowInParallelTest method createAndRunOneStep.

/**
 * Builds one step combination for the shared file, configures its data for the given step
 * number out of the total number of steps, and processes its rows.
 *
 * @param sharedFile         the file handed to {@code createBaseCombi}
 * @param stepNr             number of this step, passed on to {@code configureData}
 * @param totalNumberOfSteps total number of steps, passed on to {@code configureData}
 * @param headersPresent     header flag handed to {@code createBaseCombi}
 * @param delimiter          delimiter handed to {@code createBaseCombi}
 * @return whatever {@code processRows} returns for the configured step (presumably a row count; verify against that helper)
 * @throws Exception if creating, configuring or running the step fails
 */
private int createAndRunOneStep(File sharedFile, int stepNr, int totalNumberOfSteps, boolean headersPresent, String delimiter) throws Exception {
    StepMetaDataCombi combi = createBaseCombi(sharedFile, headersPresent, delimiter);
    CsvInputData stepData = (CsvInputData) combi.data;
    configureData(stepData, stepNr, totalNumberOfSteps);
    return processRows(combi);
}
Also used : StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi)

Example 53 with StepMetaDataCombi

use of org.pentaho.di.trans.step.StepMetaDataCombi in project pdi-platform-plugin by pentaho.

the class PdiAction method registerAsStepListener.

/**
 * Registers this component as a row listener on a step of a transformation. This allows this
 * component to receive rows of data from the transformation when it executes. These rows are made
 * available to other components in the action sequence as a result set.
 * <p>
 * The first step whose name equals {@code stepName} is used: fresh output and error result sets
 * are created from that step's field names and this component is added as its row listener.
 * Nothing happens when {@code trans} is null or no step matches.
 *
 * @param stepName name of the step to listen to
 * @param trans    transformation whose steps are searched; may be null
 * @throws KettleStepException propagated from looking up the fields of the step
 */
protected void registerAsStepListener(String stepName, Trans trans) throws KettleStepException {
    if (trans == null) {
        return;
    }
    for (StepMetaDataCombi candidate : trans.getSteps()) {
        if (!candidate.stepname.equals(stepName)) {
            continue;
        }
        // This is the step we are looking for.
        if (log.isDebugEnabled()) {
            log.debug(Messages.getInstance().getString("Kettle.DEBUG_FOUND_STEP_IMPORTER")); // $NON-NLS-1$
        }
        if (log.isDebugEnabled()) {
            log.debug(Messages.getInstance().getString("Kettle.DEBUG_GETTING_STEP_METADATA")); // $NON-NLS-1$
        }
        RowMetaInterface stepFields = trans.getTransMeta().getStepFields(stepName);

        // Build the single header row that the Pentaho result sets need.
        String[] fieldNames = stepFields.getFieldNames();
        String[][] headers = new String[1][fieldNames.length];
        System.arraycopy(fieldNames, 0, headers[0], 0, fieldNames.length);

        if (log.isDebugEnabled()) {
            log.debug(Messages.getInstance().getString("Kettle.DEBUG_CREATING_RESULTSET_METADATA")); // $NON-NLS-1$
        }
        MemoryMetaData metaData = new MemoryMetaData(headers, null);
        transformationOutputRows = new MemoryResultSet(metaData);
        transformationOutputErrorRows = new MemoryResultSet(metaData);

        // Add ourself as a row listener; only the first matching step is used.
        candidate.step.addRowListener(this);
        return;
    }
}
Also used : StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi) RowMetaInterface(org.pentaho.di.core.row.RowMetaInterface) MemoryMetaData(org.pentaho.commons.connection.memory.MemoryMetaData) MemoryResultSet(org.pentaho.commons.connection.memory.MemoryResultSet)

Example 54 with StepMetaDataCombi

use of org.pentaho.di.trans.step.StepMetaDataCombi in project pdi-dataservice-server-plugin by pentaho.

the class CachedServiceTest method testReplayPartialCache.

/**
 * Replays a complete cache for a query limited to 20 rows and checks that the service
 * transformation is stopped and never started, that the generated transformation is started,
 * and that exactly the first 20 rows of the test data are handed to the row producer as copies.
 */
@Test
public void testReplayPartialCache() throws Exception {
    DataServiceExecutor executor = dataServiceExecutor(BASE_QUERY + " LIMIT 20");
    CachedService cachedService = CachedService.complete(testData);
    RowProducer rowProducer = genTrans.addRowProducer(INJECTOR_STEP, 0);
    // Activate cachedServiceLoader
    Executor mockExecutor = mock(Executor.class);
    final CachedServiceLoader cachedServiceLoader = new CachedServiceLoader(cachedService, mockExecutor);
    ListenableFuture<Integer> replay = cachedServiceLoader.replay(executor);
    // Capture the task handed to the mocked executor so it can be run by hand further down.
    ArgumentCaptor<Runnable> replayRunnable = ArgumentCaptor.forClass(Runnable.class);
    verify(mockExecutor).execute(replayRunnable.capture());
    // Make serviceTrans report a single step built from the input step fixtures
    // (these fields are set up outside this method).
    stepMetaDataCombi.step = inputStep;
    stepMetaDataCombi.meta = inputStepMetaInterface;
    stepMetaDataCombi.data = inputStepDataInterface;
    List<StepMetaDataCombi> stepMetaDataCombis = new ArrayList<>();
    stepMetaDataCombis.add(stepMetaDataCombi);
    when(serviceTrans.getSteps()).thenReturn(stepMetaDataCombis);
    // Simulate executing data service
    executor.executeListeners(DataServiceExecutor.ExecutionPoint.READY);
    executor.executeListeners(DataServiceExecutor.ExecutionPoint.START);
    // Verify that serviceTrans never started, genTrans is accepting rows
    verify(serviceTrans).stopAll();
    verify(inputStep).setOutputDone();
    verify(inputStep).dispose(inputStepMetaInterface, inputStepDataInterface);
    verify(inputStep).markStop();
    verify(serviceTrans, never()).startThreads();
    verify(genTrans).startThreads();
    // Count every row offered to the producer; each offer is reported as accepted.
    final AtomicInteger rowsProduced = new AtomicInteger(0);
    when(rowProducer.putRowWait(any(RowMetaInterface.class), any(Object[].class), anyInt(), any(TimeUnit.class))).then(new Answer<Boolean>() {

        @Override
        public Boolean answer(InvocationOnMock invocation) throws Throwable {
            rowsProduced.getAndIncrement();
            return true;
        }
    });
    // genTrans reports itself as running only until 20 rows have been produced,
    // which stands in for the LIMIT 20 of the query.
    when(genTrans.isRunning()).then(new Answer<Boolean>() {

        @Override
        public Boolean answer(InvocationOnMock invocation) throws Throwable {
            return rowsProduced.get() < 20;
        }
    });
    // Run cache loader (would be asynchronous)
    replayRunnable.getValue().run();
    verify(rowProducer).finished();
    // The future reports 20 rows and exactly 20 rows were offered to the producer.
    assertThat(replay.get(1, TimeUnit.SECONDS), equalTo(20));
    assertThat(rowsProduced.get(), equalTo(20));
    // Each of the first 20 test rows must have been passed on with equal content
    // but as a different array instance, i.e. as a copy.
    for (RowMetaAndData metaAndData : Iterables.limit(testData, 20)) {
        Object[] data = metaAndData.getData();
        verify(rowProducer).putRowWait(eq(metaAndData.getRowMeta()), and(eq(data), AdditionalMatchers.not(same(data))), anyInt(), any(TimeUnit.class));
    }
}
Also used : RowProducer(org.pentaho.di.trans.RowProducer) DataServiceExecutor(org.pentaho.di.trans.dataservice.DataServiceExecutor) ArrayList(java.util.ArrayList) RowMetaInterface(org.pentaho.di.core.row.RowMetaInterface) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ValueMetaInteger(org.pentaho.di.core.row.value.ValueMetaInteger) DataServiceExecutor(org.pentaho.di.trans.dataservice.DataServiceExecutor) Executor(java.util.concurrent.Executor) RowMetaAndData(org.pentaho.di.core.RowMetaAndData) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) InvocationOnMock(org.mockito.invocation.InvocationOnMock) StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi) TimeUnit(java.util.concurrent.TimeUnit) Test(org.junit.Test)

Example 55 with StepMetaDataCombi

use of org.pentaho.di.trans.step.StepMetaDataCombi in project pdi-dataservice-server-plugin by pentaho.

the class CachedServiceLoader method replay.

/**
 * Schedules a replay of the rows from {@code rowSupplier} into the generated transformation
 * of the given executor, in place of running its service transformation.
 * <p>
 * The executor's listeners are rewired: {@code DefaultTransWiring} instances are removed from
 * the READY listeners, the {@code TransStarter} for the service transformation is removed from
 * the START listeners, a READY listener is added that stops the service transformation and
 * disposes its steps, and a START listener is added that releases the replay task. The task
 * itself is submitted to {@code executor}; it waits up to 30 seconds for the START signal and
 * then feeds cloned rows to the row producer for as long as the generated transformation is
 * running.
 *
 * @param dataServiceExecutor executor whose transformations and listeners are used
 * @return future yielding the number of rows accepted by the row producer
 * @throws KettleException declared by this method; presumably raised while adding the row producer (verify against DataServiceExecutor)
 */
ListenableFuture<Integer> replay(DataServiceExecutor dataServiceExecutor) throws KettleException {
    final Trans serviceTrans = dataServiceExecutor.getServiceTrans();
    final Trans genTrans = dataServiceExecutor.getGenTrans();
    final CountDownLatch replayGate = new CountDownLatch(1);
    final RowProducer rowProducer = dataServiceExecutor.addRowProducer();
    List<Runnable> startListeners = dataServiceExecutor.getListenerMap().get(DataServiceExecutor.ExecutionPoint.START);
    List<Runnable> readyListeners = dataServiceExecutor.getListenerMap().get(DataServiceExecutor.ExecutionPoint.READY);

    // Drop the default wiring and whatever would start the service transformation.
    Iterables.removeIf(readyListeners, Predicates.instanceOf(DefaultTransWiring.class));
    Predicate<Runnable> startsServiceTrans = new Predicate<Runnable>() {

        @Override
        public boolean apply(Runnable runnable) {
            if (!(runnable instanceof TransStarter)) {
                return false;
            }
            return ((TransStarter) runnable).getTrans().equals(serviceTrans);
        }
    };
    Iterables.removeIf(startListeners, startsServiceTrans);

    // At READY: stop the service transformation and shut down each of its steps.
    Runnable shutDownServiceTrans = new Runnable() {

        @Override
        public void run() {
            serviceTrans.stopAll();
            for (StepMetaDataCombi combi : serviceTrans.getSteps()) {
                combi.step.setOutputDone();
                combi.step.dispose(combi.meta, combi.data);
                combi.step.markStop();
            }
        }
    };
    readyListeners.add(shutDownServiceTrans);

    // At START: let the replay task proceed.
    Runnable openGate = new Runnable() {

        @Override
        public void run() {
            replayGate.countDown();
        }
    };
    startListeners.add(openGate);

    Callable<Integer> feedRows = new Callable<Integer>() {

        @Override
        public Integer call() throws Exception {
            boolean started = replayGate.await(30, TimeUnit.SECONDS);
            Preconditions.checkState(started, "Cache replay did not start");

            int accepted = 0;
            Iterator<RowMetaAndData> rows = rowSupplier.get();
            while (rows.hasNext() && genTrans.isRunning()) {
                RowMetaAndData cached = rows.next();
                boolean rowAdded = false;
                RowMetaInterface rowMeta = cached.getRowMeta();
                // Hand over a clone of the cached row data, not the cached array itself.
                Object[] rowData = rowMeta.cloneRow(cached.getData());
                // Keep offering the row until it is taken or the transformation stops running.
                while (!rowAdded && genTrans.isRunning()) {
                    rowAdded = rowProducer.putRowWait(rowMeta, rowData, 10, TimeUnit.SECONDS);
                }
                if (rowAdded) {
                    accepted++;
                }
            }
            rowProducer.finished();
            return accepted;
        }
    };
    ListenableFutureTask<Integer> replayTask = ListenableFutureTask.create(feedRows);
    executor.execute(replayTask);
    return replayTask;
}
Also used : RowProducer(org.pentaho.di.trans.RowProducer) RowMetaInterface(org.pentaho.di.core.row.RowMetaInterface) CountDownLatch(java.util.concurrent.CountDownLatch) KettleException(org.pentaho.di.core.exception.KettleException) TransStarter(org.pentaho.di.trans.dataservice.execution.TransStarter) RowMetaAndData(org.pentaho.di.core.RowMetaAndData) StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi) Iterator(java.util.Iterator) Trans(org.pentaho.di.trans.Trans) DefaultTransWiring(org.pentaho.di.trans.dataservice.execution.DefaultTransWiring)

Aggregations

StepMetaDataCombi (org.pentaho.di.trans.step.StepMetaDataCombi)55 StepInterface (org.pentaho.di.trans.step.StepInterface)21 KettleExtensionPoint (org.pentaho.di.core.extension.KettleExtensionPoint)18 Test (org.junit.Test)13 KettleException (org.pentaho.di.core.exception.KettleException)10 ArrayList (java.util.ArrayList)8 StepMeta (org.pentaho.di.trans.step.StepMeta)8 Trans (org.pentaho.di.trans.Trans)7 RowMetaAndData (org.pentaho.di.core.RowMetaAndData)6 KettleValueException (org.pentaho.di.core.exception.KettleValueException)6 RowMetaInterface (org.pentaho.di.core.row.RowMetaInterface)6 StepMetaInterface (org.pentaho.di.trans.step.StepMetaInterface)6 RowSet (org.pentaho.di.core.RowSet)5 UnknownParamException (org.pentaho.di.core.parameters.UnknownParamException)5 TransMeta (org.pentaho.di.trans.TransMeta)5 UnsupportedEncodingException (java.io.UnsupportedEncodingException)4 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)4 Before (org.junit.Before)4 KettleDatabaseException (org.pentaho.di.core.exception.KettleDatabaseException)4 KettleFileException (org.pentaho.di.core.exception.KettleFileException)4