Search in sources:

Example 1 with DataRow

use of org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow in project data-access by pentaho.

In the class CsvUtilsTest, the method generateFields_OneHeaderLine_OneDataLine:

@Test
public void generateFields_OneHeaderLine_OneDataLine() throws Exception {
    // File layout: one header row ("col1","col2") followed by a single data row ("1","2").
    prepareFile(new String[] { "col1", "col2" }, new String[] { "1", "2" });
    ModelInfo info = generateFields(2, null, 1);

    // The header row must produce exactly two columns whose ids match the header cells.
    ColumnInfo[] columns = info.getColumns();
    assertEquals(2, columns.length);
    assertEquals("col1", columns[0].getId());
    assertEquals("col2", columns[1].getId());

    // The single data row must carry both cell values through verbatim.
    DataRow[] data = info.getData();
    assertEquals(1, data.length);
    Object[] cells = data[0].getCells();
    assertEquals(2, cells.length);
    assertEquals("1", cells[0]);
    assertEquals("2", cells[1]);
}
Also used : ModelInfo(org.pentaho.platform.dataaccess.datasource.wizard.models.ModelInfo) ColumnInfo(org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo) DataRow(org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow) Test(org.junit.Test)

Example 2 with DataRow

use of org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow in project data-access by pentaho.

In the class CsvTransformGeneratorIT, the method testGoodTransform:

/**
 * End-to-end preview of the CSV transform: generates the transform for the sample model,
 * runs a preview, and verifies the row count plus the cell types and values of the first
 * two data rows (including the expected dates 2010-01-01 and 2010-01-02).
 */
public void testGoodTransform() throws Exception {
    IPentahoSession session = new StandaloneSession("test");
    KettleSystemListener.environmentInit(session);
    // "Y" means Kettle distinguishes empty strings from null; this changes what cells[4]
    // of the second row is expected to hold.
    String kettleEmptyStringDiffersFromNull = System.getProperty("KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL", "N");
    ModelInfo info = createModel();
    CsvTransformGenerator gen = new CsvTransformGenerator(info, getDatabaseMeta());
    gen.preview(session);
    DataRow[] rows = info.getData();
    assertNotNull(rows);
    assertEquals(235, rows.length);
    // Expected timestamp for the first row: 2010-01-01 00:00:00. Built with Calendar
    // instead of the deprecated Date.setYear/setMonth/setDate/... mutators.
    java.util.Calendar expectedDate = java.util.Calendar.getInstance();
    expectedDate.clear();
    expectedDate.set(2010, java.util.Calendar.JANUARY, 1, 0, 0, 0);
    // --- first row: data types ---
    DataRow row = rows[0];
    assertNotNull(row);
    Object[] cells = row.getCells();
    assertNotNull(cells);
    // assertEquals( 8, cells.length );
    assertTrue(cells[0] instanceof Long);
    assertTrue(cells[1] instanceof Double);
    assertTrue(cells[2] instanceof Long);
    assertTrue(cells[3] instanceof Date);
    assertTrue(cells[4] instanceof String);
    assertTrue(cells[5] instanceof Long);
    assertTrue(cells[6] instanceof Double);
    assertTrue(cells[7] instanceof Boolean);
    // --- first row: values ---
    assertEquals(3L, cells[0]);
    assertEquals(25677.96525, cells[1]);
    assertEquals(1231L, cells[2]);
    assertDateFieldsEqual(expectedDate, (Date) cells[3]);
    assertEquals("Afghanistan", cells[4]);
    assertEquals(11L, cells[5]);
    assertEquals(111.9090909, cells[6]);
    assertEquals(false, cells[7]);
    // --- second row: expected date advances to 2010-01-02 ---
    expectedDate.set(java.util.Calendar.DAY_OF_MONTH, 2);
    row = rows[1];
    assertNotNull(row);
    cells = row.getCells();
    assertNotNull(cells);
    assertTrue(cells[0] instanceof Long);
    assertTrue(cells[1] instanceof Double);
    assertTrue(cells[2] instanceof Long);
    assertTrue(cells[3] instanceof Date);
    if ("Y".equals(kettleEmptyStringDiffersFromNull)) {
        assertEquals("", cells[4]);
    } else {
        // IfNull value does not seem to work, so the missing string arrives as null.
        assertNull(cells[4]);
    }
    assertTrue(cells[5] instanceof Long);
    assertTrue(cells[6] instanceof Double);
    assertTrue(cells[7] instanceof Boolean);
    // --- second row: values ---
    assertEquals(4L, cells[0]);
    assertEquals(24261.81026, cells[1]);
    assertEquals(1663L, cells[2]);
    assertDateFieldsEqual(expectedDate, (Date) cells[3]);
    assertEquals(7L, cells[5]);
    assertEquals(237.5714286, cells[6]);
    assertEquals(true, cells[7]);
}

/**
 * Asserts that {@code actual} matches {@code expected} on year, month, day-of-month,
 * hour and second. Replaces the deprecated per-field Date getters used previously.
 */
private void assertDateFieldsEqual(java.util.Calendar expected, Date actual) {
    java.util.Calendar actualCal = java.util.Calendar.getInstance();
    actualCal.setTime(actual);
    assertEquals(expected.get(java.util.Calendar.YEAR), actualCal.get(java.util.Calendar.YEAR));
    assertEquals(expected.get(java.util.Calendar.MONTH), actualCal.get(java.util.Calendar.MONTH));
    assertEquals(expected.get(java.util.Calendar.DAY_OF_MONTH), actualCal.get(java.util.Calendar.DAY_OF_MONTH));
    assertEquals(expected.get(java.util.Calendar.HOUR_OF_DAY), actualCal.get(java.util.Calendar.HOUR_OF_DAY));
    // MINUTE is intentionally not compared — it fails; apparently a PDI date-parsing bug.
    assertEquals(expected.get(java.util.Calendar.SECOND), actualCal.get(java.util.Calendar.SECOND));
}
Also used : ModelInfo(org.pentaho.platform.dataaccess.datasource.wizard.models.ModelInfo) StandaloneSession(org.pentaho.platform.engine.core.system.StandaloneSession) IPentahoSession(org.pentaho.platform.api.engine.IPentahoSession) DataRow(org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow) Date(java.util.Date)

Example 3 with DataRow

use of org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow in project data-access by pentaho.

In the class CsvUtils, the method getDataProfile:

/**
 * Samples up to {@code rowLimit} lines of a CSV file and builds a {@link DataProfile}:
 * header rows become column metadata (title/id taken from the last header row), data rows
 * become {@link DataRow}s, and per-column details are inferred from the sampled values.
 *
 * @param fileInfo     delimiter, enclosure and header-row-count settings for the file
 * @param rowLimit     maximum number of lines (header + data) to sample
 * @param fileLocation path of the CSV file to read
 * @param fileType     file format constant passed through to {@code TextFileInput.getLine}
 * @param encoding     character encoding used to decode the file (after skipping any BOM)
 * @return a profile containing the sampled rows and per-column metadata
 * @throws CsvParseException if a line cannot be tokenized (carries the 1-based row and line text)
 * @throws Exception         on any other read failure
 */
private DataProfile getDataProfile(CsvFileInfo fileInfo, int rowLimit, String fileLocation, int fileType, String encoding) throws Exception {
    DataProfile result = new DataProfile();
    String line = null;
    int row = 0;
    List<List<String>> headerSample = new ArrayList<List<String>>();
    List<List<String>> dataSample = new ArrayList<List<String>>(rowLimit);
    // Widest row seen so far; all header/data arrays are sized to this.
    int maxColumns = 0;
    InputStream inputStream = null;
    InputStreamReader reader = null;
    try {
        inputStream = new FileInputStream(fileLocation);
        UnicodeBOMInputStream bomIs = new UnicodeBOMInputStream(inputStream);
        reader = new InputStreamReader(bomIs, encoding);
        bomIs.skipBOM();
        // read each line of text file
        StringBuilder stringBuilder = new StringBuilder(1000);
        line = TextFileInput.getLine(null, reader, fileType, stringBuilder);
        while (line != null && row < rowLimit) {
            CSVTokenizer csvt = new CSVTokenizer(line, fileInfo.getDelimiter(), fileInfo.getEnclosure());
            List<String> rowData = new ArrayList<String>();
            int count = 0;
            while (csvt.hasMoreTokens()) {
                // Tokens may be null (e.g. empty fields); only trim non-null values.
                String token = csvt.nextToken();
                if (token != null) {
                    token = token.trim();
                }
                rowData.add(token);
                count++;
            }
            if (maxColumns < count) {
                maxColumns = count;
            }
            // The first getHeaderRows() lines are headers; the rest are data.
            if (row < fileInfo.getHeaderRows()) {
                headerSample.add(rowData);
            } else {
                dataSample.add(rowData);
            }
            line = TextFileInput.getLine(null, reader, fileType, stringBuilder);
            row++;
        }
    } catch (IllegalArgumentException iae) {
        // $NON-NLS-1$
        Logger.error(getClass().getSimpleName(), "There was an issue parsing the CSV file", iae);
        throw new CsvParseException(row + 1, line);
    } catch (Exception e) {
        // $NON-NLS-1$
        Logger.error(getClass().getSimpleName(), "Could not read CSV", e);
        throw e;
    } finally {
        // Close the stream chain. Close failures are deliberately swallowed so they can
        // never mask an exception thrown by the parse above (the previous code re-threw
        // here despite its "ignore" comment, replacing the primary exception).
        try {
            if (reader != null) {
                // Closing the reader also closes the wrapped BOM/file streams.
                reader.close();
            } else if (inputStream != null) {
                // Reader construction failed after the file was opened; close it directly.
                inputStream.close();
            }
        } catch (Exception e) {
            // ignore
        }
    }
    // Copy the header sample into a rectangular array (short rows leave trailing nulls).
    String[][] headerValues = new String[headerSample.size()][maxColumns];
    int rowNo = 0;
    for (List<String> values : headerSample) {
        int colNo = 0;
        for (String value : values) {
            headerValues[rowNo][colNo] = value;
            colNo++;
        }
        rowNo++;
    }
    // Copy the data sample, tracking the maximum value length per column.
    int[] fieldLengths = new int[maxColumns];
    String[][] dataValues = new String[dataSample.size()][maxColumns];
    DataRow[] data = new DataRow[dataSample.size()];
    rowNo = 0;
    for (List<String> values : dataSample) {
        int colNo = 0;
        for (String value : values) {
            dataValues[rowNo][colNo] = value;
            // Guard against null tokens (empty fields) before measuring length;
            // the previous code would NPE here on a null cell.
            if (value != null && value.length() > fieldLengths[colNo]) {
                fieldLengths[colNo] = value.length();
            }
            colNo++;
        }
        data[rowNo] = new DataRow();
        data[rowNo].setCells(dataValues[rowNo]);
        rowNo++;
    }
    result.setRows(data);
    // $NON-NLS-1$
    DecimalFormat df = new DecimalFormat("000");
    ColumnInfo[] profiles = new ColumnInfo[maxColumns];
    for (int idx = 0; idx < maxColumns; idx++) {
        ColumnInfo profile = new ColumnInfo();
        profiles[idx] = profile;
        // Default name like "Field_001" unless the last header row provides one.
        String title = CsvFileInfo.DEFAULT_COLUMN_NAME_PREFIX + df.format(idx + 1);
        // $NON-NLS-1$
        String colId = "PC_" + idx;
        if (headerValues.length > 0) {
            if (headerValues[headerValues.length - 1][idx] != null) {
                title = headerValues[headerValues.length - 1][idx];
                colId = title;
                if (!Util.validateId(title)) {
                    colId = Util.toId(colId);
                }
            }
        }
        profile.setTitle(title);
        profile.setId(colId);
        // Infer column type/length details from the sampled values of this column.
        List<String> samples = getColumnData(idx, dataValues);
        assumeColumnDetails(profile, samples);
    }
    result.setColumns(profiles);
    return result;
}
Also used : InputStreamReader(java.io.InputStreamReader) CsvParseException(org.pentaho.platform.dataaccess.datasource.wizard.models.CsvParseException) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) DecimalFormat(java.text.DecimalFormat) ArrayList(java.util.ArrayList) ColumnInfo(org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo) CSVTokenizer(org.pentaho.reporting.libraries.base.util.CSVTokenizer) DataRow(org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) CsvParseException(org.pentaho.platform.dataaccess.datasource.wizard.models.CsvParseException) FileNotFoundException(java.io.FileNotFoundException) ArrayList(java.util.ArrayList) List(java.util.List)

Example 4 with DataRow

use of org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow in project data-access by pentaho.

In the class PdiRowListener, the method getDataRows:

/**
 * Wraps each raw cell array from the given list in a {@link DataRow}, preserving order.
 *
 * @param list cell arrays, one per row
 * @return an array of {@link DataRow}s of the same size and order as {@code list}
 */
private DataRow[] getDataRows(List<Object[]> list) {
    DataRow[] rows = new DataRow[list.size()];
    int idx = 0;
    for (Object[] cells : list) {
        DataRow dataRow = new DataRow();
        dataRow.setCells(cells);
        rows[idx++] = dataRow;
    }
    return rows;
}
Also used : DataRow(org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow)

Aggregations

DataRow (org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow)4 ColumnInfo (org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo)2 ModelInfo (org.pentaho.platform.dataaccess.datasource.wizard.models.ModelInfo)2 FileInputStream (java.io.FileInputStream)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 DecimalFormat (java.text.DecimalFormat)1 ArrayList (java.util.ArrayList)1 Date (java.util.Date)1 List (java.util.List)1 Test (org.junit.Test)1 IPentahoSession (org.pentaho.platform.api.engine.IPentahoSession)1 CsvParseException (org.pentaho.platform.dataaccess.datasource.wizard.models.CsvParseException)1 StandaloneSession (org.pentaho.platform.engine.core.system.StandaloneSession)1 CSVTokenizer (org.pentaho.reporting.libraries.base.util.CSVTokenizer)1