Search in sources :

Example 1 with CsvParseException

Use of org.pentaho.platform.dataaccess.datasource.wizard.models.CsvParseException in the project data-access by pentaho.

From the class CsvUtils, method getDataProfile.

/**
 * Reads a sample of a delimited text file and builds a {@link DataProfile} from it.
 *
 * <p>At most {@code rowLimit} lines are read in total; the first
 * {@code fileInfo.getHeaderRows()} of them are treated as header rows and the rest as data
 * rows. Every token is trimmed. The resulting profile contains one {@link DataRow} per sampled
 * data line (padded with {@code null} up to the widest sampled line) and one {@link ColumnInfo}
 * per column. A column's title is taken from the last header row when that row has a value for
 * the column; otherwise it is {@code CsvFileInfo.DEFAULT_COLUMN_NAME_PREFIX} followed by the
 * 1-based, three-digit column number.
 *
 * @param fileInfo     supplies the delimiter, the enclosure and the number of header rows
 * @param rowLimit     maximum number of lines (header rows included) to sample
 * @param fileLocation path of the file to open
 * @param fileType     file format flag passed through to {@code TextFileInput.getLine}
 * @param encoding     character set name used to decode the file
 * @return the profile built from the sampled rows
 * @throws CsvParseException if an {@link IllegalArgumentException} is raised while a line is
 *                           being read or tokenized; it carries the 1-based row number and the
 *                           most recently read line
 * @throws Exception         any other failure while opening, reading or closing the file
 */
private DataProfile getDataProfile(CsvFileInfo fileInfo, int rowLimit, String fileLocation, int fileType, String encoding) throws Exception {
    DataProfile result = new DataProfile();
    String line = null;
    int row = 0;
    List<List<String>> headerSample = new ArrayList<List<String>>();
    List<List<String>> dataSample = new ArrayList<List<String>>(rowLimit);
    int maxColumns = 0;
    // Declared outside the try so the finally block can still close the raw stream when one of
    // the wrappers (BOM stream / reader) fails to construct, e.g. on an unsupported encoding.
    InputStream inputStream = null;
    InputStreamReader reader = null;
    // Set only when the whole sample was read; lets the finally block tell a clean run from a
    // failing one so a close error never replaces the original read/parse exception.
    boolean readCompleted = false;
    try {
        inputStream = new FileInputStream(fileLocation);
        UnicodeBOMInputStream bomIs = new UnicodeBOMInputStream(inputStream);
        reader = new InputStreamReader(bomIs, encoding);
        bomIs.skipBOM();
        // read each line of text file
        StringBuilder stringBuilder = new StringBuilder(1000);
        line = TextFileInput.getLine(null, reader, fileType, stringBuilder);
        while (line != null && row < rowLimit) {
            CSVTokenizer csvt = new CSVTokenizer(line, fileInfo.getDelimiter(), fileInfo.getEnclosure());
            List<String> rowData = new ArrayList<String>();
            while (csvt.hasMoreTokens()) {
                String token = csvt.nextToken();
                if (token != null) {
                    token = token.trim();
                }
                rowData.add(token);
            }
            // Track the widest line seen so the sample arrays can be sized to fit every row.
            if (maxColumns < rowData.size()) {
                maxColumns = rowData.size();
            }
            if (row < fileInfo.getHeaderRows()) {
                headerSample.add(rowData);
            } else {
                dataSample.add(rowData);
            }
            line = TextFileInput.getLine(null, reader, fileType, stringBuilder);
            row++;
        }
        readCompleted = true;
    } catch (IllegalArgumentException iae) {
        // $NON-NLS-1$
        Logger.error(getClass().getSimpleName(), "There was an issue parsing the CSV file", iae);
        throw new CsvParseException(row + 1, line);
    } catch (Exception e) {
        // $NON-NLS-1$
        Logger.error(getClass().getSimpleName(), "Could not read CSV", e);
        throw e;
    } finally {
        // close the file: prefer the reader (outermost wrapper); fall back to the raw stream if
        // the reader was never created
        try {
            if (reader != null) {
                reader.close();
            } else if (inputStream != null) {
                inputStream.close();
            }
        } catch (Exception e) {
            if (readCompleted) {
                // No other exception is in flight, so surface the close failure to the caller.
                throw e;
            }
            // A read/parse exception is already propagating; log instead of masking it.
            // $NON-NLS-1$
            Logger.error(getClass().getSimpleName(), "Could not close CSV", e);
        }
    }

    // Copy the header sample into a rectangular array; short rows stay null-padded.
    String[][] headerValues = new String[headerSample.size()][maxColumns];
    int rowNo = 0;
    for (List<String> values : headerSample) {
        int colNo = 0;
        for (String value : values) {
            headerValues[rowNo][colNo] = value;
            colNo++;
        }
        rowNo++;
    }

    // Copy the data sample the same way and wrap each row in a DataRow.
    // (The former per-column max-length bookkeeping was never read and dereferenced tokens
    // that may be null, so it has been dropped.)
    String[][] dataValues = new String[dataSample.size()][maxColumns];
    DataRow[] data = new DataRow[dataSample.size()];
    rowNo = 0;
    for (List<String> values : dataSample) {
        int colNo = 0;
        for (String value : values) {
            dataValues[rowNo][colNo] = value;
            colNo++;
        }
        data[rowNo] = new DataRow();
        data[rowNo].setCells(dataValues[rowNo]);
        rowNo++;
    }
    result.setRows(data);

    // Build one ColumnInfo per column, titled from the last header row when available.
    // $NON-NLS-1$
    DecimalFormat df = new DecimalFormat("000");
    ColumnInfo[] profiles = new ColumnInfo[maxColumns];
    for (int idx = 0; idx < maxColumns; idx++) {
        ColumnInfo profile = new ColumnInfo();
        profiles[idx] = profile;
        String title = CsvFileInfo.DEFAULT_COLUMN_NAME_PREFIX + df.format(idx + 1);
        // $NON-NLS-1$
        String colId = "PC_" + idx;
        if (headerValues.length > 0) {
            String headerTitle = headerValues[headerValues.length - 1][idx];
            if (headerTitle != null) {
                title = headerTitle;
                colId = title;
                // Header text that is not already a valid id is converted into one.
                if (!Util.validateId(title)) {
                    colId = Util.toId(colId);
                }
            }
        }
        profile.setTitle(title);
        profile.setId(colId);
        List<String> samples = getColumnData(idx, dataValues);
        assumeColumnDetails(profile, samples);
    }
    result.setColumns(profiles);
    return result;
}
Also used : InputStreamReader(java.io.InputStreamReader) CsvParseException(org.pentaho.platform.dataaccess.datasource.wizard.models.CsvParseException) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) DecimalFormat(java.text.DecimalFormat) ArrayList(java.util.ArrayList) ColumnInfo(org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo) CSVTokenizer(org.pentaho.reporting.libraries.base.util.CSVTokenizer) DataRow(org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) CsvParseException(org.pentaho.platform.dataaccess.datasource.wizard.models.CsvParseException) FileNotFoundException(java.io.FileNotFoundException) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

FileInputStream (java.io.FileInputStream)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 DecimalFormat (java.text.DecimalFormat)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 ColumnInfo (org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo)1 CsvParseException (org.pentaho.platform.dataaccess.datasource.wizard.models.CsvParseException)1 DataRow (org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow)1 CSVTokenizer (org.pentaho.reporting.libraries.base.util.CSVTokenizer)1