Search in sources :

Example 1 with CSVReader

use of org.skife.csv.CSVReader in project tdi-studio-se by Talend.

the class MultiSchemasManager method retrieveCsvArrayByCSVOption.

/**
 * Parses the delimited file at {@code filePath} and keeps the first row seen for each distinct
 * value found in column {@code selectColumnIndex}.
 *
 * <p>Rows that are {@code null}, empty, too short to contain the key column, or whose key cell is
 * {@code null} or blank are skipped. For every kept row, {@code fieldSeparator} is recorded in the
 * bean's separator list at the same position.
 *
 * @param filePath path of the file to parse
 * @param encoding forwarded to {@code getCSVReader}
 * @param fieldSeparator field separator; forwarded to {@code getCSVReader} and stored next to each kept row
 * @param rowSeparator forwarded to {@code getCSVReader}
 * @param needSkpipEmptyRecord forwarded to {@code getCSVReader}
 * @param splitRecord not used by this method
 * @param selectColumnIndex zero-based index of the column holding the row key
 * @return the collected rows and separators, or {@code null} when the file yields no rows or an
 *         {@link IOException} occurred (the exception is handed to {@code ExceptionHandler})
 */
public CSVArrayAndSeparator retrieveCsvArrayByCSVOption(final String filePath, final String encoding, final String fieldSeparator, final String rowSeparator, final boolean needSkpipEmptyRecord, final boolean splitRecord, int selectColumnIndex) {
    final List items;
    try {
        // Parse the file once; the result serves both the emptiness check and the data pass.
        CSVReader multiSchameCsvReader = getCSVReader(filePath, encoding, fieldSeparator, rowSeparator, needSkpipEmptyRecord);
        items = multiSchameCsvReader.parse(new File(filePath));
    } catch (IOException e) {
        ExceptionHandler.process(e);
        return null;
    }
    if (items.isEmpty()) {
        // Nothing was read: callers receive null, as before.
        return null;
    }

    CSVArrayAndSeparator csvArrayBean = new CSVArrayAndSeparator();
    Set<String> uniqueKey = new HashSet<String>();
    for (Object item : items) {
        String[] values = (String[]) item;
        if (values == null || values.length < 1 || values.length <= selectColumnIndex) {
            continue;
        }
        final String rowKey = values[selectColumnIndex];
        if (rowKey == null || "".equals(rowKey.trim())) { //$NON-NLS-1$
            // A row without a key value cannot be assigned to a schema.
            continue;
        }
        if (!uniqueKey.add(rowKey)) {
            // Key already seen: only the first row per key is kept.
            continue;
        }
        csvArrayBean.getCsvArray().add(values);
        csvArrayBean.getSeparators().add(fieldSeparator);
    }
    return csvArrayBean;
}
Also used : CSVReader(org.skife.csv.CSVReader) CSVArrayAndSeparator(org.talend.designer.filemultischemas.data.CSVArrayAndSeparator) List(java.util.List) ArrayList(java.util.ArrayList) IOException(java.io.IOException) File(java.io.File) HashSet(java.util.HashSet)

Example 2 with CSVReader

use of org.skife.csv.CSVReader in project tdi-studio-se by Talend.

the class MultiSchemasManager method getCsvArrayForMs.

/**
 * Reads the file line by line, tries each character of {@code separators} as field separator, and
 * collects one row per distinct (key value, separator) pair whose key is accepted by
 * {@code isInKeyValues(getKeyValues(), ...)}.
 *
 * <p>For each line the separators are tried in order. The first separator that yields a row with
 * more than one column, a key column, an accepted key and a not-yet-seen (key, separator) pair
 * wins. If no separator wins, the row from the last successful parse of that line is added with
 * the separator at the last index whose pair was already known (index 0 if none), provided that
 * pair is new.
 *
 * @param filePath path of the file, possibly quoted
 * @param separators candidate separator characters, possibly quoted
 * @param encoding forwarded to {@code getCsvReader}
 * @param selectColumnIndex zero-based index of the key column
 * @return the collected rows and their separators; empty when the file does not exist
 * @throws UnsupportedEncodingException declared for {@code getCsvReader}
 * @throws IOException if reading or closing the file fails
 */
private CSVArrayAndSeparator getCsvArrayForMs(String filePath, String separators, String encoding, int selectColumnIndex) throws UnsupportedEncodingException, IOException {
    File file = new File(TalendTextUtils.removeQuotes(filePath));
    separators = TalendTextUtils.removeQuotes(separators);
    CSVArrayAndSeparator csvArrayBean = new CSVArrayAndSeparator();
    if (!file.exists()) {
        return csvArrayBean;
    }
    Set<String> uniqueKey = new HashSet<String>();
    // NOTE(review): FileReader decodes with the platform charset; 'encoding' is not applied here.
    BufferedReader reader = new BufferedReader(new FileReader(file));
    try {
        String readLine;
        while ((readLine = reader.readLine()) != null) {
            // Scoped per line so a line that yields no record never reuses the previous line's row.
            String[] row = null;
            boolean added = false;
            int sepIndex = 0;
            for (int count = 0; count < separators.length(); count++) {
                char separatorChar = separators.charAt(count);
                CSVReader csvReader = getCsvReader(new ByteArrayInputStream(readLine.getBytes()), separatorChar, encoding);
                List items = csvReader.parse(readLine);
                if (items.size() > 0) {
                    row = (String[]) items.get(0);
                }
                if (row == null || row.length <= 1 || row.length <= selectColumnIndex
                        || !isInKeyValues(getKeyValues(), row[selectColumnIndex])) {
                    continue;
                }
                // Single-column rows are handled by the fallback after this loop.
                String separator = String.valueOf(separatorChar);
                if (uniqueKey.add(row[selectColumnIndex] + separator)) {
                    csvArrayBean.getCsvArray().add(row);
                    csvArrayBean.getSeparators().add(separator);
                    added = true;
                    break;
                }
                // Pair already known: remember this separator for the fallback.
                sepIndex = count;
            }
            // Fallback; row is non-null only if the loop ran, so sepIndex is a valid index.
            if (!added && row != null && selectColumnIndex < row.length
                    && isInKeyValues(getKeyValues(), row[selectColumnIndex])) {
                String separator = String.valueOf(separators.charAt(sepIndex));
                if (uniqueKey.add(row[selectColumnIndex] + separator)) {
                    csvArrayBean.getCsvArray().add(row);
                    csvArrayBean.getSeparators().add(separator);
                }
            }
        }
    } finally {
        reader.close();
    }
    return csvArrayBean;
}
Also used : CSVReader(org.skife.csv.CSVReader) CSVArrayAndSeparator(org.talend.designer.filemultischemas.data.CSVArrayAndSeparator) ByteArrayInputStream(java.io.ByteArrayInputStream) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) List(java.util.List) ArrayList(java.util.ArrayList) File(java.io.File) HashSet(java.util.HashSet)

Example 3 with CSVReader

use of org.skife.csv.CSVReader in project tdi-studio-se by Talend.

the class MultiSchemasManager method getCsvArrayForMultiSchemaDelimited.

/**
 * Builds a preview of the file: reads it line by line, tries each character of
 * {@code separators} as field separator, and collects rows whose value in column
 * {@code keyIndex} is accepted by {@code isInKeyValues(keyValues, ...)}.
 *
 * <p>Reading stops once {@code maximumRowsToPreview} rows have been collected. For each line, the
 * first separator producing a multi-column row with an accepted key wins. Otherwise the row from
 * the last successful parse of that line is added if it has a key column and its key is accepted.
 *
 * @param filePath path of the file, possibly quoted
 * @param separators candidate separator characters, possibly quoted
 * @param encoding forwarded to {@code getCsvReader}
 * @param keyValues key values forwarded to {@code isInKeyValues}
 * @param keyIndex zero-based index of the key column
 * @return the collected rows; empty when the file does not exist
 * @throws UnsupportedEncodingException declared for {@code getCsvReader}
 * @throws IOException if reading or closing the file fails
 */
public CsvArray getCsvArrayForMultiSchemaDelimited(String filePath, String separators, String encoding, String keyValues, int keyIndex) throws UnsupportedEncodingException, IOException {
    File file = new File(TalendTextUtils.removeQuotes(filePath));
    separators = TalendTextUtils.removeQuotes(separators);
    CsvArray csvArray = new CsvArray();
    if (!file.exists()) {
        return csvArray;
    }
    // NOTE(review): FileReader decodes with the platform charset; 'encoding' is not applied here.
    BufferedReader reader = new BufferedReader(new FileReader(file));
    try {
        String readLine;
        int previewedRows = 0;
        while ((readLine = reader.readLine()) != null && previewedRows < maximumRowsToPreview) {
            // Scoped per line so a line that yields no record cannot re-add the previous line's row.
            String[] row = null;
            boolean added = false;
            for (int count = 0; count < separators.length(); count++) {
                CSVReader csvReader = getCsvReader(new ByteArrayInputStream(readLine.getBytes()), separators.charAt(count), encoding);
                List items = csvReader.parse(readLine);
                if (items.size() > 0) {
                    row = (String[]) items.get(0);
                }
                if (row != null && row.length > 1 && row.length > keyIndex && isInKeyValues(keyValues, row[keyIndex])) {
                    csvArray.add(row);
                    previewedRows++;
                    added = true;
                    break;
                }
            }
            // Fallback for rows no separator split into several columns.
            if (!added && row != null && row.length > keyIndex && isInKeyValues(keyValues, row[keyIndex])) {
                csvArray.add(row);
                previewedRows++;
            }
        }
    } finally {
        reader.close();
    }
    return csvArray;
}
Also used : CsvArray(org.talend.core.utils.CsvArray) CSVReader(org.skife.csv.CSVReader) ByteArrayInputStream(java.io.ByteArrayInputStream) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) List(java.util.List) ArrayList(java.util.ArrayList) File(java.io.File)

Example 4 with CSVReader

use of org.skife.csv.CSVReader in project tdi-studio-se by Talend.

the class MultiSchemasManager method getCsvReader.

/**
 * Creates a {@link SimpleReader} configured with the given field separator.
 *
 * <p>{@code inputStream} is not read here, and the unquoted {@code encoding} is computed but not
 * applied to the returned reader.
 *
 * @param inputStream not used by this method
 * @param separator field separator set on the returned reader
 * @param encoding encoding name, possibly quoted
 * @return a new reader using {@code separator}
 * @throws FileNotFoundException declared by the signature
 * @throws UnsupportedEncodingException declared by the signature
 */
private CSVReader getCsvReader(ByteArrayInputStream inputStream, char separator, String encoding) throws FileNotFoundException, UnsupportedEncodingException {
    // Quotes are still stripped from the encoding, although the value is not used afterwards.
    encoding = TalendTextUtils.removeQuotes(encoding);

    final CSVReader configuredReader = new SimpleReader();
    configuredReader.setSeperator(separator);
    return configuredReader;
}
Also used : CSVReader(org.skife.csv.CSVReader) SimpleReader(org.skife.csv.SimpleReader)

Example 5 with CSVReader

use of org.skife.csv.CSVReader in project tdi-studio-se by Talend.

the class MultiSchemasManager method getCSVReader.

/**
 * Creates a {@link SimpleReader} whose separator is the first character of
 * {@code fieldSeparator}.
 *
 * <p>Only {@code fieldSeparator} is consulted; the remaining parameters are not used by this
 * method.
 *
 * @param filePath not used by this method
 * @param encoding not used by this method
 * @param fieldSeparator separator string; only its first character is applied
 * @param rowSeparator not used by this method
 * @param needSkpipEmptyRecord not used by this method
 * @return a new reader using the first character of {@code fieldSeparator}
 * @throws IOException declared by the signature
 */
private CSVReader getCSVReader(final String filePath, final String encoding, final String fieldSeparator, final String rowSeparator, final boolean needSkpipEmptyRecord) throws IOException {
    final CSVReader delimitedReader = new SimpleReader();
    final char separator = fieldSeparator.charAt(0);
    delimitedReader.setSeperator(separator);
    return delimitedReader;
}
Also used : CSVReader(org.skife.csv.CSVReader) SimpleReader(org.skife.csv.SimpleReader)

Aggregations

CSVReader (org.skife.csv.CSVReader): 5, File (java.io.File): 3, ArrayList (java.util.ArrayList): 3, List (java.util.List): 3, BufferedReader (java.io.BufferedReader): 2, ByteArrayInputStream (java.io.ByteArrayInputStream): 2, FileReader (java.io.FileReader): 2, HashSet (java.util.HashSet): 2, SimpleReader (org.skife.csv.SimpleReader): 2, CSVArrayAndSeparator (org.talend.designer.filemultischemas.data.CSVArrayAndSeparator): 2, IOException (java.io.IOException): 1, CsvArray (org.talend.core.utils.CsvArray): 1