use of org.skife.csv.CSVReader in project tdi-studio-se by Talend.
the class MultiSchemasManager method retrieveCsvArrayByCSVOption.
/**
 * Parses the delimited file at {@code filePath} and keeps the first record found for each
 * distinct value of the column at {@code selectColumnIndex}.
 *
 * <p>Records that are null, too short to contain the key column, or whose key cell is null or
 * blank are skipped. Every kept record is paired with {@code fieldSeparator} in the returned
 * bean's separator list.
 *
 * @param filePath path of the file to parse
 * @param encoding forwarded to {@code getCSVReader}
 * @param fieldSeparator field separator; forwarded to {@code getCSVReader} and recorded for each kept record
 * @param rowSeparator forwarded to {@code getCSVReader}
 * @param needSkpipEmptyRecord forwarded to {@code getCSVReader}
 * @param splitRecord not used by this method
 * @param selectColumnIndex zero-based index of the key column; a negative value selects nothing
 * @return the collected records and separators, or {@code null} when the file yields no records
 *         or an {@link IOException} occurs (the exception is handed to {@code ExceptionHandler})
 */
public CSVArrayAndSeparator retrieveCsvArrayByCSVOption(final String filePath, final String encoding, final String fieldSeparator, final String rowSeparator, final boolean needSkpipEmptyRecord, final boolean splitRecord, int selectColumnIndex) {
    List<?> items;
    try {
        CSVReader multiSchemaCsvReader = getCSVReader(filePath, encoding, fieldSeparator, rowSeparator, needSkpipEmptyRecord);
        // Parse once: an earlier version read the whole file a second time with an identically
        // configured reader, only to learn whether it contained any record at all.
        items = multiSchemaCsvReader.parse(new File(filePath));
    } catch (IOException e) {
        ExceptionHandler.process(e);
        return null;
    }
    if (items == null || items.isEmpty()) {
        // No record at all: callers receive null rather than an empty bean.
        return null;
    }

    CSVArrayAndSeparator csvArrayBean = new CSVArrayAndSeparator();
    Set<String> uniqueKey = new HashSet<String>();
    for (Object item : items) {
        String[] values = (String[]) item;
        // The index checks also cover empty records (length 0 is never greater than a valid index).
        if (values == null || selectColumnIndex < 0 || values.length <= selectColumnIndex) {
            continue;
        }
        final String key = values[selectColumnIndex];
        if (key == null || key.trim().length() == 0) {
            // A record must carry a non-blank key to be considered.
            continue;
        }
        if (!uniqueKey.add(key)) {
            // Key already seen: only the first record per key is kept.
            continue;
        }
        csvArrayBean.getCsvArray().add(values);
        csvArrayBean.getSeparators().add(fieldSeparator);
    }
    return csvArrayBean;
}
use of org.skife.csv.CSVReader in project tdi-studio-se by Talend.
the class MultiSchemasManager method getCsvArrayForMs.
/**
 * Reads the file line by line, tries every character of {@code separators} as a field separator,
 * and collects records whose key column (at {@code selectColumnIndex}) is accepted by
 * {@code isInKeyValues(getKeyValues(), ...)}. Uniqueness is tracked on "key value + separator".
 *
 * <p>For each line the separators are tried in order. The first one that yields a multi-column
 * record with a not-yet-seen key wins. If none does, the record produced by the last separator is
 * added as a fallback, provided its key is accepted and not yet seen for the separator remembered
 * in {@code sepIndex}.
 *
 * @param filePath path of the file, possibly wrapped in quotes
 * @param separators candidate separator characters, possibly wrapped in quotes
 * @param encoding forwarded to {@code getCsvReader}
 * @param selectColumnIndex zero-based index of the key column
 * @return the collected records with their separators; empty when the file does not exist, there
 *         are no separators, or {@code selectColumnIndex} is negative
 * @throws UnsupportedEncodingException declared by {@code getCsvReader}
 * @throws IOException if the file cannot be read
 */
private CSVArrayAndSeparator getCsvArrayForMs(String filePath, String separators, String encoding, int selectColumnIndex) throws UnsupportedEncodingException, IOException {
    File file = new File(TalendTextUtils.removeQuotes(filePath));
    separators = TalendTextUtils.removeQuotes(separators);
    CSVArrayAndSeparator csvArrayBean = new CSVArrayAndSeparator();
    // Without a separator or with a negative key index nothing can ever match; bail out instead
    // of failing with a NullPointerException / index exception on the first line.
    if (!file.exists() || separators == null || separators.length() == 0 || selectColumnIndex < 0) {
        return csvArrayBean;
    }

    Set<String> uniqueKey = new HashSet<String>();
    // NOTE(review): FileReader decodes with the platform default charset; `encoding` is only
    // forwarded to getCsvReader. Confirm whether the file should be decoded with `encoding`.
    BufferedReader reader = new BufferedReader(new FileReader(file));
    try {
        String readLine;
        while ((readLine = reader.readLine()) != null) {
            // Reset per line: a line that parses to no record (e.g. a blank line) must not
            // re-process the record of the previous line.
            String[] row = null;
            boolean added = false;
            int sepIndex = 0;
            for (int count = 0; count < separators.length(); count++) {
                char separatorChar = separators.charAt(count);
                CSVReader csvReader = getCsvReader(new ByteArrayInputStream(readLine.getBytes()), separatorChar, encoding);
                List<?> items = csvReader.parse(readLine);
                if (items != null && items.size() > 0) {
                    row = (String[]) items.get(0);
                }
                if (row == null) {
                    continue;
                }
                // Single-column records are handled by the fallback after the loop.
                if (row.length > 1 && row.length > selectColumnIndex && isInKeyValues(getKeyValues(), row[selectColumnIndex])) {
                    String separator = String.valueOf(separatorChar);
                    if (uniqueKey.add(row[selectColumnIndex] + separator)) {
                        csvArrayBean.getCsvArray().add(row);
                        csvArrayBean.getSeparators().add(separator);
                        added = true;
                        break;
                    }
                    // Key already known for this separator: remember it for the fallback check.
                    sepIndex = count;
                }
            }
            // Fallback: uses the record from the last separator tried.
            if (!added && row != null && selectColumnIndex < row.length && isInKeyValues(getKeyValues(), row[selectColumnIndex])) {
                String separator = String.valueOf(separators.charAt(sepIndex));
                if (uniqueKey.add(row[selectColumnIndex] + separator)) {
                    csvArrayBean.getCsvArray().add(row);
                    csvArrayBean.getSeparators().add(separator);
                }
            }
        }
    } finally {
        // The reader was previously never closed, leaking a file handle per call.
        reader.close();
    }
    return csvArrayBean;
}
use of org.skife.csv.CSVReader in project tdi-studio-se by Talend.
the class MultiSchemasManager method getCsvArrayForMultiSchemaDelimited.
/**
 * Builds a preview of at most {@code maximumRowsToPreview} records from the file, trying every
 * character of {@code separators} as a field separator for each line.
 *
 * <p>For each line the separators are tried in order. The first one that yields a multi-column
 * record whose cell at {@code keyIndex} is accepted by {@code isInKeyValues} wins. If none does,
 * the record produced by the last separator is added when its key cell is accepted.
 *
 * @param filePath path of the file, possibly wrapped in quotes
 * @param separators candidate separator characters, possibly wrapped in quotes
 * @param encoding forwarded to {@code getCsvReader}
 * @param keyValues accepted key values, passed to {@code isInKeyValues}
 * @param keyIndex zero-based index of the key column
 * @return the preview records; empty when the file does not exist, there are no separators, or
 *         {@code keyIndex} is negative
 * @throws UnsupportedEncodingException declared by {@code getCsvReader}
 * @throws IOException if the file cannot be read
 */
public CsvArray getCsvArrayForMultiSchemaDelimited(String filePath, String separators, String encoding, String keyValues, int keyIndex) throws UnsupportedEncodingException, IOException {
    File file = new File(TalendTextUtils.removeQuotes(filePath));
    separators = TalendTextUtils.removeQuotes(separators);
    CsvArray csvArray = new CsvArray();
    // Without a separator no line is ever parsed, and a negative key index can never be valid.
    if (!file.exists() || separators == null || separators.length() == 0 || keyIndex < 0) {
        return csvArray;
    }

    // NOTE(review): FileReader decodes with the platform default charset; `encoding` is only
    // forwarded to getCsvReader. Confirm whether the file should be decoded with `encoding`.
    BufferedReader reader = new BufferedReader(new FileReader(file));
    try {
        int previewed = 0;
        String readLine;
        // Check the limit first so no extra line is read once the preview is full.
        while (previewed < maximumRowsToPreview && (readLine = reader.readLine()) != null) {
            // Reset per line: a line that parses to no record (e.g. a blank line) must not
            // add the previous line's record a second time.
            String[] row = null;
            boolean added = false;
            for (int count = 0; count < separators.length(); count++) {
                CSVReader csvReader = getCsvReader(new ByteArrayInputStream(readLine.getBytes()), separators.charAt(count), encoding);
                List<?> items = csvReader.parse(readLine);
                if (items != null && items.size() > 0) {
                    row = (String[]) items.get(0);
                }
                if (row != null && row.length > 1 && row.length > keyIndex && isInKeyValues(keyValues, row[keyIndex])) {
                    csvArray.add(row);
                    previewed++;
                    added = true;
                    break;
                }
            }
            // Fallback (e.g. single-column records): uses the record from the last separator tried.
            if (!added && row != null && row.length > keyIndex && isInKeyValues(keyValues, row[keyIndex])) {
                csvArray.add(row);
                previewed++;
            }
        }
    } finally {
        // The reader was previously never closed, leaking a file handle per call.
        reader.close();
    }
    return csvArray;
}
use of org.skife.csv.CSVReader in project tdi-studio-se by Talend.
the class MultiSchemasManager method getCsvReader.
/**
 * Creates a {@code SimpleReader} that splits fields on {@code separator}.
 *
 * @param inputStream not read by this method
 * @param separator field separator applied to the new reader
 * @param encoding stripped of quotes, but the result is not used to configure the reader
 * @return the configured reader
 * @throws FileNotFoundException declared for callers; not raised by the code visible here
 * @throws UnsupportedEncodingException declared for callers; not raised by the code visible here
 */
private CSVReader getCsvReader(ByteArrayInputStream inputStream, char separator, String encoding) throws FileNotFoundException, UnsupportedEncodingException {
    // Kept for parity with the historical implementation; the unquoted value is not consumed below.
    encoding = TalendTextUtils.removeQuotes(encoding);

    final CSVReader separatorAwareReader = new SimpleReader();
    separatorAwareReader.setSeperator(separator);
    return separatorAwareReader;
}
use of org.skife.csv.CSVReader in project tdi-studio-se by Talend.
the class MultiSchemasManager method getCSVReader.
/**
 * Creates a {@code SimpleReader} whose field separator is the first character of
 * {@code fieldSeparator}.
 *
 * @param filePath not used by this method
 * @param encoding not used by this method
 * @param fieldSeparator separator string; only its first character is applied
 * @param rowSeparator not used by this method
 * @param needSkpipEmptyRecord not used by this method
 * @return the configured reader
 * @throws IOException declared for callers; not raised by the code visible here
 */
private CSVReader getCSVReader(final String filePath, final String encoding, final String fieldSeparator, final String rowSeparator, final boolean needSkpipEmptyRecord) throws IOException {
    final CSVReader delimitedReader = new SimpleReader();
    // Only the leading character of the separator string is honoured.
    final char leadingSeparator = fieldSeparator.charAt(0);
    delimitedReader.setSeperator(leadingSeparator);
    return delimitedReader;
}
Aggregations