Use of org.pentaho.platform.dataaccess.datasource.wizard.models.CsvParseException in project data-access by pentaho.
Class CsvUtils, method getDataProfile.
/**
 * Reads a sample of a delimited text file and builds a {@link DataProfile} from it.
 *
 * <p>At most {@code rowLimit} lines are read in total; header lines count toward that limit.
 * Each line is split with the delimiter and enclosure taken from {@code fileInfo}, and every
 * non-null token is trimmed. The first {@code fileInfo.getHeaderRows()} lines form the header
 * sample; the remaining lines form the data sample and become the rows of the result.
 *
 * <p>One {@link ColumnInfo} is produced per column, up to the widest row seen. A column's title
 * is taken from the last header row when that cell is non-null; otherwise it is
 * {@code CsvFileInfo.DEFAULT_COLUMN_NAME_PREFIX} followed by the 1-based column number formatted
 * as three digits. The column id is the title (passed through {@code Util.toId} when
 * {@code Util.validateId} rejects it), or {@code "PC_" + index} when no header title exists.
 *
 * @param fileInfo     supplies the delimiter, enclosure and number of header rows
 * @param rowLimit     maximum number of lines to read from the file, header lines included
 * @param fileLocation path of the file to read
 * @param fileType     file-format code passed through to {@code TextFileInput.getLine}
 * @param encoding     name of the character encoding used to decode the file
 * @return the profile holding the sampled rows and the derived column descriptions
 * @throws CsvParseException if tokenizing or reading a line raises an
 *                           {@link IllegalArgumentException}; carries the 1-based line number
 *                           and the line text
 * @throws Exception         any other failure while opening or reading the file
 */
private DataProfile getDataProfile(CsvFileInfo fileInfo, int rowLimit, String fileLocation, int fileType, String encoding) throws Exception {
    DataProfile result = new DataProfile();
    String line = null;
    int row = 0;
    List<List<String>> headerSample = new ArrayList<List<String>>();
    List<List<String>> dataSample = new ArrayList<List<String>>(rowLimit);
    int maxColumns = 0;
    // Tracked separately from the reader so the file handle can still be released
    // when building the wrapping streams fails (e.g. unsupported encoding).
    InputStream inputStream = null;
    InputStreamReader reader = null;
    try {
        inputStream = new FileInputStream(fileLocation);
        UnicodeBOMInputStream bomIs = new UnicodeBOMInputStream(inputStream);
        reader = new InputStreamReader(bomIs, encoding);
        bomIs.skipBOM();
        // read each line of text file
        StringBuilder stringBuilder = new StringBuilder(1000);
        line = TextFileInput.getLine(null, reader, fileType, stringBuilder);
        while (line != null && row < rowLimit) {
            CSVTokenizer csvt = new CSVTokenizer(line, fileInfo.getDelimiter(), fileInfo.getEnclosure());
            List<String> rowData = new ArrayList<String>();
            int count = 0;
            while (csvt.hasMoreTokens()) {
                String token = csvt.nextToken();
                if (token != null) {
                    token = token.trim();
                }
                // A null token is kept as-is so column positions stay aligned.
                rowData.add(token);
                count++;
            }
            if (maxColumns < count) {
                maxColumns = count;
            }
            if (row < fileInfo.getHeaderRows()) {
                headerSample.add(rowData);
            } else {
                dataSample.add(rowData);
            }
            line = TextFileInput.getLine(null, reader, fileType, stringBuilder);
            row++;
        }
    } catch (IllegalArgumentException iae) {
        Logger.error(getClass().getSimpleName(), "There was an issue parsing the CSV file", iae); //$NON-NLS-1$
        throw new CsvParseException(row + 1, line);
    } catch (Exception e) {
        Logger.error(getClass().getSimpleName(), "Could not read CSV", e); //$NON-NLS-1$
        throw e;
    } finally {
        // Close the file. A failure here is logged but never thrown: throwing from
        // finally would replace the parse/read exception that is already propagating.
        try {
            if (reader != null) {
                reader.close();
            } else if (inputStream != null) {
                inputStream.close();
            }
        } catch (Exception closeFailure) {
            Logger.error(getClass().getSimpleName(), "Could not close CSV file", closeFailure); //$NON-NLS-1$
        }
    }

    // Copy the header sample into a rectangular array; short rows are padded with null.
    String[][] headerValues = new String[headerSample.size()][maxColumns];
    int rowNo = 0;
    for (List<String> values : headerSample) {
        int colNo = 0;
        for (String value : values) {
            headerValues[rowNo][colNo] = value;
            colNo++;
        }
        rowNo++;
    }

    // Same for the data sample, wrapping each row in a DataRow for the result.
    String[][] dataValues = new String[dataSample.size()][maxColumns];
    DataRow[] data = new DataRow[dataSample.size()];
    rowNo = 0;
    for (List<String> values : dataSample) {
        int colNo = 0;
        for (String value : values) {
            dataValues[rowNo][colNo] = value;
            colNo++;
        }
        data[rowNo] = new DataRow();
        data[rowNo].setCells(dataValues[rowNo]);
        rowNo++;
    }
    result.setRows(data);

    DecimalFormat df = new DecimalFormat("000"); //$NON-NLS-1$
    ColumnInfo[] profiles = new ColumnInfo[maxColumns];
    for (int idx = 0; idx < maxColumns; idx++) {
        ColumnInfo profile = new ColumnInfo();
        profiles[idx] = profile;
        String title = CsvFileInfo.DEFAULT_COLUMN_NAME_PREFIX + df.format(idx + 1);
        String colId = "PC_" + idx; //$NON-NLS-1$
        if (headerValues.length > 0) {
            // Only the last header row is consulted for column titles.
            String headerTitle = headerValues[headerValues.length - 1][idx];
            if (headerTitle != null) {
                title = headerTitle;
                colId = title;
                if (!Util.validateId(title)) {
                    colId = Util.toId(colId);
                }
            }
        }
        profile.setTitle(title);
        profile.setId(colId);
        List<String> samples = getColumnData(idx, dataValues);
        assumeColumnDetails(profile, samples);
    }
    result.setColumns(profiles);
    return result;
}
Aggregations