Use of org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo in project data-access by pentaho.
The class CsvUtils, method getDataProfile:
private DataProfile getDataProfile( CsvFileInfo fileInfo, int rowLimit, String fileLocation, int fileType,
                                    String encoding ) throws Exception {
  DataProfile result = new DataProfile();
  String line = null;
  int row = 0;
  List<List<String>> headerSample = new ArrayList<List<String>>();
  List<List<String>> dataSample = new ArrayList<List<String>>( rowLimit );
  int maxColumns = 0;
  InputStreamReader reader = null;
  try {
    InputStream inputStream = new FileInputStream( fileLocation );
    UnicodeBOMInputStream bomIs = new UnicodeBOMInputStream( inputStream );
    reader = new InputStreamReader( bomIs, encoding );
    bomIs.skipBOM();
    // read each line of the text file
    StringBuilder stringBuilder = new StringBuilder( 1000 );
    line = TextFileInput.getLine( null, reader, fileType, stringBuilder );
    while ( line != null && row < rowLimit ) {
      CSVTokenizer csvt = new CSVTokenizer( line, fileInfo.getDelimiter(), fileInfo.getEnclosure() );
      List<String> rowData = new ArrayList<String>();
      int count = 0;
      while ( csvt.hasMoreTokens() ) {
        String token = csvt.nextToken();
        if ( token != null ) {
          token = token.trim();
        }
        rowData.add( token );
        count++;
      }
      if ( maxColumns < count ) {
        maxColumns = count;
      }
      if ( row < fileInfo.getHeaderRows() ) {
        headerSample.add( rowData );
      } else {
        dataSample.add( rowData );
      }
      line = TextFileInput.getLine( null, reader, fileType, stringBuilder );
      row++;
    }
  } catch ( IllegalArgumentException iae ) {
    Logger.error( getClass().getSimpleName(), "There was an issue parsing the CSV file", iae ); //$NON-NLS-1$
    throw new CsvParseException( row + 1, line );
  } catch ( Exception e ) {
    Logger.error( getClass().getSimpleName(), "Could not read CSV", e ); //$NON-NLS-1$
    throw e;
  } finally {
    // close the file; swallow close failures so they cannot mask an exception thrown above
    try {
      if ( reader != null ) {
        reader.close();
      }
    } catch ( Exception e ) {
      // ignore
    }
  }
  String[][] headerValues = new String[ headerSample.size() ][ maxColumns ];
  int rowNo = 0;
  for ( List<String> values : headerSample ) {
    int colNo = 0;
    for ( String value : values ) {
      headerValues[ rowNo ][ colNo ] = value;
      colNo++;
    }
    rowNo++;
  }
  int[] fieldLengths = new int[ maxColumns ];
  String[][] dataValues = new String[ dataSample.size() ][ maxColumns ];
  DataRow[] data = new DataRow[ dataSample.size() ];
  rowNo = 0;
  for ( List<String> values : dataSample ) {
    int colNo = 0;
    for ( String value : values ) {
      dataValues[ rowNo ][ colNo ] = value;
      int currentMaxLength = fieldLengths[ colNo ];
      if ( value.length() > currentMaxLength ) {
        fieldLengths[ colNo ] = value.length();
      }
      colNo++;
    }
    data[ rowNo ] = new DataRow();
    data[ rowNo ].setCells( dataValues[ rowNo ] );
    rowNo++;
  }
  result.setRows( data );
  DecimalFormat df = new DecimalFormat( "000" ); //$NON-NLS-1$
  ColumnInfo[] profiles = new ColumnInfo[ maxColumns ];
  for ( int idx = 0; idx < maxColumns; idx++ ) {
    ColumnInfo profile = new ColumnInfo();
    profiles[ idx ] = profile;
    String title = CsvFileInfo.DEFAULT_COLUMN_NAME_PREFIX + df.format( idx + 1 );
    String colId = "PC_" + idx; //$NON-NLS-1$
    if ( headerValues.length > 0 ) {
      // use the last header row as the source of column titles and ids
      if ( headerValues[ headerValues.length - 1 ][ idx ] != null ) {
        title = headerValues[ headerValues.length - 1 ][ idx ];
        colId = title;
        if ( !Util.validateId( title ) ) {
          colId = Util.toId( colId );
        }
      }
    }
    profile.setTitle( title );
    profile.setId( colId );
    List<String> samples = getColumnData( idx, dataValues );
    assumeColumnDetails( profile, samples );
  }
  result.setColumns( profiles );
  return result;
}
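The getColumnData helper used in the profiling loop above is not shown on this page. A minimal sketch of what such a column extractor could look like, assuming it simply collects the non-null cells of one column from the sampled 2D array (the name getColumnDataSketch and the implementation are illustrative, not the project's actual code):

private static List<String> getColumnDataSketch( int columnIndex, String[][] dataValues ) {
  List<String> samples = new ArrayList<String>();
  for ( String[] rowValues : dataValues ) {
    // rows shorter than maxColumns leave their trailing cells null, so skip those
    if ( rowValues[ columnIndex ] != null ) {
      samples.add( rowValues[ columnIndex ] );
    }
  }
  return samples;
}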
Use of org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo in project data-access by pentaho.
The class StagingTransformGenerator, method createCalcStep:
protected StepMeta createCalcStep( TransMeta transMeta, String stepName, ColumnInfo[] columns ) {
  CalculatorMeta meta = new CalculatorMeta();
  List<CalculatorMetaFunction> funcs = new ArrayList<CalculatorMetaFunction>();
  for ( ColumnInfo column : columns ) {
    if ( column != null && !column.isIgnore() && column.getDataType() == DataType.DATE ) {
      // see if we need to break out the date fields
      int dateBreakOut = column.getDateFieldBreakout();
      if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_YEAR ) > 0 ) {
        CalculatorMetaFunction func = createDateCalc( CalculatorMetaFunction.CALC_YEAR_OF_DATE,
            column.getTitle() + " (year)", column.getId(), 4 );
        funcs.add( func );
      }
      if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_QUARTER ) > 0 ) {
        CalculatorMetaFunction func = createDateCalc( CalculatorMetaFunction.CALC_QUARTER_OF_DATE,
            column.getTitle() + " (qtr)", column.getId(), 4 );
        funcs.add( func );
      }
      if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_MONTH ) > 0 ) {
        CalculatorMetaFunction func = createDateCalc( CalculatorMetaFunction.CALC_MONTH_OF_DATE,
            column.getTitle() + " (month)", column.getId(), 4 );
        funcs.add( func );
      }
      if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_WEEK ) > 0 ) {
        CalculatorMetaFunction func = createDateCalc( CalculatorMetaFunction.CALC_WEEK_OF_YEAR,
            column.getTitle() + " (week)", column.getId(), 4 );
        funcs.add( func );
      }
      if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_DAY ) > 0 ) {
        CalculatorMetaFunction func = createDateCalc( CalculatorMetaFunction.CALC_DAY_OF_MONTH,
            column.getTitle() + " (day)", column.getId(), 4 );
        funcs.add( func );
      }
      if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_DAYOFWEEK ) > 0 ) {
        CalculatorMetaFunction func = createDateCalc( CalculatorMetaFunction.CALC_DAY_OF_WEEK,
            column.getTitle() + " (day of week)", column.getId(), 4 );
        funcs.add( func );
      }
    }
  }
  if ( funcs.size() == 0 ) {
    return null;
  }
  meta.setCalculation( funcs.toArray( new CalculatorMetaFunction[ funcs.size() ] ) );
  StepMeta stepMeta = new StepMeta( stepName, stepName, meta );
  transMeta.addStep( stepMeta );
  return stepMeta;
}
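The bitwise tests above imply that the DATE_LEVEL_* constants are distinct bit flags that can be OR-ed into a single breakout mask. As a hedged illustration (assuming ColumnInfo exposes a setter matching the getDateFieldBreakout accessor used above), a column requesting only year and month breakouts could be configured like this:

ColumnInfo orderDate = new ColumnInfo();
orderDate.setId( "order_date" );
orderDate.setTitle( "Order Date" );
orderDate.setDataType( DataType.DATE );
// request year and month breakout columns only
orderDate.setDateFieldBreakout( ColumnInfo.DATE_LEVEL_YEAR | ColumnInfo.DATE_LEVEL_MONTH );

Passing such a column to createCalcStep would then add two calculator functions, one per flag set in the mask.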
Use of org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo in project data-access by pentaho.
The class StagingTransformGenerator, method createDateCalc:
/**
 * Creates a calculation. Used to break out date fields.
 *
 * @param calcType    the CalculatorMetaFunction.CALC_* constant identifying the date calculation
 * @param fieldName   the title of the new field
 * @param fieldId     the id of the source date field
 * @param valueLength the length of the resulting value
 * @return the configured calculator function
 */
protected CalculatorMetaFunction createDateCalc( int calcType, String fieldName, String fieldId, int valueLength ) {
  String fieldB = null;
  String fieldC = null;
  int valueType = ValueMetaInterface.TYPE_INTEGER;
  int valuePrecision = 0;
  boolean removedFromResult = false;
  String conversionMask = ""; //$NON-NLS-1$
  String decimalSymbol = ""; //$NON-NLS-1$
  String groupingSymbol = ""; //$NON-NLS-1$
  String currencySymbol = ""; //$NON-NLS-1$
  CalculatorMetaFunction func = new CalculatorMetaFunction( fieldName, calcType, fieldId, fieldB, fieldC, valueType,
      valueLength, valuePrecision, removedFromResult, conversionMask, decimalSymbol, groupingSymbol, currencySymbol );
  // update the model (note: the ColumnInfo built here is not attached to anything in this snippet)
  ColumnInfo column = new ColumnInfo();
  column.setAggregateType( AggregationType.NONE.toString() );
  column.setDataType( DataType.NUMERIC );
  column.setFieldType( ColumnInfo.FIELD_TYPE_DIMENSION );
  column.setIgnore( false );
  column.setId( fieldId );
  column.setIndex( true );
  column.setTitle( fieldName );
  return func;
}
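For illustration, the year breakout performed by createCalcStep above reduces to a call like the following (the field names are hypothetical; the constant and the method signature are taken from the snippets on this page):

CalculatorMetaFunction yearFunc =
    createDateCalc( CalculatorMetaFunction.CALC_YEAR_OF_DATE, "Order Date (year)", "order_date", 4 );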
Use of org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo in project data-access by pentaho.
The class CsvTransformGenerator, method createInputStep:
protected StepMeta createInputStep( TransMeta transMeta ) {
  CsvInputMeta csvInputMeta = new CsvInputMeta();
  CsvFileInfo fileInfo = getModelInfo().getFileInfo();
  String fileName = fileInfo.getTmpFilename();
  String path;
  if ( fileName.endsWith( ".tmp" ) ) { //$NON-NLS-1$
    path = PentahoSystem.getApplicationContext().getSolutionPath( TMP_FILE_PATH );
  } else {
    String relativePath = PentahoSystem.getSystemSetting( "file-upload-defaults/relative-path", //$NON-NLS-1$
        String.valueOf( DEFAULT_RELATIVE_UPLOAD_FILE_PATH ) );
    path = PentahoSystem.getApplicationContext().getSolutionPath( relativePath );
  }
  File file = new File( path + fileInfo.getTmpFilename() );
  String filename = file.getAbsolutePath();
  ColumnInfo[] columns = getModelInfo().getColumns();
  TextFileInputField[] inputFields = new TextFileInputField[ columns.length ];
  int idx = 0;
  for ( ColumnInfo column : columns ) {
    TextFileInputField field = new TextFileInputField();
    field.setCurrencySymbol( fileInfo.getCurrencySymbol() );
    field.setDecimalSymbol( fileInfo.getDecimalSymbol() );
    field.setFormat( column.getFormat() );
    field.setGroupSymbol( fileInfo.getGroupSymbol() );
    field.setIfNullValue( fileInfo.getIfNull() );
    field.setIgnored( column.isIgnore() );
    field.setLength( column.getLength() );
    field.setName( column.getId() );
    field.setNullString( fileInfo.getNullStr() );
    // field.setPosition( position );
    field.setPrecision( column.getPrecision() );
    field.setRepeated( false );
    field.setSamples( null );
    field.setTrimType( ValueMeta.TRIM_TYPE_BOTH );
    field.setType( convertDataType( column ) );
    inputFields[ idx ] = field;
    idx++;
  }
  csvInputMeta.setAddResultFile( false );
  csvInputMeta.setBufferSize( "5000" ); //$NON-NLS-1$
  csvInputMeta.setDelimiter( fileInfo.getDelimiter() );
  csvInputMeta.setEnclosure( fileInfo.getEnclosure() );
  csvInputMeta.setEncoding( fileInfo.getEncoding() );
  csvInputMeta.setFilename( filename );
  csvInputMeta.setFilenameField( null );
  // TODO strip off more than one row if present...
  csvInputMeta.setHeaderPresent( fileInfo.getHeaderRows() > 0 );
  // inputMeta.get.setID( 1 );
  csvInputMeta.setIncludingFilename( false );
  csvInputMeta.setInputFields( inputFields );
  csvInputMeta.setLazyConversionActive( true );
  csvInputMeta.setRowNumField( "" ); //$NON-NLS-1$
  csvInputMeta.setRunningInParallel( false );
  // inputMeta.setTargetSteps( null );
  StepMeta csvInputStepMeta = new StepMeta( CSV_INPUT, CSV_INPUT, csvInputMeta );
  csvInputStepMeta.setStepErrorMeta( new StepErrorMeta( transMeta, csvInputStepMeta ) );
  transMeta.addStep( csvInputStepMeta );
  csvErrorRowCount = 0;
  final FileTransformStats stats = getTransformStats();
  StepErrorMeta csvInputErrorMeta = new StepErrorMeta( transMeta, csvInputStepMeta ) {
    public void addErrorRowData( Object[] row, int startIndex, long nrErrors, String errorDescriptions,
        String fieldNames, String errorCodes ) {
      // capture rejected rows, up to maxErrorRows, for reporting in the transform stats
      if ( csvErrorRowCount < maxErrorRows ) {
        StringBuffer sb = new StringBuffer();
        sb.append( "Rejected Row: " );
        for ( Object rowData : row ) {
          sb.append( rowData );
          sb.append( ", " );
        }
        sb.append( "\r\n" );
        stats.getErrors().add( sb.toString() + errorDescriptions );
      }
      csvErrorRowCount++;
      stats.setErrorCount( csvErrorRowCount );
      super.addErrorRowData( row, startIndex, nrErrors, errorDescriptions, fieldNames, errorCodes );
    }
  };
  StepMeta outputDummyStepMeta = addDummyStep( transMeta, "CSVInputErrorDummy" );
  csvInputErrorMeta.setTargetStep( outputDummyStepMeta );
  csvInputErrorMeta.setEnabled( true );
  csvInputStepMeta.setStepErrorMeta( csvInputErrorMeta );
  return csvInputStepMeta;
}
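The convertDataType helper used when building the input fields above is not shown here. A plausible mapping from the wizard's DataType values to Kettle's ValueMetaInterface type codes could look like the sketch below (an assumption for illustration; the project's actual mapping may differ):

private static int convertDataTypeSketch( ColumnInfo column ) {
  DataType dataType = column.getDataType();
  if ( dataType == DataType.DATE ) {
    return ValueMetaInterface.TYPE_DATE;
  }
  if ( dataType == DataType.NUMERIC ) {
    return ValueMetaInterface.TYPE_NUMBER;
  }
  if ( dataType == DataType.BOOLEAN ) {
    return ValueMetaInterface.TYPE_BOOLEAN;
  }
  // everything else is read as a plain string
  return ValueMetaInterface.TYPE_STRING;
}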
Use of org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo in project data-access by pentaho.
The class CsvTransformGeneratorIT, method addColumnToModel:
private static void addColumnToModel( ModelInfo info ) {
  ColumnInfo[] columns = info.getColumns();
  ColumnInfo col = new ColumnInfo();
  // col.setDataType( ValueMeta.getTypeDesc( ValueMeta.TYPE_INTEGER ) );
  col.setDataType( DataType.NUMERIC );
  col.setId( "PC_999" );
  col.setTitle( "NEW_COLUMN" );
  col.setIndex( true );
  col.setFieldType( ColumnInfo.FIELD_TYPE_BOTH );
  col.setAggregateType( AggregationType.SUM.toString() );
  ColumnInfo[] newColumns = (ColumnInfo[]) ArrayUtils.add( columns, col );
  info.setColumns( newColumns );
}
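Inside the test class, the helper above would be exercised roughly as follows (a JUnit-style sketch; the no-arg ModelInfo constructor is an assumption, and only getColumns/setColumns are taken from the snippet):

ModelInfo info = new ModelInfo();
info.setColumns( new ColumnInfo[ 0 ] );
int before = info.getColumns().length;
addColumnToModel( info );
// the new column is appended after the existing ones
assertEquals( before + 1, info.getColumns().length );
assertEquals( "PC_999", info.getColumns()[ before ].getId() );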