Search in sources :

Example 6 with ColumnInfo

use of org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo in project data-access by pentaho.

In the class CsvUtils, the method getDataProfile:

/**
 * Reads up to {@code rowLimit} lines of the CSV at {@code fileLocation}, splits them using the
 * delimiter/enclosure from {@code fileInfo}, and builds a DataProfile containing the sampled
 * data rows plus per-column metadata (title, id, assumed column details).
 *
 * @param fileInfo     delimiter, enclosure and header-row settings for the file
 * @param rowLimit     maximum number of lines (header + data) to sample
 * @param fileLocation path of the CSV file to profile
 * @param fileType     line-format constant passed through to TextFileInput.getLine
 * @param encoding     character encoding used to decode the file
 * @return profile of the sampled rows and columns
 * @throws CsvParseException when a line cannot be tokenized (reports 1-based row and line text)
 * @throws Exception         any other read failure, after logging
 */
private DataProfile getDataProfile(CsvFileInfo fileInfo, int rowLimit, String fileLocation, int fileType, String encoding) throws Exception {
    DataProfile result = new DataProfile();
    String line = null;
    int row = 0;
    List<List<String>> headerSample = new ArrayList<List<String>>();
    List<List<String>> dataSample = new ArrayList<List<String>>(rowLimit);
    int maxColumns = 0;
    InputStreamReader reader = null;
    try {
        InputStream inputStream = new FileInputStream(fileLocation);
        // Strip any byte-order mark before decoding so the first token is clean.
        UnicodeBOMInputStream bomIs = new UnicodeBOMInputStream(inputStream);
        reader = new InputStreamReader(bomIs, encoding);
        bomIs.skipBOM();
        // read each line of text file
        StringBuilder stringBuilder = new StringBuilder(1000);
        line = TextFileInput.getLine(null, reader, fileType, stringBuilder);
        while (line != null && row < rowLimit) {
            CSVTokenizer csvt = new CSVTokenizer(line, fileInfo.getDelimiter(), fileInfo.getEnclosure());
            List<String> rowData = new ArrayList<String>();
            int count = 0;
            while (csvt.hasMoreTokens()) {
                String token = csvt.nextToken();
                if (token != null) {
                    token = token.trim();
                }
                // Note: a null token is still added, so downstream consumers must null-check.
                rowData.add(token);
                count++;
            }
            if (maxColumns < count) {
                maxColumns = count;
            }
            // The first getHeaderRows() lines are header material; the rest is sample data.
            if (row < fileInfo.getHeaderRows()) {
                headerSample.add(rowData);
            } else {
                dataSample.add(rowData);
            }
            line = TextFileInput.getLine(null, reader, fileType, stringBuilder);
            row++;
        }
    } catch (IllegalArgumentException iae) {
        // $NON-NLS-1$
        Logger.error(getClass().getSimpleName(), "There was an issue parsing the CSV file", iae);
        throw new CsvParseException(row + 1, line);
    } catch (Exception e) {
        // $NON-NLS-1$
        Logger.error(getClass().getSimpleName(), "Could not read CSV", e);
        throw e;
    } finally {
        // close the file
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (Exception ignored) {
            // BUG FIX: the original rethrew here despite an "ignore" comment, which could
            // mask the primary exception from the try block. A close failure is now
            // deliberately swallowed, matching the stated intent.
        }
    }
    // Copy the header rows into a rectangular array padded to the widest row seen.
    String[][] headerValues = new String[headerSample.size()][maxColumns];
    int rowNo = 0;
    for (List<String> values : headerSample) {
        int colNo = 0;
        for (String value : values) {
            headerValues[rowNo][colNo] = value;
            colNo++;
        }
        rowNo++;
    }
    // NOTE(review): fieldLengths is computed but never read in this method; kept for
    // fidelity, but it looks like dead state — confirm before removing.
    int[] fieldLengths = new int[maxColumns];
    String[][] dataValues = new String[dataSample.size()][maxColumns];
    DataRow[] data = new DataRow[dataSample.size()];
    rowNo = 0;
    for (List<String> values : dataSample) {
        int colNo = 0;
        for (String value : values) {
            dataValues[rowNo][colNo] = value;
            // BUG FIX: guard against null tokens (added above when the tokenizer yields
            // null), which previously risked a NullPointerException on value.length().
            if (value != null && value.length() > fieldLengths[colNo]) {
                fieldLengths[colNo] = value.length();
            }
            colNo++;
        }
        data[rowNo] = new DataRow();
        data[rowNo].setCells(dataValues[rowNo]);
        rowNo++;
    }
    result.setRows(data);
    // $NON-NLS-1$
    DecimalFormat df = new DecimalFormat("000");
    ColumnInfo[] profiles = new ColumnInfo[maxColumns];
    for (int idx = 0; idx < maxColumns; idx++) {
        ColumnInfo profile = new ColumnInfo();
        profiles[idx] = profile;
        // Default title/id; overridden by the last header row's value when present.
        String title = CsvFileInfo.DEFAULT_COLUMN_NAME_PREFIX + df.format(idx + 1);
        // $NON-NLS-1$
        String colId = "PC_" + idx;
        if (headerValues.length > 0) {
            if (headerValues[headerValues.length - 1][idx] != null) {
                title = headerValues[headerValues.length - 1][idx];
                colId = title;
                if (!Util.validateId(title)) {
                    colId = Util.toId(colId);
                }
            }
        }
        profile.setTitle(title);
        profile.setId(colId);
        List<String> samples = getColumnData(idx, dataValues);
        assumeColumnDetails(profile, samples);
    }
    result.setColumns(profiles);
    return result;
}
Also used : InputStreamReader(java.io.InputStreamReader) CsvParseException(org.pentaho.platform.dataaccess.datasource.wizard.models.CsvParseException) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) DecimalFormat(java.text.DecimalFormat) ArrayList(java.util.ArrayList) ColumnInfo(org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo) CSVTokenizer(org.pentaho.reporting.libraries.base.util.CSVTokenizer) DataRow(org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) CsvParseException(org.pentaho.platform.dataaccess.datasource.wizard.models.CsvParseException) FileNotFoundException(java.io.FileNotFoundException) ArrayList(java.util.ArrayList) List(java.util.List)

Example 7 with ColumnInfo

use of org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo in project data-access by pentaho.

In the class StagingTransformGenerator, the method createCalcStep:

/**
 * Builds a Calculator step that breaks out the requested date parts (year, quarter,
 * month, week, day, day-of-week) for every non-ignored DATE column.
 *
 * @param transMeta transformation to add the step to
 * @param stepName  name for the new step
 * @param columns   model columns; only DATE columns with a breakout mask contribute
 * @return the added StepMeta, or null when no column requested any date breakout
 */
protected StepMeta createCalcStep(TransMeta transMeta, String stepName, ColumnInfo[] columns) {
    CalculatorMeta meta = new CalculatorMeta();
    // Breakout flags, matching calculation types, and title suffixes kept in
    // lock-step so the functions are emitted in the original fixed order.
    final int[] breakoutFlags = { ColumnInfo.DATE_LEVEL_YEAR, ColumnInfo.DATE_LEVEL_QUARTER, ColumnInfo.DATE_LEVEL_MONTH, ColumnInfo.DATE_LEVEL_WEEK, ColumnInfo.DATE_LEVEL_DAY, ColumnInfo.DATE_LEVEL_DAYOFWEEK };
    final int[] calcTypes = { CalculatorMetaFunction.CALC_YEAR_OF_DATE, CalculatorMetaFunction.CALC_QUARTER_OF_DATE, CalculatorMetaFunction.CALC_MONTH_OF_DATE, CalculatorMetaFunction.CALC_WEEK_OF_YEAR, CalculatorMetaFunction.CALC_DAY_OF_MONTH, CalculatorMetaFunction.CALC_DAY_OF_WEEK };
    final String[] titleSuffixes = { " (year)", " (qtr)", " (month)", " (week)", " (day)", " (day of week)" };
    List<CalculatorMetaFunction> functions = new ArrayList<CalculatorMetaFunction>();
    for (ColumnInfo col : columns) {
        // Only active DATE columns can request a date-field breakout.
        if (col == null || col.isIgnore() || col.getDataType() != DataType.DATE) {
            continue;
        }
        int breakoutMask = col.getDateFieldBreakout();
        for (int i = 0; i < breakoutFlags.length; i++) {
            if ((breakoutMask & breakoutFlags[i]) > 0) {
                functions.add(createDateCalc(calcTypes[i], col.getTitle() + titleSuffixes[i], col.getId(), 4));
            }
        }
    }
    if (functions.isEmpty()) {
        // Nothing to calculate — no step is created.
        return null;
    }
    meta.setCalculation(functions.toArray(new CalculatorMetaFunction[functions.size()]));
    StepMeta stepMeta = new StepMeta(stepName, stepName, meta);
    transMeta.addStep(stepMeta);
    return stepMeta;
}
Also used : ArrayList(java.util.ArrayList) ColumnInfo(org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo) CalculatorMeta(org.pentaho.di.trans.steps.calculator.CalculatorMeta) CalculatorMetaFunction(org.pentaho.di.trans.steps.calculator.CalculatorMetaFunction) StepMeta(org.pentaho.di.trans.step.StepMeta)

Example 8 with ColumnInfo

use of org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo in project data-access by pentaho.

In the class StagingTransformGenerator, the method createDateCalc:

/**
 * Creates a calculation. Used to break out date fields.
 *
 * @param calcType    CalculatorMetaFunction.CALC_* constant identifying the date part to extract
 * @param fieldName   title of the resulting field
 * @param fieldId     id of the source date field (field A of the calculation)
 * @param valueLength length of the resulting integer value
 * @return the configured calculator function
 */
protected CalculatorMetaFunction createDateCalc(int calcType, String fieldName, String fieldId, int valueLength) {
    // Only field A (the source date) is used; B and C are unused for date breakouts.
    String fieldB = null;
    String fieldC = null;
    int valueType = ValueMetaInterface.TYPE_INTEGER;
    int valuePrecision = 0;
    boolean removedFromResult = false;
    // $NON-NLS-1$
    String conversionMask = "";
    // $NON-NLS-1$
    String decimalSymbol = "";
    // $NON-NLS-1$
    String groupingSymbol = "";
    // $NON-NLS-1$
    String currencySymbol = "";
    // BUG FIX: removed a leftover debug statement (System.out.println(99)).
    // BUG FIX: removed a ColumnInfo that was constructed and populated under an
    // "update the model" comment but never stored, registered, or returned — dead code.
    // NOTE(review): if registering that column in the model was the original intent,
    // that wiring was never present and still needs to be added by the caller.
    return new CalculatorMetaFunction(fieldName, calcType, fieldId, fieldB, fieldC, valueType, valueLength, valuePrecision, removedFromResult, conversionMask, decimalSymbol, groupingSymbol, currencySymbol);
}
Also used : ColumnInfo(org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo) CalculatorMetaFunction(org.pentaho.di.trans.steps.calculator.CalculatorMetaFunction)

Example 9 with ColumnInfo

use of org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo in project data-access by pentaho.

In the class CsvTransformGenerator, the method createInputStep:

/**
 * Builds the CSV-input step for the staging transformation: resolves the uploaded
 * file's absolute path, maps each model ColumnInfo onto a TextFileInputField,
 * configures the CsvInputMeta, and attaches an error-handling branch that records
 * rejected rows (up to maxErrorRows) into the transform stats.
 *
 * @param transMeta transformation the input step (and its error dummy step) are added to
 * @return the configured CSV input StepMeta
 */
protected StepMeta createInputStep(TransMeta transMeta) {
    CsvInputMeta csvInputMeta = new CsvInputMeta();
    CsvFileInfo fileInfo = getModelInfo().getFileInfo();
    String fileName = fileInfo.getTmpFilename();
    String path;
    // ".tmp" files live in the staging area; everything else in the configured upload path.
    if (fileName.endsWith(".tmp")) {
        // $NON-NLS-1$
        path = PentahoSystem.getApplicationContext().getSolutionPath(TMP_FILE_PATH);
    } else {
        String relativePath = PentahoSystem.getSystemSetting("file-upload-defaults/relative-path", // $NON-NLS-1$
        String.valueOf(DEFAULT_RELATIVE_UPLOAD_FILE_PATH));
        path = PentahoSystem.getApplicationContext().getSolutionPath(relativePath);
    }
    File file = new File(path + fileInfo.getTmpFilename());
    String filename = file.getAbsolutePath();
    // One input field per model column, in model order.
    ColumnInfo[] columns = getModelInfo().getColumns();
    TextFileInputField[] inputFields = new TextFileInputField[columns.length];
    int idx = 0;
    for (ColumnInfo column : columns) {
        TextFileInputField field = new TextFileInputField();
        field.setCurrencySymbol(fileInfo.getCurrencySymbol());
        // NOTE(review): decimal symbol is set from getCurrencySymbol() — looks like a
        // copy/paste slip; confirm whether CsvFileInfo exposes a decimal-symbol getter
        // that should be used here instead.
        field.setDecimalSymbol(fileInfo.getCurrencySymbol());
        field.setFormat(column.getFormat());
        field.setGroupSymbol(fileInfo.getGroupSymbol());
        field.setIfNullValue(fileInfo.getIfNull());
        field.setIgnored(column.isIgnore());
        field.setLength(column.getLength());
        field.setName(column.getId());
        field.setNullString(fileInfo.getNullStr());
        // field.setPosition(position);
        field.setPrecision(column.getPrecision());
        field.setRepeated(false);
        field.setSamples(null);
        field.setTrimType(ValueMeta.TRIM_TYPE_BOTH);
        field.setType(convertDataType(column));
        inputFields[idx] = field;
        idx++;
    }
    csvInputMeta.setAddResultFile(false);
    // $NON-NLS-1$
    csvInputMeta.setBufferSize("5000");
    csvInputMeta.setDelimiter(fileInfo.getDelimiter());
    csvInputMeta.setEnclosure(fileInfo.getEnclosure());
    csvInputMeta.setEncoding(fileInfo.getEncoding());
    csvInputMeta.setFilename(filename);
    csvInputMeta.setFilenameField(null);
    // TODO strip off more than one row if present...
    csvInputMeta.setHeaderPresent(fileInfo.getHeaderRows() > 0);
    // inputMeta.get.setID(1);
    csvInputMeta.setIncludingFilename(false);
    csvInputMeta.setInputFields(inputFields);
    csvInputMeta.setLazyConversionActive(true);
    // $NON-NLS-1$
    csvInputMeta.setRowNumField("");
    csvInputMeta.setRunningInParallel(false);
    // inputMeta.setTargetSteps(null);
    StepMeta csvInputStepMeta = new StepMeta(CSV_INPUT, CSV_INPUT, csvInputMeta);
    // NOTE(review): this plain StepErrorMeta is immediately replaced by
    // csvInputErrorMeta below — the first setStepErrorMeta call appears redundant.
    csvInputStepMeta.setStepErrorMeta(new StepErrorMeta(transMeta, csvInputStepMeta));
    transMeta.addStep(csvInputStepMeta);
    // Reset the shared rejected-row counter before wiring up error capture.
    csvErrorRowCount = 0;
    final FileTransformStats stats = getTransformStats();
    // Anonymous subclass: capture each rejected row's data (up to maxErrorRows)
    // into the transform stats, then delegate to the normal error handling.
    StepErrorMeta csvInputErrorMeta = new StepErrorMeta(transMeta, csvInputStepMeta) {

        public void addErrorRowData(Object[] row, int startIndex, long nrErrors, String errorDescriptions, String fieldNames, String errorCodes) {
            if (csvErrorRowCount < maxErrorRows) {
                StringBuffer sb = new StringBuffer();
                sb.append("Rejected Row: ");
                for (Object rowData : row) {
                    sb.append(rowData);
                    sb.append(", ");
                }
                sb.append("\r\n");
                stats.getErrors().add(sb.toString() + errorDescriptions);
            }
            // Count every rejected row, even past the capture limit.
            csvErrorRowCount++;
            stats.setErrorCount(csvErrorRowCount);
            super.addErrorRowData(row, startIndex, nrErrors, errorDescriptions, fieldNames, errorCodes);
        }
    };
    // Route error rows to a dummy step so they are consumed rather than failing the transform.
    StepMeta outputDummyStepMeta = addDummyStep(transMeta, "CSVInputErrorDummy");
    csvInputErrorMeta.setTargetStep(outputDummyStepMeta);
    csvInputErrorMeta.setEnabled(true);
    csvInputStepMeta.setStepErrorMeta(csvInputErrorMeta);
    return csvInputStepMeta;
}
Also used : CsvInputMeta(org.pentaho.di.trans.steps.csvinput.CsvInputMeta) TextFileInputField(org.pentaho.di.trans.steps.textfileinput.TextFileInputField) StepErrorMeta(org.pentaho.di.trans.step.StepErrorMeta) ColumnInfo(org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo) StepMeta(org.pentaho.di.trans.step.StepMeta) CsvFileInfo(org.pentaho.platform.dataaccess.datasource.wizard.models.CsvFileInfo) File(java.io.File) FileTransformStats(org.pentaho.platform.dataaccess.datasource.wizard.sources.csv.FileTransformStats)

Example 10 with ColumnInfo

use of org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo in project data-access by pentaho.

In the class CsvTransformGeneratorIT, the method addColumnToModel:

/**
 * Test helper: appends one extra numeric, indexed column ("PC_999" / "NEW_COLUMN",
 * SUM aggregation, FIELD_TYPE_BOTH) to the model's column array.
 *
 * @param info model whose column array is extended in place
 */
private static void addColumnToModel(ModelInfo info) {
    ColumnInfo extra = new ColumnInfo();
    extra.setId("PC_999");
    extra.setTitle("NEW_COLUMN");
    extra.setDataType(DataType.NUMERIC);
    extra.setFieldType(ColumnInfo.FIELD_TYPE_BOTH);
    extra.setAggregateType(AggregationType.SUM.toString());
    extra.setIndex(true);
    // Append to the existing columns and write the widened array back to the model.
    ColumnInfo[] existing = info.getColumns();
    info.setColumns((ColumnInfo[]) ArrayUtils.add(existing, extra));
}
Also used : ColumnInfo(org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo)

Aggregations

ColumnInfo (org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo)16 Test (org.junit.Test)5 ArrayList (java.util.ArrayList)4 StepMeta (org.pentaho.di.trans.step.StepMeta)3 ModelInfo (org.pentaho.platform.dataaccess.datasource.wizard.models.ModelInfo)3 List (java.util.List)2 CalculatorMetaFunction (org.pentaho.di.trans.steps.calculator.CalculatorMetaFunction)2 Column (org.pentaho.metadata.model.thin.Column)2 CsvFileInfo (org.pentaho.platform.dataaccess.datasource.wizard.models.CsvFileInfo)2 DataRow (org.pentaho.platform.dataaccess.datasource.wizard.models.DataRow)2 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 DecimalFormat (java.text.DecimalFormat)1 Locale (java.util.Locale)1 Vector (java.util.Vector)1 Before (org.junit.Before)1