Search in sources:

Example 1 with FileInputDelimited

use of org.talend.fileprocess.FileInputDelimited in project tdi-studio-se by Talend.

the class MultiSchemasManager method retrieveCsvArrayByDelimited.

/**
 * Scans the delimited file at {@code filePath} and, for each distinct non-blank value found in column
 * {@code selectColumnIndex}, hands the first record carrying that value to {@code handlerDelimitedArray}.
 *
 * @param filePath path of the delimited file to read
 * @param encoding character encoding passed to the reader
 * @param fieldSeparator field separator passed to the reader and to {@code handlerDelimitedArray}
 * @param rowSeparator row separator passed to the reader
 * @param needSkpipEmptyRecord forwarded unchanged to {@link FileInputDelimited}
 * @param splitRecord forwarded unchanged to {@link FileInputDelimited}
 * @param selectColumnIndex index of the column whose value is used as the de-duplication key
 * @return the populated bean, or {@code null} when the file's maximum column count is below 1 or an
 * {@link IOException} occurs while reading
 */
private CSVArrayAndSeparator retrieveCsvArrayByDelimited(final String filePath, final String encoding, final String fieldSeparator, final String rowSeparator, final boolean needSkpipEmptyRecord, final boolean splitRecord, int selectColumnIndex) {
    final CSVArrayAndSeparator result = new CSVArrayAndSeparator();
    FileInputDelimited reader = null;
    try {
        reader = new FileInputDelimited(filePath, encoding, fieldSeparator, rowSeparator, needSkpipEmptyRecord, 0, 0, -1, -1, splitRecord);
        final int widestRow = FileInputDelimited.getMaxColumnCount(filePath, encoding, fieldSeparator, rowSeparator, needSkpipEmptyRecord, splitRecord, 0, Integer.MAX_VALUE);
        if (widestRow < 1) {
            return null;
        }
        final Set<String> seenKeys = new HashSet<String>();
        while (reader.nextRecord()) {
            final String key = reader.get(selectColumnIndex);
            // A record without a key value is ignored.
            if (key.trim().length() == 0) {
                continue;
            }
            // Set.add returns false when the key was already recorded, so only the first record per key passes.
            if (!seenKeys.add(key)) {
                continue;
            }
            handlerDelimitedArray(result, widestRow, key, reader, fieldSeparator);
        }
    } catch (IOException e) {
        ExceptionHandler.process(e);
        return null;
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    return result;
}
Also used : FileInputDelimited(org.talend.fileprocess.FileInputDelimited) CSVArrayAndSeparator(org.talend.designer.filemultischemas.data.CSVArrayAndSeparator) IOException(java.io.IOException) HashSet(java.util.HashSet)

Example 2 with FileInputDelimited

use of org.talend.fileprocess.FileInputDelimited in project components by Talend.

the class FileInputDelimitedRuntime method previewData.

/**
 * Previews the file's rows and returns them as a JSON string.
 * <p>
 * The resulting JSON object always contains a {@code "data"} entry (list of row arrays) and, when a header
 * is configured ({@code header > 0}), a {@code "columnNames"} entry taken from the first record read.
 * Depending on {@code props.csvOptions}, the file is read either with a {@link CSVReader} or with a
 * {@link FileInputDelimited}.
 * <p>
 * This method modifies the instance fields {@code header}, {@code limit}, {@code lastLine},
 * {@code currentLine} and {@code columnNames}.
 *
 * @param maxRowsToPreview used as the row limit when no positive {@code limit} is configured, and as an
 * upper bound on {@code currentLine} in the CSV branch
 * @return the preview serialized with Gson
 * @throws IOException declared by this method; presumably raised by the underlying readers
 */
public String previewData(int maxRowsToPreview) throws IOException {
    init();
    Map<String, Object> result = new HashMap<String, Object>();
    boolean retrieveHeader = false;
    // With a header configured, reduce the header count by one and treat the first record read as the
    // column names.
    // NOTE(review): presumably the readers skip "header" lines, so this leaves the last header line
    // readable - confirm against getCsvReader()/getFileDelimited().
    if (header > 0) {
        header = header - 1;
        retrieveHeader = true;
    }
    String[] rowData = null;
    List<String[]> data = new ArrayList<>();
    if (props.csvOptions.getValue()) {
        // CSV mode: fall back to the preview size when no positive limit is set.
        if (limit < 1) {
            limit = maxRowsToPreview;
        }
        CSVReader csvReader = getCsvReader();
        // NOTE(review): lastLine appears to be set up by init()/getCsvReader(); it is reduced by one when
        // the first record is consumed as the header - confirm the intended accounting.
        if (retrieveHeader) {
            lastLine = lastLine - 1;
        }
        try {
            if (csvReader != null && csvReader.readNext()) {
                // The first record is either the header or the first data row.
                rowData = csvReader.getValues();
                if (retrieveHeader) {
                    result.put("columnNames", rowData);
                    columnNames = Arrays.asList(rowData);
                    LOG.debug("columnNames " + columnNames);
                } else {
                    // Note: currentLine is not incremented for this first data row.
                    data.add(rowData);
                    updateColumnsLength(rowData);
                }
                while (csvReader.readNext()) {
                    rowData = csvReader.getValues();
                    // When empty-row removal is enabled, skip a row whose only value is a lone carriage
                    // return ("\015" is the octal escape for CR).
                    if (props.removeEmptyRow.getValue() && (rowData.length == 1 && ("\015").equals(rowData[0]))) {
                        continue;
                    }
                    currentLine++;
                    // Stop once past lastLine or past the preview size; the check only applies when
                    // lastLine is greater than -1.
                    if (lastLine > -1 && (currentLine > lastLine || currentLine > maxRowsToPreview)) {
                        break;
                    }
                    data.add(rowData);
                    updateColumnsLength(rowData);
                    LOG.debug("Preview row " + currentLine + " : " + Arrays.asList(rowData));
                }
            }
        } finally {
            if (csvReader != null) {
                csvReader.close();
            }
        }
    } else {
        // Delimited mode: when the header is read as a record, raise the limit by one so the header
        // record does not use up one of the preview rows.
        if (retrieveHeader) {
            if (limit > 0) {
                limit = limit + 1;
            } else {
                if (limit < 1) {
                    limit = maxRowsToPreview + 1;
                }
            }
        }
        FileInputDelimited fid = getFileDelimited();
        try {
            while (fid != null && fid.nextRecord()) {
                // Copy every column of the current record into a fresh array.
                int currentRowColsCount = fid.getColumnsCountOfCurrentRow();
                rowData = new String[currentRowColsCount];
                for (int i = 0; i < rowData.length; i++) {
                    rowData[i] = fid.get(i);
                }
                if (retrieveHeader) {
                    // Only the first record is treated as the header.
                    result.put("columnNames", rowData);
                    columnNames = Arrays.asList(rowData);
                    LOG.debug("columnNames " + columnNames);
                    retrieveHeader = false;
                } else {
                    currentLine++;
                    data.add(rowData);
                    updateColumnsLength(rowData);
                    LOG.debug("Preview row " + currentLine + " : " + Arrays.asList(rowData));
                }
            }
        } finally {
            if (fid != null) {
                fid.close();
            }
        }
    }
    result.put("data", data);
    if (data.size() > 0) {
        LOG.debug("Max columns count:" + columnsLength.size());
    }
    Gson gson = new Gson();
    return gson.toJson(result);
}
Also used : HashMap(java.util.HashMap) CSVReader(com.talend.csv.CSVReader) FileInputDelimited(org.talend.fileprocess.FileInputDelimited) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson)

Example 3 with FileInputDelimited

use of org.talend.fileprocess.FileInputDelimited in project tdq-studio-se by Talend.

the class DelimitedFileIndicatorEvaluator method useDelimitedReader.

/**
 * Reads the delimited file with a {@link FileInputDelimited} and passes every record to
 * {@code handleByARow}.
 * <p>
 * The reader is always closed, even when reading or row handling fails. An {@link IOException} is logged
 * and reported through the returned {@link ReturnCode} instead of being silently ignored.
 *
 * @param analysisElementList forwarded to {@code handleByARow}
 * @param columnElementList forwarded to {@code handleByARow}
 * @param indicToRowMap forwarded to {@code handleByARow}
 * @return a return code that is ok only if every handled row reported ok and no {@link IOException}
 * occurred
 */
private ReturnCode useDelimitedReader(List<ModelElement> analysisElementList, List<MetadataColumn> columnElementList, EMap<Indicator, AnalyzedDataSet> indicToRowMap) {
    // use TOSDelimitedReader in FileInputDelimited to parse.
    ReturnCode returnCode = new ReturnCode(true);
    try {
        FileInputDelimited fileInputDelimited = createFileInputDelimited();
        try {
            long currentRow = JavaSqlFactory.getHeadValue(delimitedFileconnection);
            while (fileInputDelimited.nextRecord()) {
                if (!continueRun()) {
                    break;
                }
                currentRow++;
                int columsCount = fileInputDelimited.getColumnsCountOfCurrentRow();
                String[] rowValues = new String[columsCount];
                for (int i = 0; i < columsCount; i++) {
                    rowValues[i] = fileInputDelimited.get(i);
                }
                // Kept as in the original: because of short-circuit evaluation, handleByARow is no longer
                // invoked once the return code has become not-ok.
                returnCode.setOk(returnCode.isOk() && handleByARow(rowValues, currentRow, analysisElementList, columnElementList, indicToRowMap).isOk());
            }
        } finally {
            // Release the reader even if nextRecord() or handleByARow() throws.
            fileInputDelimited.close();
        }
    } catch (IOException e) {
        log.error(e, e);
        // Report the failure to the caller, consistent with how other evaluators record read errors.
        returnCode.setReturnCode(e.getMessage(), false);
    }
    return returnCode;
}
Also used : ReturnCode(org.talend.utils.sugars.ReturnCode) FileInputDelimited(org.talend.fileprocess.FileInputDelimited) IOException(java.io.IOException)

Example 4 with FileInputDelimited

use of org.talend.fileprocess.FileInputDelimited in project tdq-studio-se by Talend.

the class ColumnSetIndicatorEvaluator method evaluateByDelimitedFile.

/**
 * Evaluates the analysis against a delimited-file connection by reading the file row by row.
 * <p>
 * When the connection's escape type is {@code Escape.CSV}, the file is parsed with a {@link CSVReader};
 * otherwise a {@link FileInputDelimited} is used and each record is passed to
 * {@code orgnizeObjectsToHandel}. Both readers are closed before the method returns, including when
 * reading fails.
 *
 * @param sqlStatement not used by this method
 * @param returnCode receives the failure message when the file is missing or an exception occurs
 * @return the same {@code returnCode} instance that was passed in
 */
private ReturnCode evaluateByDelimitedFile(String sqlStatement, ReturnCode returnCode) {
    DelimitedFileConnection fileConnection = (DelimitedFileConnection) analysis.getContext().getConnection();
    String path = JavaSqlFactory.getURL(fileConnection);
    String rowSeparator = JavaSqlFactory.getRowSeparatorValue(fileConnection);
    IPath iPath = new Path(path);
    File file = iPath.toFile();
    if (!file.exists()) {
        // $NON-NLS-1$
        returnCode.setReturnCode(Messages.getString("ColumnSetIndicatorEvaluator.FileNotFound", file.getName()), false);
        return returnCode;
    }
    CSVReader csvReader = null;
    try {
        List<ModelElement> analysisElementList = this.analysis.getContext().getAnalysedElements();
        EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
        indicToRowMap.clear();
        if (Escape.CSV.equals(fileConnection.getEscapeType())) {
            // use CsvReader to parse.
            csvReader = FileUtils.createCsvReader(file, fileConnection);
            this.useCsvReader(csvReader, file, fileConnection, analysisElementList);
        } else {
            // use TOSDelimitedReader in FileInputDelimited to parse.
            FileInputDelimited fileInputDelimited = AnalysisExecutorHelper.createFileInputDelimited(fileConnection);
            try {
                long currentRow = JavaSqlFactory.getHeadValue(fileConnection);
                // The column count is taken from the first record that reports a non-zero count and is
                // then reused for all following records.
                int columsCount = 0;
                while (fileInputDelimited.nextRecord()) {
                    if (!continueRun()) {
                        break;
                    }
                    currentRow++;
                    if (columsCount == 0) {
                        columsCount = fileInputDelimited.getColumnsCountOfCurrentRow();
                    }
                    String[] rowValues = new String[columsCount];
                    for (int i = 0; i < columsCount; i++) {
                        rowValues[i] = fileInputDelimited.get(i);
                    }
                    orgnizeObjectsToHandel(path, rowValues, currentRow, analysisElementList, rowSeparator);
                }
            } finally {
                // TDQ-5851~ close the reader even when reading or row handling throws; a failure of
                // close() itself is still caught by the enclosing catch block.
                fileInputDelimited.close();
            }
        }
    } catch (Exception e) {
        log.error(e, e);
        returnCode.setReturnCode(e.getMessage(), false);
    } finally {
        if (csvReader != null) {
            try {
                csvReader.close();
            } catch (IOException e) {
                log.error(e, e);
            }
        }
    }
    return returnCode;
}
Also used : IPath(org.eclipse.core.runtime.IPath) Path(org.eclipse.core.runtime.Path) IPath(org.eclipse.core.runtime.IPath) CSVReader(com.talend.csv.CSVReader) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) DelimitedFileConnection(org.talend.core.model.metadata.builder.connection.DelimitedFileConnection) IOException(java.io.IOException) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) SQLException(java.sql.SQLException) IOException(java.io.IOException) ModelElement(orgomg.cwm.objectmodel.core.ModelElement) FileInputDelimited(org.talend.fileprocess.FileInputDelimited) File(java.io.File)

Example 5 with FileInputDelimited

use of org.talend.fileprocess.FileInputDelimited in project tdq-studio-se by Talend.

the class DelimitedFileSQLExecutor method useFileInputDelimited.

/**
 * Reads the delimited file behind {@code delimitedFileconnection} and passes the analysed columns of each
 * record to {@code handleRow}.
 * <p>
 * Reading stops after {@code getLimit()} records when that limit is positive. The reader is closed in all
 * cases, including when reading or row handling throws.
 *
 * @param analysedElements the analysed elements; their count determines how many values are extracted
 * per record
 * @param delimitedFileconnection connection used to create the {@link FileInputDelimited}
 * @throws IOException declared by this method; presumably raised while reading the file
 * @throws Exception declared by this method; presumably raised while creating the reader or handling a row
 */
private void useFileInputDelimited(List<ModelElement> analysedElements, DelimitedFileConnection delimitedFileconnection) throws IOException, Exception {
    int[] analysedColumnIndex = getAnalysedColumnPositionInFileTable(analysedElements);
    FileInputDelimited fileInputDelimited = AnalysisExecutorHelper.createFileInputDelimited(delimitedFileconnection);
    try {
        // One value per analysed element; the count does not change between records.
        int columsCount = analysedElements.size();
        int index = 0;
        while (fileInputDelimited.nextRecord()) {
            index++;
            String[] rowValues = new String[columsCount];
            for (int i = 0; i < columsCount; i++) {
                // Map the i-th analysed element to its column position in the file.
                rowValues[i] = fileInputDelimited.get(analysedColumnIndex[i]);
            }
            handleRow(rowValues);
            if (getLimit() > 0 && index >= getLimit()) {
                break;
            }
        }
    } finally {
        // Release the reader even if nextRecord() or handleRow() throws.
        fileInputDelimited.close();
    }
}
Also used : FileInputDelimited(org.talend.fileprocess.FileInputDelimited)

Aggregations

FileInputDelimited (org.talend.fileprocess.FileInputDelimited): 6, IOException (java.io.IOException): 3, CSVReader (com.talend.csv.CSVReader): 2, Gson (com.google.gson.Gson): 1, File (java.io.File): 1, InputStream (java.io.InputStream): 1, SQLException (java.sql.SQLException): 1, ArrayList (java.util.ArrayList): 1, HashMap (java.util.HashMap): 1, HashSet (java.util.HashSet): 1, ZipEntry (java.util.zip.ZipEntry): 1, ZipInputStream (java.util.zip.ZipInputStream): 1, IPath (org.eclipse.core.runtime.IPath): 1, Path (org.eclipse.core.runtime.Path): 1, DelimitedFileConnection (org.talend.core.model.metadata.builder.connection.DelimitedFileConnection): 1, AnalyzedDataSet (org.talend.dataquality.analysis.AnalyzedDataSet): 1, DistinctCountIndicator (org.talend.dataquality.indicators.DistinctCountIndicator): 1, DuplicateCountIndicator (org.talend.dataquality.indicators.DuplicateCountIndicator): 1, Indicator (org.talend.dataquality.indicators.Indicator): 1, RowCountIndicator (org.talend.dataquality.indicators.RowCountIndicator): 1