Use of org.talend.fileprocess.FileInputDelimited in project tdi-studio-se by Talend: class MultiSchemasManager, method retrieveCsvArrayByDelimited.
/**
 * Reads the delimited file and aggregates its rows into a {@link CSVArrayAndSeparator}
 * bean, keyed on the value found at {@code selectColumnIndex}. Rows whose key column is
 * blank are skipped, and each distinct key is processed only once.
 *
 * @param filePath path of the delimited file to read
 * @param encoding character encoding of the file
 * @param fieldSeparator separator between fields of a record
 * @param rowSeparator separator between records
 * @param needSkpipEmptyRecord whether empty records are skipped by the reader
 * @param splitRecord whether records are split by the reader
 * @param selectColumnIndex zero-based index of the key column
 * @return the populated bean, or {@code null} when the file yields no columns or an
 *         I/O error occurs
 */
private CSVArrayAndSeparator retrieveCsvArrayByDelimited(final String filePath, final String encoding, final String fieldSeparator, final String rowSeparator, final boolean needSkpipEmptyRecord, final boolean splitRecord, int selectColumnIndex) {
    CSVArrayAndSeparator csvArrayBean = new CSVArrayAndSeparator();
    FileInputDelimited fileInputDelimited = null;
    try {
        fileInputDelimited = new FileInputDelimited(filePath, encoding, fieldSeparator, rowSeparator, needSkpipEmptyRecord, 0, 0, -1, -1, splitRecord);
        final int maxColumnCount = FileInputDelimited.getMaxColumnCount(filePath, encoding, fieldSeparator, rowSeparator, needSkpipEmptyRecord, splitRecord, 0, Integer.MAX_VALUE);
        if (maxColumnCount < 1) {
            // nothing parseable in the file
            return null;
        }
        final Set<String> seenKeys = new HashSet<String>();
        while (fileInputDelimited.nextRecord()) {
            final String keyValue = fileInputDelimited.get(selectColumnIndex);
            if ("".equals(keyValue.trim())) { //$NON-NLS-1$
                // the record must carry a non-blank key column value
                continue;
            }
            if (!seenKeys.add(keyValue)) {
                // this key was already handled; process each key only once
                continue;
            }
            handlerDelimitedArray(csvArrayBean, maxColumnCount, keyValue, fileInputDelimited, fieldSeparator);
        }
    } catch (IOException e) {
        ExceptionHandler.process(e);
        return null;
    } finally {
        if (fileInputDelimited != null) {
            fileInputDelimited.close();
        }
    }
    return csvArrayBean;
}
Use of org.talend.fileprocess.FileInputDelimited in project components by Talend: class FileInputDelimitedRuntime, method previewData.
/**
 * Reads up to {@code maxRowsToPreview} rows from the configured file, optionally
 * capturing the first row as column names, and returns the preview as a JSON object
 * with keys {@code "columnNames"} (when a header row was read) and {@code "data"}.
 *
 * NOTE(review): this method mutates instance state ({@code header}, {@code limit},
 * {@code lastLine}, {@code currentLine}, {@code columnNames}, {@code columnsLength}) —
 * presumably it is only called once per runtime instance; confirm before reusing.
 *
 * @param maxRowsToPreview maximum number of data rows to include in the preview
 * @return JSON string holding the preview data and, if present, the column names
 * @throws IOException if the file cannot be opened or read
 */
// Preview data and guess the columns
public String previewData(int maxRowsToPreview) throws IOException {
    init();
    Map<String, Object> result = new HashMap<String, Object>();
    boolean retrieveHeader = false;
    if (header > 0) {
        // consume one header row: shift the configured header count down by one
        // and remember that the next row read supplies the column names
        header = header - 1;
        retrieveHeader = true;
    }
    String[] rowData = null;
    List<String[]> data = new ArrayList<>();
    if (props.csvOptions.getValue()) {
        // CSV mode: parse with the CSV reader
        if (limit < 1) {
            // no explicit limit configured — cap the preview at the requested size
            limit = maxRowsToPreview;
        }
        CSVReader csvReader = getCsvReader();
        if (retrieveHeader) {
            // the header row does not count against the last-line boundary
            lastLine = lastLine - 1;
        }
        try {
            if (csvReader != null && csvReader.readNext()) {
                rowData = csvReader.getValues();
                if (retrieveHeader) {
                    // first row is the header: expose it as column names, not data
                    result.put("columnNames", rowData);
                    columnNames = Arrays.asList(rowData);
                    LOG.debug("columnNames " + columnNames);
                } else {
                    data.add(rowData);
                    updateColumnsLength(rowData);
                }
                while (csvReader.readNext()) {
                    rowData = csvReader.getValues();
                    // "\015" is a lone carriage return: skip rows that are empty
                    // apart from the CR when empty-row removal is enabled
                    if (props.removeEmptyRow.getValue() && (rowData.length == 1 && ("\015").equals(rowData[0]))) {
                        continue;
                    }
                    currentLine++;
                    // stop once past the configured last line or the preview cap
                    if (lastLine > -1 && (currentLine > lastLine || currentLine > maxRowsToPreview)) {
                        break;
                    }
                    data.add(rowData);
                    updateColumnsLength(rowData);
                    LOG.debug("Preview row " + currentLine + " : " + Arrays.asList(rowData));
                }
            }
        } finally {
            if (csvReader != null) {
                csvReader.close();
            }
        }
    } else {
        // delimited mode: parse with FileInputDelimited
        if (retrieveHeader) {
            // one extra row must be read to account for the header row
            if (limit > 0) {
                limit = limit + 1;
            } else {
                if (limit < 1) {
                    limit = maxRowsToPreview + 1;
                }
            }
        }
        FileInputDelimited fid = getFileDelimited();
        try {
            while (fid != null && fid.nextRecord()) {
                // rows may have differing widths — size each row individually
                int currentRowColsCount = fid.getColumnsCountOfCurrentRow();
                rowData = new String[currentRowColsCount];
                for (int i = 0; i < rowData.length; i++) {
                    rowData[i] = fid.get(i);
                }
                if (retrieveHeader) {
                    // first record supplies the column names; subsequent records are data
                    result.put("columnNames", rowData);
                    columnNames = Arrays.asList(rowData);
                    LOG.debug("columnNames " + columnNames);
                    retrieveHeader = false;
                } else {
                    currentLine++;
                    data.add(rowData);
                    updateColumnsLength(rowData);
                    LOG.debug("Preview row " + currentLine + " : " + Arrays.asList(rowData));
                }
            }
        } finally {
            if (fid != null) {
                fid.close();
            }
        }
    }
    result.put("data", data);
    if (data.size() > 0) {
        LOG.debug("Max columns count:" + columnsLength.size());
    }
    Gson gson = new Gson();
    return gson.toJson(result);
}
Use of org.talend.fileprocess.FileInputDelimited in project tdq-studio-se by Talend: class DelimitedFileIndicatorEvaluator, method useDelimitedReader.
/**
 * Parses the delimited file with the TOSDelimitedReader-based {@link FileInputDelimited}
 * and hands every row to {@code handleByARow} for indicator evaluation.
 *
 * @param analysisElementList analyzed model elements of the current analysis
 * @param columnElementList metadata columns matching the analyzed elements
 * @param indicToRowMap mapping from indicator to its analyzed data set
 * @return a {@link ReturnCode} that is ok only when every handled row reported ok
 */
private ReturnCode useDelimitedReader(List<ModelElement> analysisElementList, List<MetadataColumn> columnElementList, EMap<Indicator, AnalyzedDataSet> indicToRowMap) {
    // use TOSDelimitedReader in FileInputDelimited to parse.
    ReturnCode returnCode = new ReturnCode(true);
    FileInputDelimited fileInputDelimited = null;
    try {
        fileInputDelimited = createFileInputDelimited();
        // start counting from the configured head (header) offset
        long currentRow = JavaSqlFactory.getHeadValue(delimitedFileconnection);
        while (fileInputDelimited.nextRecord()) {
            if (!continueRun()) {
                break;
            }
            currentRow++;
            int columsCount = fileInputDelimited.getColumnsCountOfCurrentRow();
            String[] rowValues = new String[columsCount];
            for (int i = 0; i < columsCount; i++) {
                rowValues[i] = fileInputDelimited.get(i);
            }
            // note: once returnCode is not ok, && short-circuits and later rows are
            // no longer handled — preserved from the original implementation
            returnCode.setOk(returnCode.isOk() && handleByARow(rowValues, currentRow, analysisElementList, columnElementList, indicToRowMap).isOk());
        }
    } catch (IOException e) {
        log.error(e, e);
    } finally {
        // bug fix: close in finally so the reader is released even when
        // nextRecord()/get() throws mid-file (was only closed on the success path)
        if (fileInputDelimited != null) {
            fileInputDelimited.close();
        }
    }
    return returnCode;
}
Use of org.talend.fileprocess.FileInputDelimited in project tdq-studio-se by Talend: class ColumnSetIndicatorEvaluator, method evaluateByDelimitedFile.
/**
 * Organizes the analyzed-object lists for a DelimitedFile connection by parsing the
 * underlying file, either with a CSV reader (CSV escape mode) or with
 * {@link FileInputDelimited}, and feeding each row to {@code orgnizeObjectsToHandel}.
 *
 * @param sqlStatement unused here; kept for signature compatibility with other evaluators
 * @param returnCode the return code to update with any failure message
 * @return the (possibly updated) return code
 */
private ReturnCode evaluateByDelimitedFile(String sqlStatement, ReturnCode returnCode) {
    DelimitedFileConnection fileConnection = (DelimitedFileConnection) analysis.getContext().getConnection();
    String path = JavaSqlFactory.getURL(fileConnection);
    String rowSeparator = JavaSqlFactory.getRowSeparatorValue(fileConnection);
    IPath iPath = new Path(path);
    File file = iPath.toFile();
    if (!file.exists()) {
        returnCode.setReturnCode(Messages.getString("ColumnSetIndicatorEvaluator.FileNotFound", file.getName()), false); //$NON-NLS-1$
        return returnCode;
    }
    CSVReader csvReader = null;
    FileInputDelimited fileInputDelimited = null;
    try {
        List<ModelElement> analysisElementList = this.analysis.getContext().getAnalysedElements();
        EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
        indicToRowMap.clear();
        if (Escape.CSV.equals(fileConnection.getEscapeType())) {
            // use CsvReader to parse.
            csvReader = FileUtils.createCsvReader(file, fileConnection);
            this.useCsvReader(csvReader, file, fileConnection, analysisElementList);
        } else {
            // use TOSDelimitedReader in FileInputDelimited to parse.
            fileInputDelimited = AnalysisExecutorHelper.createFileInputDelimited(fileConnection);
            long currentRow = JavaSqlFactory.getHeadValue(fileConnection);
            int columsCount = 0;
            while (fileInputDelimited.nextRecord()) {
                if (!continueRun()) {
                    break;
                }
                currentRow++;
                // column count is fixed from the first record read — later rows are
                // truncated or padded to that width (preserved original behavior)
                if (columsCount == 0) {
                    columsCount = fileInputDelimited.getColumnsCountOfCurrentRow();
                }
                String[] rowValues = new String[columsCount];
                for (int i = 0; i < columsCount; i++) {
                    rowValues[i] = fileInputDelimited.get(i);
                }
                orgnizeObjectsToHandel(path, rowValues, currentRow, analysisElementList, rowSeparator);
            }
            // TDQ-5851~
        }
    } catch (Exception e) {
        log.error(e, e);
        returnCode.setReturnCode(e.getMessage(), false);
    } finally {
        if (csvReader != null) {
            try {
                csvReader.close();
            } catch (IOException e) {
                log.error(e, e);
            }
        }
        // bug fix: close the delimited reader in finally as well — previously it was
        // closed only on the success path and leaked when parsing threw
        if (fileInputDelimited != null) {
            fileInputDelimited.close();
        }
    }
    return returnCode;
}
Use of org.talend.fileprocess.FileInputDelimited in project tdq-studio-se by Talend: class DelimitedFileSQLExecutor, method useFileInputDelimited.
/**
 * Reads the delimited file with {@link FileInputDelimited}, extracting only the
 * analyzed columns of each record and passing them to {@code handleRow}, stopping
 * when the configured limit (if any) is reached.
 *
 * @param analysedElements the model elements (columns) selected for analysis
 * @param delimitedFileconnection the delimited-file connection to read from
 * @throws IOException if reading the file fails
 * @throws Exception if creating the reader or handling a row fails
 */
private void useFileInputDelimited(List<ModelElement> analysedElements, DelimitedFileConnection delimitedFileconnection) throws IOException, Exception {
    int[] analysedColumnIndex = getAnalysedColumnPositionInFileTable(analysedElements);
    FileInputDelimited fileInputDelimited = AnalysisExecutorHelper.createFileInputDelimited(delimitedFileconnection);
    try {
        int index = 0;
        while (fileInputDelimited.nextRecord()) {
            index++;
            int columsCount = analysedElements.size();
            String[] rowValues = new String[columsCount];
            // pick only the analyzed columns, in the order of analysedElements
            for (int i = 0; i < columsCount; i++) {
                rowValues[i] = fileInputDelimited.get(analysedColumnIndex[i]);
            }
            handleRow(rowValues);
            // honor the configured row limit, if any
            if (getLimit() > 0 && index >= getLimit()) {
                break;
            }
        }
    } finally {
        // bug fix: close in finally so the reader is released even when
        // nextRecord()/get()/handleRow throws (was only closed on the success path)
        fileInputDelimited.close();
    }
}
Aggregations