use of com.univocity.parsers.csv.CsvParserSettings in project carbondata by apache.
the class CSVInputFormat method extractCsvParserSettings.
/**
 * Creates the univocity CSV parser settings described by the given job configuration.
 *
 * <p>The delimiter, comment, quote and quote-escape characters are each the first
 * character of the matching configuration value (falling back to the respective
 * default). Skip-empty-lines and the maximum number of columns are also read from the
 * configuration; every other option is fixed by this method.
 *
 * @param job configuration the CSV options are looked up in
 * @return a newly created, fully populated settings object
 */
public static CsvParserSettings extractCsvParserSettings(Configuration job) {
  // Look up the configurable options first, in the same order they were read before.
  final char delimiterChar = job.get(DELIMITER, DELIMITER_DEFAULT).charAt(0);
  final char commentChar = job.get(COMMENT, COMMENT_DEFAULT).charAt(0);
  final boolean skipEmptyLines = Boolean.parseBoolean(
      job.get(SKIP_EMPTY_LINE, CarbonCommonConstants.CARBON_SKIP_EMPTY_LINE_DEFAULT));
  final int maxColumnCount = Integer.parseInt(
      job.get(MAX_COLUMNS, "" + DEFAULT_MAX_NUMBER_OF_COLUMNS_FOR_PARSING));
  final char quoteChar = job.get(QUOTE, QUOTE_DEFAULT).charAt(0);
  final char escapeChar = job.get(ESCAPE, ESCAPE_DEFAULT).charAt(0);

  final CsvParserSettings settings = new CsvParserSettings();

  // Character-level format options.
  settings.getFormat().setDelimiter(delimiterChar);
  settings.getFormat().setComment(commentChar);
  settings.getFormat().setQuote(quoteChar);
  settings.getFormat().setQuoteEscape(escapeChar);

  // Fixed behaviour: detect line separators, map null/empty values to "" and keep
  // leading and trailing whitespace untouched.
  settings.setLineSeparatorDetectionEnabled(true);
  settings.setNullValue("");
  settings.setEmptyValue("");
  settings.setIgnoreLeadingWhitespaces(false);
  settings.setIgnoreTrailingWhitespaces(false);

  // Limits and line skipping.
  settings.setSkipEmptyLines(skipEmptyLines);
  settings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_DEFAULT);
  settings.setMaxColumns(maxColumnCount);

  return settings;
}
use of com.univocity.parsers.csv.CsvParserSettings in project mapping-benchmark by arnaudroger.
the class UnivocityConcurrentCsvParserBenchmark method parseCsv.
/**
 * Benchmark that parses the CSV input with univocity and passes every parsed row
 * to the supplied blackhole.
 *
 * @param blackhole sink that receives each parsed row
 * @throws IOException if the reader cannot be opened or closed
 */
@Benchmark
public void parseCsv(Blackhole blackhole) throws IOException {
  // Row callback: forwards each raw row to the blackhole.
  AbstractRowProcessor rowSink = new AbstractRowProcessor() {
    @Override
    public void rowProcessed(String[] row, ParsingContext context) {
      blackhole.consume(row);
    }
  };

  CsvParserSettings parserSettings = new CsvParserSettings();
  // turning off features enabled by default
  parserSettings.setIgnoreLeadingWhitespaces(false);
  parserSettings.setIgnoreTrailingWhitespaces(false);
  parserSettings.setSkipEmptyLines(false);
  parserSettings.setColumnReorderingEnabled(false);
  // Input is read on a separate thread in this benchmark.
  parserSettings.setReadInputOnSeparateThread(true);
  parserSettings.setRowProcessor(rowSink);

  com.univocity.parsers.csv.CsvParser csvParser =
      new com.univocity.parsers.csv.CsvParser(parserSettings);
  try (Reader input = CsvParam.getSingleThreadedReader(quotes, nbRows)) {
    csvParser.parse(input);
  }
}
use of com.univocity.parsers.csv.CsvParserSettings in project mapping-benchmark by arnaudroger.
the class UnivocityConcurrentCsvParserBenchmark method mapCsv.
/**
 * Benchmark that parses the CSV input with univocity, maps every row to a
 * {@code City} bean and passes each bean to the supplied blackhole.
 *
 * @param blackhole sink that receives each mapped bean
 * @throws IOException if the reader cannot be opened or closed
 */
@Benchmark
public void mapCsv(Blackhole blackhole) throws IOException {
  // Bean callback: forwards each mapped City to the blackhole.
  BeanProcessor<City> beanSink = new BeanProcessor<City>(City.class) {
    @Override
    public void beanProcessed(City bean, ParsingContext context) {
      blackhole.consume(bean);
    }
  };

  CsvParserSettings parserSettings = new CsvParserSettings();
  // turning off features enabled by default
  parserSettings.setIgnoreLeadingWhitespaces(false);
  parserSettings.setIgnoreTrailingWhitespaces(false);
  parserSettings.setSkipEmptyLines(false);
  parserSettings.setColumnReorderingEnabled(false);
  parserSettings.setRowProcessor(beanSink);

  com.univocity.parsers.csv.CsvParser csvParser =
      new com.univocity.parsers.csv.CsvParser(parserSettings);
  try (Reader input = CsvParam.getSingleThreadedReader(quotes, nbRows)) {
    csvParser.parse(input);
  }
}
use of com.univocity.parsers.csv.CsvParserSettings in project mapping-benchmark by arnaudroger.
the class UnivocityCsvParserBenchmark method main.
/**
 * Stand-alone entry point: sets up a {@code CsvParam}, parses its reader with
 * univocity, maps each row to a {@code City} bean and prints every bean to
 * standard output.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the reader cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
  CsvParam param = new CsvParam();
  param.setUp();

  // Bean callback: prints each mapped City.
  BeanProcessor<City> printer = new BeanProcessor<City>(City.class) {
    @Override
    public void beanProcessed(City bean, ParsingContext context) {
      System.out.println(bean);
    }
  };

  CsvParserSettings parserSettings = new CsvParserSettings();
  // turning off features enabled by default
  parserSettings.setIgnoreLeadingWhitespaces(false);
  parserSettings.setIgnoreTrailingWhitespaces(false);
  parserSettings.setSkipEmptyLines(false);
  parserSettings.setColumnReorderingEnabled(false);
  // Input is read on the calling thread here.
  parserSettings.setReadInputOnSeparateThread(false);
  parserSettings.setProcessor(printer);

  com.univocity.parsers.csv.CsvParser csvParser =
      new com.univocity.parsers.csv.CsvParser(parserSettings);
  try (Reader input = param.getReader()) {
    csvParser.parse(input);
  }
}
use of com.univocity.parsers.csv.CsvParserSettings in project Mycat_plus by coderczp.
the class ServerLoadDataInfileHandler method end.
/**
 * Finishes a LOAD DATA INFILE transfer.
 *
 * <p>Clears the in-progress flag, records the packet id, makes a final
 * {@code saveByteOrToFile(null, true)} call, and then parses the received data:
 * from {@code tempFile} via {@code parseFileByLine} when {@code isHasStoreToFile}
 * is set, otherwise from the in-memory byte buffer using a univocity
 * {@code CsvParser} configured from {@code loadData}. Finally builds the route
 * result set and, if one was produced, flushes data to file and executes it on
 * the session as a LOAD DATA INFILE statement.
 *
 * @param packID packet id of the terminating packet; stored in {@code this.packID}
 */
@Override
public void end(byte packID) {
// Loading is over: clear the in-progress flag and remember the packet id.
isStartLoadData = false;
this.packID = packID;
// Empty LOAD DATA packet: marks the end of the data.
saveByteOrToFile(null, true);
List<SQLExpr> columns = statement.getColumns();
String tableName = statement.getTableName().getSimpleName();
if (isHasStoreToFile) {
// Data was stored in a temp file: parse it from there.
parseFileByLine(tempFile, loadData.getCharset(), loadData.getLineTerminatedBy());
} else {
// Data is still in memory: decode the buffer with the charset given by loadData.
String content = new String(tempByteBuffer.toByteArray(), Charset.forName(loadData.getCharset()));
// List<String> lines = Splitter.on(loadData.getLineTerminatedBy()).omitEmptyStrings().splitToList(content);
CsvParserSettings settings = new CsvParserSettings();
settings.setMaxColumns(65535);
settings.setMaxCharsPerColumn(65535);
// Line separator and field delimiter come from the LOAD DATA clause;
// only the first character of the field terminator is used.
settings.getFormat().setLineSeparator(loadData.getLineTerminatedBy());
settings.getFormat().setDelimiter(loadData.getFieldTerminatedBy().charAt(0));
// Quote and quote-escape characters are only overridden when specified.
if (loadData.getEnclose() != null) {
settings.getFormat().setQuote(loadData.getEnclose().charAt(0));
}
if (loadData.getEscape() != null) {
settings.getFormat().setQuoteEscape(loadData.getEscape().charAt(0));
}
// The first character of the line terminator is used as the normalized newline.
settings.getFormat().setNormalizedNewline(loadData.getLineTerminatedBy().charAt(0));
/*
 * fix bug #1074 : every Boolean value imported through LOAD DATA LOCAL INFILE became false.
 * CsvParser treats invisible characters as whitespace and strips them; settings.trimValues(false) prevents that.
 * TODO : with trimValues(false), leading/trailing whitespace of field values is no longer stripped!
 */
settings.trimValues(false);
CsvParser parser = new CsvParser(settings);
try {
parser.beginParsing(new StringReader(content));
String[] row = null;
// Hand every parsed row to parseOneLine until the parser reports no more rows.
while ((row = parser.parseNext()) != null) {
parseOneLine(columns, tableName, row, false, null);
}
} finally {
// Always stop the parser, even when processing a row throws.
parser.stopParsing();
}
}
// Build the routing result; when one exists, flush data to file and execute it.
RouteResultset rrs = buildResultSet(routeResultMap);
if (rrs != null) {
flushDataToFile();
serverConnection.getSession2().execute(rrs, ServerParse.LOAD_DATA_INFILE_SQL);
}
// sendOk(++packID);
}
Aggregations