Search in sources :

Example 11 with CsvParserSettings

use of com.univocity.parsers.csv.CsvParserSettings in project carbondata by apache.

the class CSVInputFormat method extractCsvParserSettings.

/**
 * Builds a {@link CsvParserSettings} instance from the CSV options held by the given
 * configuration.
 *
 * <p>Delimiter, comment, quote and quote-escape are each the first character of the string
 * returned by {@code job.get(key, default)}. Line-separator detection is enabled, null and
 * empty values are both mapped to the empty string, and leading/trailing whitespace is kept.
 *
 * @param job configuration the CSV options are read from
 * @return the newly created and populated parser settings
 */
public static CsvParserSettings extractCsvParserSettings(Configuration job) {
    CsvParserSettings settings = new CsvParserSettings();

    // Single-character format options: only the first character of each value is used.
    char delimiter = job.get(DELIMITER, DELIMITER_DEFAULT).charAt(0);
    settings.getFormat().setDelimiter(delimiter);
    char comment = job.get(COMMENT, COMMENT_DEFAULT).charAt(0);
    settings.getFormat().setComment(comment);

    // Fixed options that do not depend on the configuration.
    settings.setLineSeparatorDetectionEnabled(true);
    settings.setNullValue("");
    settings.setEmptyValue("");
    settings.setIgnoreLeadingWhitespaces(false);
    settings.setIgnoreTrailingWhitespaces(false);

    String skipEmptyLineText =
        job.get(SKIP_EMPTY_LINE, CarbonCommonConstants.CARBON_SKIP_EMPTY_LINE_DEFAULT);
    boolean skipEmptyLines = Boolean.parseBoolean(skipEmptyLineText);
    settings.setSkipEmptyLines(skipEmptyLines);

    // Size limits.
    settings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_DEFAULT);
    String maxColumnsText =
        job.get(MAX_COLUMNS, String.valueOf(DEFAULT_MAX_NUMBER_OF_COLUMNS_FOR_PARSING));
    int maxColumnCount = Integer.parseInt(maxColumnsText);
    settings.setMaxColumns(maxColumnCount);

    char quote = job.get(QUOTE, QUOTE_DEFAULT).charAt(0);
    settings.getFormat().setQuote(quote);
    char quoteEscape = job.get(ESCAPE, ESCAPE_DEFAULT).charAt(0);
    settings.getFormat().setQuoteEscape(quoteEscape);

    return settings;
}
Also used : CsvParserSettings(com.univocity.parsers.csv.CsvParserSettings)

Example 12 with CsvParserSettings

use of com.univocity.parsers.csv.CsvParserSettings in project mapping-benchmark by arnaudroger.

the class UnivocityConcurrentCsvParserBenchmark method parseCsv.

/**
 * Benchmark that parses the CSV input with the univocity parser and passes every parsed row
 * to the given blackhole.
 *
 * @param blackhole sink that consumes each parsed row
 * @throws IOException if obtaining or closing the reader fails
 */
@Benchmark
public void parseCsv(Blackhole blackhole) throws IOException {
    // Row callback: forward each row to the blackhole.
    AbstractRowProcessor rowSink = new AbstractRowProcessor() {

        @Override
        public void rowProcessed(String[] row, ParsingContext context) {
            blackhole.consume(row);
        }
    };

    CsvParserSettings parserSettings = new CsvParserSettings();
    // turning off features enabled by default
    parserSettings.setIgnoreLeadingWhitespaces(false);
    parserSettings.setIgnoreTrailingWhitespaces(false);
    parserSettings.setSkipEmptyLines(false);
    parserSettings.setColumnReorderingEnabled(false);
    // Input is read on a separate thread in this benchmark.
    parserSettings.setReadInputOnSeparateThread(true);
    parserSettings.setRowProcessor(rowSink);

    com.univocity.parsers.csv.CsvParser csvParser =
        new com.univocity.parsers.csv.CsvParser(parserSettings);
    try (Reader input = CsvParam.getSingleThreadedReader(quotes, nbRows)) {
        csvParser.parse(input);
    }
}
Also used : ParsingContext(com.univocity.parsers.common.ParsingContext) AbstractRowProcessor(com.univocity.parsers.common.processor.AbstractRowProcessor) Reader(java.io.Reader) CsvParserSettings(com.univocity.parsers.csv.CsvParserSettings) Benchmark(org.openjdk.jmh.annotations.Benchmark)

Example 13 with CsvParserSettings

use of com.univocity.parsers.csv.CsvParserSettings in project mapping-benchmark by arnaudroger.

the class UnivocityConcurrentCsvParserBenchmark method mapCsv.

/**
 * Benchmark that parses the CSV input with the univocity parser, maps each row to a
 * {@code City} bean and passes every bean to the given blackhole.
 *
 * @param blackhole sink that consumes each mapped bean
 * @throws IOException if obtaining or closing the reader fails
 */
@Benchmark
public void mapCsv(Blackhole blackhole) throws IOException {
    // Bean callback: forward each mapped City to the blackhole.
    BeanProcessor<City> beanSink = new BeanProcessor<City>(City.class) {

        @Override
        public void beanProcessed(City bean, ParsingContext context) {
            blackhole.consume(bean);
        }
    };

    CsvParserSettings parserSettings = new CsvParserSettings();
    // turning off features enabled by default
    parserSettings.setIgnoreLeadingWhitespaces(false);
    parserSettings.setIgnoreTrailingWhitespaces(false);
    parserSettings.setSkipEmptyLines(false);
    parserSettings.setColumnReorderingEnabled(false);
    parserSettings.setRowProcessor(beanSink);

    com.univocity.parsers.csv.CsvParser csvParser =
        new com.univocity.parsers.csv.CsvParser(parserSettings);
    try (Reader input = CsvParam.getSingleThreadedReader(quotes, nbRows)) {
        csvParser.parse(input);
    }
}
Also used : CsvParserSettings(com.univocity.parsers.csv.CsvParserSettings) ParsingContext(com.univocity.parsers.common.ParsingContext) Reader(java.io.Reader) Benchmark(org.openjdk.jmh.annotations.Benchmark)

Example 14 with CsvParserSettings

use of com.univocity.parsers.csv.CsvParserSettings in project mapping-benchmark by arnaudroger.

the class UnivocityCsvParserBenchmark method main.

/**
 * Stand-alone entry point: sets up a {@code CsvParam}, parses its reader with the univocity
 * parser, maps each row to a {@code City} bean and prints every bean to standard output.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if obtaining or closing the reader fails
 */
public static void main(String[] args) throws IOException {
    CsvParam csvParam = new CsvParam();
    csvParam.setUp();

    CsvParserSettings parserSettings = new CsvParserSettings();
    // turning off features enabled by default
    parserSettings.setIgnoreLeadingWhitespaces(false);
    parserSettings.setIgnoreTrailingWhitespaces(false);
    parserSettings.setSkipEmptyLines(false);
    parserSettings.setColumnReorderingEnabled(false);
    // Input is read on the calling thread here.
    parserSettings.setReadInputOnSeparateThread(false);
    // Bean callback: print each mapped City.
    parserSettings.setProcessor(new BeanProcessor<City>(City.class) {

        @Override
        public void beanProcessed(City bean, ParsingContext context) {
            System.out.println(bean);
        }
    });

    com.univocity.parsers.csv.CsvParser csvParser =
        new com.univocity.parsers.csv.CsvParser(parserSettings);
    try (Reader input = csvParam.getReader()) {
        csvParser.parse(input);
    }
}
Also used : ParsingContext(com.univocity.parsers.common.ParsingContext) Reader(java.io.Reader) CsvParam(org.simpleflatmapper.param.CsvParam) CsvParserSettings(com.univocity.parsers.csv.CsvParserSettings)

Example 15 with CsvParserSettings

use of com.univocity.parsers.csv.CsvParserSettings in project Mycat_plus by coderczp.

the class ServerLoadDataInfileHandler method end.

/**
 * Finishes a LOAD DATA transfer: flushes the remaining bytes, parses the received data
 * (from the temp file when it was spilled to disk, otherwise from the in-memory buffer),
 * and executes the resulting route, if any.
 *
 * @param packID packet id of the terminating packet; stored on this handler
 */
@Override
public void end(byte packID) {
    isStartLoadData = false;
    this.packID = packID;
    // The empty LOAD DATA packet marks the end of the data.
    saveByteOrToFile(null, true);
    List<SQLExpr> columns = statement.getColumns();
    String tableName = statement.getTableName().getSimpleName();
    if (!isHasStoreToFile) {
        // Everything is still in memory: decode it and parse it as CSV.
        String bufferedText = new String(tempByteBuffer.toByteArray(), Charset.forName(loadData.getCharset()));
        CsvParserSettings parserSettings = new CsvParserSettings();
        parserSettings.setMaxColumns(65535);
        parserSettings.setMaxCharsPerColumn(65535);
        parserSettings.getFormat().setLineSeparator(loadData.getLineTerminatedBy());
        parserSettings.getFormat().setDelimiter(loadData.getFieldTerminatedBy().charAt(0));
        // Quote and escape characters are optional in the LOAD DATA statement.
        if (loadData.getEnclose() != null) {
            parserSettings.getFormat().setQuote(loadData.getEnclose().charAt(0));
        }
        if (loadData.getEscape() != null) {
            parserSettings.getFormat().setQuoteEscape(loadData.getEscape().charAt(0));
        }
        parserSettings.getFormat().setNormalizedNewline(loadData.getLineTerminatedBy().charAt(0));
        /*
         * Fix for bug #1074: every Boolean value imported through LOAD DATA LOCAL INFILE
         * became false. Invisible characters are treated as whitespace by CsvParser and
         * filtered out; settings.trimValues(false) prevents that.
         * TODO: with trimValues(false), whitespace around field values is no longer
         * stripped either.
         */
        parserSettings.trimValues(false);
        CsvParser csvParser = new CsvParser(parserSettings);
        try {
            csvParser.beginParsing(new StringReader(bufferedText));
            for (String[] fields = csvParser.parseNext(); fields != null; fields = csvParser.parseNext()) {
                parseOneLine(columns, tableName, fields, false, null);
            }
        } finally {
            csvParser.stopParsing();
        }
    } else {
        // The data was spilled to a temp file: parse that file line by line.
        parseFileByLine(tempFile, loadData.getCharset(), loadData.getLineTerminatedBy());
    }
    RouteResultset routeResult = buildResultSet(routeResultMap);
    if (routeResult == null) {
        return;
    }
    flushDataToFile();
    serverConnection.getSession2().execute(routeResult, ServerParse.LOAD_DATA_INFILE_SQL);
    // sendOk(++packID) is left disabled here.
}
Also used : CsvParserSettings(com.univocity.parsers.csv.CsvParserSettings) CsvParser(com.univocity.parsers.csv.CsvParser) SQLExpr(com.alibaba.druid.sql.ast.SQLExpr) RouteResultset(io.mycat.route.RouteResultset)

Aggregations

CsvParserSettings (com.univocity.parsers.csv.CsvParserSettings)22 CsvParser (com.univocity.parsers.csv.CsvParser)15 SQLExpr (com.alibaba.druid.sql.ast.SQLExpr)6 ParsingContext (com.univocity.parsers.common.ParsingContext)5 Reader (java.io.Reader)5 Benchmark (org.openjdk.jmh.annotations.Benchmark)4 AbstractRowProcessor (com.univocity.parsers.common.processor.AbstractRowProcessor)2 ConcurrentRowProcessor (com.univocity.parsers.common.processor.ConcurrentRowProcessor)2 RowListProcessor (com.univocity.parsers.common.processor.RowListProcessor)2 RouteResultset (io.mycat.route.RouteResultset)2 ArrayList (java.util.ArrayList)2 RouteResultset (com.actiontech.dble.route.RouteResultset)1 JSONElement (com.eden.common.json.JSONElement)1 TsvParser (com.univocity.parsers.tsv.TsvParser)1 TsvParserSettings (com.univocity.parsers.tsv.TsvParserSettings)1 JsonDocument (io.lumeer.api.dto.JsonDocument)1 Document (io.lumeer.api.model.Document)1 DataDocument (io.lumeer.engine.api.data.DataDocument)1 InputStreamReader (java.io.InputStreamReader)1 StringReader (java.io.StringReader)1