Search in sources :

Example 6 with CSVRecord

use of org.apache.commons.csv.CSVRecord in project phoenix by apache.

the class CSVCommonsLoaderIT method testCSVCommonsUpsert.

/**
 * Loads {@code STOCK_CSV_VALUES_WITH_HEADER} into a freshly created table through
 * {@link CSVCommonsLoader} (constructed with an empty column list) and verifies that a
 * {@code SELECT} returns exactly the values obtained by parsing the same CSV text with the
 * loader's own format, in the same order.
 */
@Test
public void testCSVCommonsUpsert() throws Exception {
    PhoenixConnection connection = null;
    CSVParser csvParser = null;
    try {
        final String tableName = generateUniqueName();

        // Set up the target table.
        final String ddl = "CREATE TABLE IF NOT EXISTS " + tableName + "(SYMBOL VARCHAR NOT NULL PRIMARY KEY, COMPANY VARCHAR);";
        connection = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class);
        PhoenixRuntime.executeStatements(connection, new StringReader(ddl), null);

        // Push the CSV content into the table.
        final CSVCommonsLoader loader = new CSVCommonsLoader(connection, tableName, Collections.<String>emptyList(), true);
        loader.upsert(new StringReader(STOCK_CSV_VALUES_WITH_HEADER));

        // Read the table back and walk it in lock-step with a second parse of the CSV text.
        final PreparedStatement query = connection.prepareStatement("SELECT SYMBOL, COMPANY FROM " + tableName);
        final ResultSet actualRows = query.executeQuery();
        csvParser = new CSVParser(new StringReader(STOCK_CSV_VALUES_WITH_HEADER), loader.getFormat());
        for (CSVRecord expectedRow : csvParser) {
            assertTrue(actualRows.next());
            // JDBC column indexes are 1-based.
            int column = 1;
            for (String expectedValue : expectedRow) {
                assertEquals(expectedValue, actualRows.getString(column));
                column++;
            }
        }
        // The table must not contain more rows than the CSV produced.
        assertFalse(actualRows.next());
    } finally {
        if (csvParser != null) {
            csvParser.close();
        }
        if (connection != null) {
            connection.close();
        }
    }
}
Also used : PhoenixConnection(org.apache.phoenix.jdbc.PhoenixConnection) CSVParser(org.apache.commons.csv.CSVParser) StringReader(java.io.StringReader) CSVCommonsLoader(org.apache.phoenix.util.CSVCommonsLoader) ResultSet(java.sql.ResultSet) PreparedStatement(java.sql.PreparedStatement) CSVRecord(org.apache.commons.csv.CSVRecord) Test(org.junit.Test)

Example 7 with CSVRecord

use of org.apache.commons.csv.CSVRecord in project phoenix by apache.

the class CSVCommonsLoaderIT method testCSVUpsertWithColumns.

/**
 * Loads the header-less {@code STOCK_CSV_VALUES} into a freshly created table through
 * {@link CSVCommonsLoader}, supplying the column names explicitly via {@code STOCK_COLUMNS},
 * and verifies that a {@code SELECT} returns exactly the values obtained by parsing the same
 * CSV text with the loader's own format, in the same order.
 */
@Test
public void testCSVUpsertWithColumns() throws Exception {
    PhoenixConnection connection = null;
    CSVParser csvParser = null;
    try {
        final String tableName = generateUniqueName();

        // Set up the target table.
        final String ddl = "CREATE TABLE IF NOT EXISTS " + tableName + "(SYMBOL VARCHAR NOT NULL PRIMARY KEY, COMPANY VARCHAR);";
        connection = DriverManager.getConnection(getUrl()).unwrap(PhoenixConnection.class);
        PhoenixRuntime.executeStatements(connection, new StringReader(ddl), null);

        // Push the CSV content into the table; the input has no header row, so the
        // columns are passed to the loader instead.
        final CSVCommonsLoader loader = new CSVCommonsLoader(connection, tableName, Arrays.<String>asList(STOCK_COLUMNS), true);
        loader.upsert(new StringReader(STOCK_CSV_VALUES));

        // Read the table back and walk it in lock-step with a second parse of the CSV text.
        final PreparedStatement query = connection.prepareStatement("SELECT SYMBOL, COMPANY FROM " + tableName);
        final ResultSet actualRows = query.executeQuery();
        csvParser = new CSVParser(new StringReader(STOCK_CSV_VALUES), loader.getFormat());
        for (CSVRecord expectedRow : csvParser) {
            assertTrue(actualRows.next());
            // JDBC column indexes are 1-based.
            int column = 1;
            for (String expectedValue : expectedRow) {
                assertEquals(expectedValue, actualRows.getString(column));
                column++;
            }
        }
        // The table must not contain more rows than the CSV produced.
        assertFalse(actualRows.next());
    } finally {
        if (csvParser != null) {
            csvParser.close();
        }
        if (connection != null) {
            connection.close();
        }
    }
}
Also used : PhoenixConnection(org.apache.phoenix.jdbc.PhoenixConnection) CSVParser(org.apache.commons.csv.CSVParser) StringReader(java.io.StringReader) CSVCommonsLoader(org.apache.phoenix.util.CSVCommonsLoader) ResultSet(java.sql.ResultSet) PreparedStatement(java.sql.PreparedStatement) CSVRecord(org.apache.commons.csv.CSVRecord) Test(org.junit.Test)

Example 8 with CSVRecord

use of org.apache.commons.csv.CSVRecord in project ranger by apache.

the class FileSourceUserGroupBuilder method readTextFile.

/**
 * Reads a delimited text file that maps users to groups.
 *
 * <p>Each record is interpreted as {@code user<delim>group1<delim>group2...}: the first value
 * is the user name and every following non-empty value is one of that user's groups. A single
 * leading and/or trailing double quote is stripped from the user and from each group. The
 * delimiter is the first character of {@code config.getUserSyncFileSourceDelimiter()}.
 *
 * <p>If the same user appears on several records, the groups of the last record replace the
 * earlier ones (plain {@code Map.put} semantics).
 *
 * @param textFile the file to read
 * @return a map from user name to that user's (possibly empty) list of groups
 * @throws Exception if the file cannot be opened or read, or a record cannot be processed
 */
public Map<String, List<String>> readTextFile(File textFile) throws Exception {
    Map<String, List<String>> ret = new HashMap<String, List<String>>();
    String delimiter = config.getUserSyncFileSourceDelimiter();
    CSVFormat csvFormat = CSVFormat.newFormat(delimiter.charAt(0));
    CSVParser csvParser = new CSVParser(new BufferedReader(new FileReader(textFile)), csvFormat);
    try {
        List<CSVRecord> csvRecordList = csvParser.getRecords();
        if (csvRecordList != null) {
            for (CSVRecord csvRecord : csvRecordList) {
                List<String> groups = new ArrayList<String>();
                String user = csvRecord.get(0);
                user = user.replaceAll("^\"|\"$", "");
                int valueCount = csvRecord.size();
                // Index 0 is the user; everything after it is a group name.
                for (int j = 1; j < valueCount; j++) {
                    String group = csvRecord.get(j);
                    if (group != null && !group.isEmpty()) {
                        group = group.replaceAll("^\"|\"$", "");
                        groups.add(group);
                    }
                }
                ret.put(user, groups);
            }
        }
    } finally {
        // Close in finally so the underlying file handle is released even when
        // parsing or record processing throws.
        csvParser.close();
    }
    return ret;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CSVParser(org.apache.commons.csv.CSVParser) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) List(java.util.List) CSVFormat(org.apache.commons.csv.CSVFormat) FileReader(java.io.FileReader) CSVRecord(org.apache.commons.csv.CSVRecord)

Example 9 with CSVRecord

use of org.apache.commons.csv.CSVRecord in project nifi by apache.

the class CSVRecordLookupService method loadCache.

/**
 * Rebuilds the lookup cache from {@code csvFile} and publishes it to {@code this.cache}.
 *
 * <p>The first CSV record is treated as the header. For every following record the value of
 * {@code lookupKeyColumn} becomes the cache key and all remaining columns become the fields
 * of a {@link MapRecord}. The record schema is derived once, from the first data record, with
 * every field typed as STRING.
 *
 * <p>If the lock cannot be acquired immediately ({@code tryLock()} fails), the method returns
 * without doing anything.
 *
 * @throws IllegalStateException if a record has a blank lookup key, or if a duplicate key is
 *         found while {@code ignoreDuplicates} is false
 * @throws IOException if the file cannot be opened, read or closed
 */
private void loadCache() throws IllegalStateException, IOException {
    if (lock.tryLock()) {
        try {
            final ComponentLog logger = getLogger();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading lookup table from file: " + csvFile);
            }
            final ConcurrentHashMap<String, Record> cache = new ConcurrentHashMap<>();
            // try-with-resources guarantees that the file handle is released on every path,
            // including the IllegalStateException paths below. The reader is listed as its own
            // resource so it is closed even if creating the parser (which reads the header) fails.
            try (final FileReader reader = new FileReader(csvFile);
                 final CSVParser records = csvFormat.withFirstRecordAsHeader().parse(reader)) {
                RecordSchema lookupRecordSchema = null;
                for (final CSVRecord record : records) {
                    final String key = record.get(lookupKeyColumn);
                    if (StringUtils.isBlank(key)) {
                        throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                    } else if (!ignoreDuplicates && cache.containsKey(key)) {
                        throw new IllegalStateException("Duplicate lookup key encountered: " + key + " in " + csvFile);
                    } else if (ignoreDuplicates && cache.containsKey(key)) {
                        // Tolerated duplicate: warn, then let the later record overwrite the earlier one.
                        logger.warn("Duplicate lookup key encountered: {} in {}", new Object[] { key, csvFile });
                    }
                    // Put each key/value pair (except the lookup) into the properties
                    final Map<String, Object> properties = new HashMap<>();
                    record.toMap().forEach((k, v) -> {
                        if (!lookupKeyColumn.equals(k)) {
                            properties.put(k, v);
                        }
                    });
                    if (lookupRecordSchema == null) {
                        // Build the schema once, from the columns of the first data record.
                        final List<RecordField> recordFields = new ArrayList<>(properties.size());
                        properties.forEach((k, v) -> recordFields.add(new RecordField(k, RecordFieldType.STRING.getDataType())));
                        lookupRecordSchema = new SimpleRecordSchema(recordFields);
                    }
                    cache.put(key, new MapRecord(lookupRecordSchema, properties));
                }
            }
            // Publish only after the whole file was read (and closed) successfully.
            this.cache = cache;
            if (cache.isEmpty()) {
                logger.warn("Lookup table is empty after reading file: " + csvFile);
            }
        } finally {
            lock.unlock();
        }
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) ComponentLog(org.apache.nifi.logging.ComponentLog) CSVParser(org.apache.commons.csv.CSVParser) FileReader(java.io.FileReader) CSVRecord(org.apache.commons.csv.CSVRecord) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) CSVRecord(org.apache.commons.csv.CSVRecord) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema)

Example 10 with CSVRecord

use of org.apache.commons.csv.CSVRecord in project nifi by apache.

the class CSVRecordReader method nextRecord.

/**
 * Reads the next CSV record from {@code csvParser} and converts it into a {@link Record}.
 *
 * <p>Values are matched to the fields returned by {@code getRecordFields()} by position.
 * Values at positions beyond the known fields are either skipped ({@code dropUnknownFields})
 * or stored under the synthetic name {@code unknown_field_index_<position>}.
 *
 * @param coerceTypes whether values are converted with {@code convert} (true) or with
 *        {@code convertSimpleIfPossible} (false)
 * @param dropUnknownFields whether values without a corresponding schema field are discarded
 * @return the next record, or {@code null} when the parser has no more records
 */
@Override
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields) throws IOException, MalformedRecordException {
    final RecordSchema schema = getSchema();
    final List<RecordField> fields = getRecordFields();
    final int knownFieldCount = fields.size();

    // The loop body always returns, so at most one CSV record is consumed per call.
    for (final CSVRecord row : csvParser) {
        final Map<String, Object> fieldValues = new LinkedHashMap<>(knownFieldCount * 2);
        final int columnCount = row.size();

        for (int column = 0; column < columnCount; column++) {
            final String raw = row.get(column);

            // Columns past the end of the schema have no name or type of their own.
            if (column >= knownFieldCount) {
                if (!dropUnknownFields) {
                    fieldValues.put("unknown_field_index_" + column, raw);
                }
                continue;
            }

            final RecordField field = fields.get(column);
            final String fieldName = field.getFieldName();
            final DataType fieldType = field.getDataType();

            final Object converted;
            if (coerceTypes) {
                converted = convert(raw, fieldType, fieldName);
            } else {
                // CSV carries no type information, so every value arrives as a String.
                // Without full coercion we still use the schema to attempt a conversion
                // when the target is a simple type.
                converted = convertSimpleIfPossible(raw, fieldType, fieldName);
            }
            fieldValues.put(fieldName, converted);
        }

        return new MapRecord(schema, fieldValues, coerceTypes, dropUnknownFields);
    }

    return null;
}
Also used : MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) DataType(org.apache.nifi.serialization.record.DataType) CSVRecord(org.apache.commons.csv.CSVRecord) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) LinkedHashMap(java.util.LinkedHashMap)

Aggregations

CSVRecord (org.apache.commons.csv.CSVRecord)96 CSVParser (org.apache.commons.csv.CSVParser)47 IOException (java.io.IOException)26 ArrayList (java.util.ArrayList)23 CSVFormat (org.apache.commons.csv.CSVFormat)23 StringReader (java.io.StringReader)19 FileReader (java.io.FileReader)15 Test (org.junit.Test)13 InputStreamReader (java.io.InputStreamReader)12 PreparedStatement (java.sql.PreparedStatement)10 InputStream (java.io.InputStream)9 Reader (java.io.Reader)9 ResultSet (java.sql.ResultSet)9 HashMap (java.util.HashMap)9 PhoenixConnection (org.apache.phoenix.jdbc.PhoenixConnection)9 CSVCommonsLoader (org.apache.phoenix.util.CSVCommonsLoader)9 File (java.io.File)6 Map (java.util.Map)6 User (org.eclipse.sw360.datahandler.thrift.users.User)6 FileNotFoundException (java.io.FileNotFoundException)5