Search in sources:

Example 1 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project hadoop by apache.

the class TestFileSystemTimelineReaderImpl method initializeDataDirectory.

/**
 * Prepares the test data directory: calls {@code loadEntityData(rootDir)} and then
 * appends two records to the app-to-flow mapping CSV file located under
 * {@code <rootDir>/entities/cluster1/}.
 *
 * @param rootDir root directory of the test data
 * @throws Exception if loading the entity data or writing the mapping file fails
 */
public static void initializeDataDirectory(String rootDir) throws Exception {
    loadEntityData(rootDir);
    // Create app flow mapping file.
    CSVFormat format = CSVFormat.DEFAULT.withHeader("APP", "USER", "FLOW", "FLOWRUN");
    String appFlowMappingFile = rootDir + File.separator + "entities" + File.separator + "cluster1" + File.separator + FileSystemTimelineReaderImpl.APP_FLOW_MAPPING_FILE;
    // The FileWriter is opened in append mode. try-with-resources closes the printer
    // first and the writer chain second, so no explicit close() call is needed here.
    try (PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(appFlowMappingFile, true)));
        CSVPrinter printer = new CSVPrinter(out, format)) {
        printer.printRecord("app1", "user1", "flow1", 1);
        // The flow value contains the delimiter character (presumably to exercise quoting).
        printer.printRecord("app2", "user1", "flow1,flow", 1);
    }
    // NOTE(review): File.deleteOnExit() removes a directory only if it is empty at JVM exit.
    (new File(rootDir)).deleteOnExit();
}
Also used : CSVPrinter(org.apache.commons.csv.CSVPrinter) FileWriter(java.io.FileWriter) CSVFormat(org.apache.commons.csv.CSVFormat) File(java.io.File) PrintWriter(java.io.PrintWriter) BufferedWriter(java.io.BufferedWriter)

Example 2 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project logging-log4j2 by apache.

the class CsvLogEventLayout method toSerializable.

/**
 * Serializes a log event into one CSV record using the layout's configured format.
 * Columns are written in this order: nano time, time millis, level, thread id,
 * thread name, thread priority, formatted message, logger FQCN, logger name,
 * marker, thrown proxy, source, context data, context stack.
 *
 * @param event the log event to serialize
 * @return the CSV text accumulated in the builder, or, if an {@link IOException}
 *         occurs, the format's comment marker followed by a space and the exception
 */
@Override
public String toSerializable(final LogEvent event) {
    final StringBuilder out = getStringBuilder();
    final CSVFormat csv = getFormat();
    try {
        // Only the first column is flagged as the start of a new record.
        csv.print(event.getNanoTime(), out, true);
        csv.print(event.getTimeMillis(), out, false);
        csv.print(event.getLevel(), out, false);
        csv.print(event.getThreadId(), out, false);
        csv.print(event.getThreadName(), out, false);
        csv.print(event.getThreadPriority(), out, false);
        csv.print(event.getMessage().getFormattedMessage(), out, false);
        csv.print(event.getLoggerFqcn(), out, false);
        csv.print(event.getLoggerName(), out, false);
        csv.print(event.getMarker(), out, false);
        csv.print(event.getThrownProxy(), out, false);
        csv.print(event.getSource(), out, false);
        csv.print(event.getContextData(), out, false);
        csv.print(event.getContextStack(), out, false);
        // Terminate the record.
        csv.println(out);
        return out.toString();
    } catch (final IOException ioe) {
        // Report the failure through the status logger and return a comment-style line instead.
        StatusLogger.getLogger().error(event.toString(), ioe);
        return csv.getCommentMarker() + " " + ioe;
    }
}
Also used : CSVFormat(org.apache.commons.csv.CSVFormat) IOException(java.io.IOException)

Example 3 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project ranger by apache.

the class FileSourceUserGroupBuilder method readTextFile.

/**
 * Parses a delimited text file into a user-to-groups mapping.
 * <p>
 * The first column of each record is the user; every following non-empty column is
 * a group of that user. One leading and one trailing double quote are stripped from
 * each value. If the same user appears in several records, the last record wins.
 * The delimiter is the first character of the configured file-source delimiter.
 *
 * @param textFile the file to parse
 * @return map from user name to that user's list of groups (possibly empty)
 * @throws Exception if the file cannot be opened or parsed
 */
public Map<String, List<String>> readTextFile(File textFile) throws Exception {
    Map<String, List<String>> ret = new HashMap<String, List<String>>();
    String delimiter = config.getUserSyncFileSourceDelimiter();
    CSVFormat csvFormat = CSVFormat.newFormat(delimiter.charAt(0));
    // try-with-resources guarantees the file handle is released even when parsing
    // or record access throws; the reader is declared separately so it is also
    // closed if constructing the parser fails.
    try (BufferedReader reader = new BufferedReader(new FileReader(textFile));
        CSVParser csvParser = new CSVParser(reader, csvFormat)) {
        List<CSVRecord> csvRecordList = csvParser.getRecords();
        if (csvRecordList != null) {
            for (CSVRecord csvRecord : csvRecordList) {
                List<String> groups = new ArrayList<String>();
                String user = csvRecord.get(0);
                // Strip a single leading and/or trailing double quote.
                user = user.replaceAll("^\"|\"$", "");
                int i = csvRecord.size();
                for (int j = 1; j < i; j++) {
                    String group = csvRecord.get(j);
                    if (group != null && !group.isEmpty()) {
                        group = group.replaceAll("^\"|\"$", "");
                        groups.add(group);
                    }
                }
                ret.put(user, groups);
            }
        }
    }
    return ret;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CSVParser(org.apache.commons.csv.CSVParser) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) List(java.util.List) CSVFormat(org.apache.commons.csv.CSVFormat) FileReader(java.io.FileReader) CSVRecord(org.apache.commons.csv.CSVRecord)

Example 4 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project nifi by apache.

the class ConvertExcelToCSVProcessor method onTrigger.

/**
 * Reads the incoming flow file as an .xlsx workbook and hands each selected sheet to
 * {@code handleExcelSheet} together with the CSV format built from the processor context.
 * <p>
 * If the {@code DESIRED_SHEETS} property has a value, only sheets whose name matches one
 * of the listed names (case-insensitively) are handled; otherwise every sheet is handled.
 * When reading completes without a {@link RuntimeException}, the incoming flow file is
 * transferred to {@code ORIGINAL}; on a {@link RuntimeException} it is tagged with an
 * error attribute and transferred to {@code FAILURE}.
 *
 * @param context the process context supplying the processor properties
 * @param session the process session used to read and transfer the flow file
 * @throws ProcessException if an entry of the Columns to Skip list is not an integer
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    // Nothing to do when no flow file is available.
    if (flowFile == null) {
        return;
    }
    final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions().getValue();
    final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean();
    final CSVFormat csvFormat = CSVUtils.createCSVFormat(context);
    // Switch to 0 based index
    final int firstRow = context.getProperty(ROWS_TO_SKIP).asInteger() - 1;
    final String[] sColumnsToSkip = StringUtils.split(context.getProperty(COLUMNS_TO_SKIP).getValue(), ",");
    final List<Integer> columnsToSkip = new ArrayList<>();
    if (sColumnsToSkip != null && sColumnsToSkip.length > 0) {
        for (String c : sColumnsToSkip) {
            try {
                // Switch to 0 based index
                columnsToSkip.add(Integer.parseInt(c) - 1);
            } catch (NumberFormatException e) {
                throw new ProcessException("Invalid column in Columns to Skip list.", e);
            }
        }
    }
    try {
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(InputStream inputStream) throws IOException {
                try {
                    // NOTE(review): pkg is never closed within this block — confirm whether
                    // it needs to be released explicitly.
                    OPCPackage pkg = OPCPackage.open(inputStream);
                    XSSFReader r = new XSSFReader(pkg);
                    ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(pkg);
                    StylesTable styles = r.getStylesTable();
                    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) r.getSheetsData();
                    if (desiredSheetsDelimited != null) {
                        String[] desiredSheets = StringUtils.split(desiredSheetsDelimited, DESIRED_SHEETS_DELIMITER);
                        if (desiredSheets != null) {
                            while (iter.hasNext()) {
                                InputStream sheet = iter.next();
                                String sheetName = iter.getSheetName();
                                for (int i = 0; i < desiredSheets.length; i++) {
                                    // If the sheetName is a desired one parse it
                                    if (sheetName.equalsIgnoreCase(desiredSheets[i])) {
                                        ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheetName, formatValues, sst, styles);
                                        handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                                        // Each sheet is handled at most once, even if its name is listed repeatedly.
                                        break;
                                    }
                                }
                            }
                        } else {
                            // Reached only when splitting the desired-sheets value yields null.
                            // NOTE(review): no message is logged when the names are present but none matches a sheet.
                            getLogger().debug("Excel document was parsed but no sheets with the specified desired names were found.");
                        }
                    } else {
                        // Get all of the sheets in the document.
                        while (iter.hasNext()) {
                            InputStream sheet = iter.next();
                            String sheetName = iter.getSheetName();
                            ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheetName, formatValues, sst, styles);
                            handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                        }
                    }
                } catch (InvalidFormatException ife) {
                    getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", ife);
                    // Rethrown as an unchecked exception; presumably it propagates out of
                    // session.read and reaches the RuntimeException handler below — TODO confirm.
                    throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported", ife);
                } catch (OpenXML4JException | SAXException e) {
                    // NOTE(review): logged but not rethrown, so this callback returns normally
                    // and the flow file is still transferred to ORIGINAL — confirm this is intended.
                    getLogger().error("Error occurred while processing Excel document metadata", e);
                }
            }
        });
        session.transfer(flowFile, ORIGINAL);
    } catch (RuntimeException ex) {
        getLogger().error("Failed to process incoming Excel document. " + ex.getMessage(), ex);
        // Record the failure message on the flow file under "<processor class name>.error".
        FlowFile failedFlowFile = session.putAttribute(flowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage());
        session.transfer(failedFlowFile, FAILURE);
    }
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) ArrayList(java.util.ArrayList) StylesTable(org.apache.poi.xssf.model.StylesTable) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) IOException(java.io.IOException) ProcessException(org.apache.nifi.processor.exception.ProcessException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) CSVFormat(org.apache.commons.csv.CSVFormat) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader)

Example 5 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project nifi by apache.

the class TestJacksonCSVRecordReader method testMultipleRecordsEscapedWithSpecialChar.

/**
 * Reads a CSV resource that uses the U+0001 control character as its delimiter and
 * checks that both records are returned with the expected values, followed by
 * {@code null} at end of input. The "balance" field of the default schema is
 * replaced by a field using {@code doubleDataType} before the reader is created.
 */
@Test
public void testMultipleRecordsEscapedWithSpecialChar() throws IOException, MalformedRecordException {
    final char separator = StringEscapeUtils.unescapeJava("\u0001").charAt(0);
    final CSVFormat csvFormat = CSVFormat.DEFAULT
        .withFirstRecordAsHeader()
        .withTrim()
        .withQuote('"')
        .withDelimiter(separator);

    final List<RecordField> schemaFields = getDefaultFields();
    schemaFields.replaceAll(field -> {
        if (field.getFieldName().equals("balance")) {
            return new RecordField("balance", doubleDataType);
        }
        return field;
    });
    final RecordSchema schema = new SimpleRecordSchema(schemaFields);

    final File csvFile = new File("src/test/resources/csv/multi-bank-account_escapedchar.csv");
    try (final InputStream in = new FileInputStream(csvFile);
        final JacksonCSVRecordReader recordReader = createReader(in, schema, csvFormat)) {
        // First record.
        final Object[] expectedFirst = new Object[] { "1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA" };
        Assert.assertArrayEquals(expectedFirst, recordReader.nextRecord().getValues());

        // Second record.
        final Object[] expectedSecond = new Object[] { "2", "Jane Doe", 4820.09D, "321 Your Street", "Your City", "NY", "33333", "USA" };
        Assert.assertArrayEquals(expectedSecond, recordReader.nextRecord().getValues());

        // No further records.
        assertNull(recordReader.nextRecord());
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) CSVFormat(org.apache.commons.csv.CSVFormat) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) File(java.io.File) FileInputStream(java.io.FileInputStream) Test(org.junit.Test)

Aggregations

CSVFormat (org.apache.commons.csv.CSVFormat) 59; IOException (java.io.IOException) 23; CSVRecord (org.apache.commons.csv.CSVRecord) 22; CSVParser (org.apache.commons.csv.CSVParser) 19; ArrayList (java.util.ArrayList) 14; StringReader (java.io.StringReader) 13; CSVPrinter (org.apache.commons.csv.CSVPrinter) 10; InputStream (java.io.InputStream) 9; InputStreamReader (java.io.InputStreamReader) 8; HashMap (java.util.HashMap) 8; SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema) 8; RecordField (org.apache.nifi.serialization.record.RecordField) 8; RecordSchema (org.apache.nifi.serialization.record.RecordSchema) 8; Test (org.junit.Test) 8; ByteArrayOutputStream (java.io.ByteArrayOutputStream) 7; Reader (java.io.Reader) 7; LinkedHashMap (java.util.LinkedHashMap) 7; SchemaNameAsAttribute (org.apache.nifi.schema.access.SchemaNameAsAttribute) 7; MapRecord (org.apache.nifi.serialization.record.MapRecord) 7; Record (org.apache.nifi.serialization.record.Record) 7