Use of org.apache.commons.csv.CSVFormat in project hadoop by apache: the class TestFileSystemTimelineReaderImpl, method initializeDataDirectory.
public static void initializeDataDirectory(String rootDir) throws Exception {
    loadEntityData(rootDir);
    // Create app flow mapping file.
    CSVFormat format = CSVFormat.DEFAULT.withHeader("APP", "USER", "FLOW", "FLOWRUN");
    String appFlowMappingFile = rootDir + File.separator + "entities" + File.separator
            + "cluster1" + File.separator + FileSystemTimelineReaderImpl.APP_FLOW_MAPPING_FILE;
    try (PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(appFlowMappingFile, true)));
         CSVPrinter printer = new CSVPrinter(out, format)) {
        printer.printRecord("app1", "user1", "flow1", 1);
        printer.printRecord("app2", "user1", "flow1,flow", 1);
    }
    (new File(rootDir)).deleteOnExit();
}
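To show how a file written this way is read back, here is a minimal standalone sketch that parses it with commons-csv, treating the header row written above as the column names. The relative path is a hypothetical stand-in for the rootDir-based path the test builds; it is not part of the Hadoop code:

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class AppFlowMappingReadSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical path; the test above derives the real one from rootDir.
        try (Reader in = Files.newBufferedReader(Paths.get("entities/cluster1/app_flow_mapping.csv"));
             CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(in)) {
            for (CSVRecord record : parser) {
                // Column names match the header the test writes: APP, USER, FLOW, FLOWRUN.
                System.out.println(record.get("APP") + " -> " + record.get("FLOW"));
            }
        }
    }
}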
Use of org.apache.commons.csv.CSVFormat in project logging-log4j2 by apache: the class CsvLogEventLayout, method toSerializable.
@Override
public String toSerializable(final LogEvent event) {
    final StringBuilder buffer = getStringBuilder();
    final CSVFormat format = getFormat();
    try {
        format.print(event.getNanoTime(), buffer, true);
        format.print(event.getTimeMillis(), buffer, false);
        format.print(event.getLevel(), buffer, false);
        format.print(event.getThreadId(), buffer, false);
        format.print(event.getThreadName(), buffer, false);
        format.print(event.getThreadPriority(), buffer, false);
        format.print(event.getMessage().getFormattedMessage(), buffer, false);
        format.print(event.getLoggerFqcn(), buffer, false);
        format.print(event.getLoggerName(), buffer, false);
        format.print(event.getMarker(), buffer, false);
        format.print(event.getThrownProxy(), buffer, false);
        format.print(event.getSource(), buffer, false);
        format.print(event.getContextData(), buffer, false);
        format.print(event.getContextStack(), buffer, false);
        format.println(buffer);
        return buffer.toString();
    } catch (final IOException e) {
        StatusLogger.getLogger().error(event.toString(), e);
        return format.getCommentMarker() + " " + e;
    }
}
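The print(value, out, newRecord) calls above append one field at a time to the same record: passing true starts a new record and false continues it, with println emitting the record separator. A minimal standalone sketch of that pattern, using invented sample values:

import org.apache.commons.csv.CSVFormat;

public class CsvPrintSketch {
    public static void main(String[] args) throws Exception {
        StringBuilder buffer = new StringBuilder();
        CSVFormat format = CSVFormat.DEFAULT;
        // First field starts a new record (newRecord = true); the rest append to it.
        format.print("2024-01-01T00:00:00Z", buffer, true);
        format.print("INFO", buffer, false);
        format.print("hello, world", buffer, false); // embedded comma forces quoting
        format.println(buffer); // ends the record
        System.out.print(buffer); // 2024-01-01T00:00:00Z,INFO,"hello, world"
    }
}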
Use of org.apache.commons.csv.CSVFormat in project ranger by apache: the class FileSourceUserGroupBuilder, method readTextFile.
public Map<String, List<String>> readTextFile(File textFile) throws Exception {
    Map<String, List<String>> ret = new HashMap<>();
    String delimiter = config.getUserSyncFileSourceDelimiter();
    CSVFormat csvFormat = CSVFormat.newFormat(delimiter.charAt(0));
    // try-with-resources closes the parser even if reading fails part-way through.
    try (CSVParser csvParser = new CSVParser(new BufferedReader(new FileReader(textFile)), csvFormat)) {
        for (CSVRecord csvRecord : csvParser.getRecords()) {
            List<String> groups = new ArrayList<>();
            String user = csvRecord.get(0).replaceAll("^\"|\"$", "");
            for (int j = 1; j < csvRecord.size(); j++) {
                String group = csvRecord.get(j);
                if (group != null && !group.isEmpty()) {
                    groups.add(group.replaceAll("^\"|\"$", ""));
                }
            }
            ret.put(user, groups);
        }
    }
    return ret;
}
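Note that CSVFormat.newFormat(delimiter) configures only the delimiter and no quote character, which is why the method strips surrounding double quotes by hand. A small sketch, with invented sample data, of the alternative of letting the parser handle quotes via withQuote:

import java.io.StringReader;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class QuoteAwareParseSketch {
    public static void main(String[] args) throws Exception {
        String data = "\"user1\",\"groupA\",\"groupB\"\n";
        // With a quote character configured, the parser strips the quotes itself.
        CSVFormat format = CSVFormat.newFormat(',').withQuote('"');
        try (CSVParser parser = new CSVParser(new StringReader(data), format)) {
            for (CSVRecord record : parser) {
                System.out.println(record.get(0)); // prints: user1
            }
        }
    }
}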
Use of org.apache.commons.csv.CSVFormat in project nifi by apache: the class ConvertExcelToCSVProcessor, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions().getValue();
    final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean();
    final CSVFormat csvFormat = CSVUtils.createCSVFormat(context);
    // Switch to 0-based index.
    final int firstRow = context.getProperty(ROWS_TO_SKIP).asInteger() - 1;
    final String[] sColumnsToSkip = StringUtils.split(context.getProperty(COLUMNS_TO_SKIP).getValue(), ",");
    final List<Integer> columnsToSkip = new ArrayList<>();
    if (sColumnsToSkip != null && sColumnsToSkip.length > 0) {
        for (String c : sColumnsToSkip) {
            try {
                // Switch to 0-based index.
                columnsToSkip.add(Integer.parseInt(c) - 1);
            } catch (NumberFormatException e) {
                throw new ProcessException("Invalid column in Columns to Skip list.", e);
            }
        }
    }
    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream inputStream) throws IOException {
                try {
                    OPCPackage pkg = OPCPackage.open(inputStream);
                    XSSFReader r = new XSSFReader(pkg);
                    ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(pkg);
                    StylesTable styles = r.getStylesTable();
                    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) r.getSheetsData();
                    if (desiredSheetsDelimited != null) {
                        String[] desiredSheets = StringUtils.split(desiredSheetsDelimited, DESIRED_SHEETS_DELIMITER);
                        if (desiredSheets != null) {
                            while (iter.hasNext()) {
                                InputStream sheet = iter.next();
                                String sheetName = iter.getSheetName();
                                for (int i = 0; i < desiredSheets.length; i++) {
                                    // If the sheet name is a desired one, parse it.
                                    if (sheetName.equalsIgnoreCase(desiredSheets[i])) {
                                        ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheetName, formatValues, sst, styles);
                                        handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                                        break;
                                    }
                                }
                            }
                        } else {
                            getLogger().debug("Excel document was parsed but no sheets with the specified desired names were found.");
                        }
                    } else {
                        // Get all of the sheets in the document.
                        while (iter.hasNext()) {
                            InputStream sheet = iter.next();
                            String sheetName = iter.getSheetName();
                            ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheetName, formatValues, sst, styles);
                            handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                        }
                    }
                } catch (InvalidFormatException ife) {
                    getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", ife);
                    throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported", ife);
                } catch (OpenXML4JException | SAXException e) {
                    getLogger().error("Error occurred while processing Excel document metadata", e);
                }
            }
        });
        session.transfer(flowFile, ORIGINAL);
    } catch (RuntimeException ex) {
        getLogger().error("Failed to process incoming Excel document. " + ex.getMessage(), ex);
        FlowFile failedFlowFile = session.putAttribute(flowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage());
        session.transfer(failedFlowFile, FAILURE);
    }
}
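CSVUtils.createCSVFormat here is a NiFi helper that assembles a commons-csv CSVFormat from processor properties. As a rough illustration only, not NiFi's actual implementation, a hypothetical builder along those lines could look like this (the parameter names and defaults are invented):

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.QuoteMode;

public class CsvFormatFromConfigSketch {
    // Hypothetical helper: maps simple config values onto a commons-csv CSVFormat,
    // roughly the kind of object a utility like CSVUtils.createCSVFormat returns.
    static CSVFormat fromConfig(char delimiter, char quote, boolean trim) {
        CSVFormat format = CSVFormat.DEFAULT
                .withDelimiter(delimiter)
                .withQuote(quote)
                .withQuoteMode(QuoteMode.MINIMAL);
        return trim ? format.withTrim() : format;
    }

    public static void main(String[] args) {
        System.out.println(fromConfig(',', '"', true));
    }
}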
Use of org.apache.commons.csv.CSVFormat in project nifi by apache: the class TestJacksonCSVRecordReader, method testMultipleRecordsEscapedWithSpecialChar.
@Test
public void testMultipleRecordsEscapedWithSpecialChar() throws IOException, MalformedRecordException {
    char delimiter = StringEscapeUtils.unescapeJava("\\u0001").charAt(0);
    final CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().withQuote('"').withDelimiter(delimiter);
    final List<RecordField> fields = getDefaultFields();
    fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);
    final RecordSchema schema = new SimpleRecordSchema(fields);
    try (final InputStream fis = new FileInputStream(new File("src/test/resources/csv/multi-bank-account_escapedchar.csv"));
         final JacksonCSVRecordReader reader = createReader(fis, schema, format)) {
        final Object[] firstRecord = reader.nextRecord().getValues();
        final Object[] firstExpectedValues = new Object[] { "1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA" };
        Assert.assertArrayEquals(firstExpectedValues, firstRecord);
        final Object[] secondRecord = reader.nextRecord().getValues();
        final Object[] secondExpectedValues = new Object[] { "2", "Jane Doe", 4820.09D, "321 Your Street", "Your City", "NY", "33333", "USA" };
        Assert.assertArrayEquals(secondExpectedValues, secondRecord);
        assertNull(reader.nextRecord());
    }
}
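For reference, a minimal sketch that exercises the same CSVFormat configuration against an in-memory record, so the \u0001 delimiter handling can be seen without the NiFi test fixture; the sample data is invented:

import java.io.StringReader;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class SpecialCharDelimiterSketch {
    public static void main(String[] args) throws Exception {
        char delimiter = '\u0001';
        CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().withQuote('"').withDelimiter(delimiter);
        // Header row and one data row, fields separated by the \u0001 control character.
        String data = "id\u0001name\n1\u0001John Doe\n";
        try (CSVParser parser = new CSVParser(new StringReader(data), format)) {
            for (CSVRecord record : parser) {
                System.out.println(record.get("name")); // prints: John Doe
            }
        }
    }
}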