Example use of com.thinkbiganalytics.kylo.metadata.file.FileMetadata in the project kylo by Teradata.
From the class TikaParserTest, method test.
/**
 * Runs {@code FileMetadataService.detectFromStream} over a set of sample files and checks the
 * reported MIME type for each one. For the XML samples the "rowTag" property is also checked,
 * and for the delimited text samples the "delimiter" property.
 *
 * <p>Several samples have a name whose extension does not match the asserted MIME type
 * ("MOCK_DATA.*.txt" as text/csv, "userdata1.orva" as application/avro, "userdata1_orc" as
 * application/orc). NOTE(review): presumably this is deliberate, to show detection does not
 * rely on the file extension alone; confirm against the detector implementation.
 */
@Test
public void test() throws Exception {
    // XML: MIME type plus the detected row tag.
    FileMetadata metadata = detectAndClose("test.xml");
    Assert.assertEquals("application/xml", metadata.getMimeType());
    Assert.assertEquals("catalog", metadata.getProperties().get("rowTag"));

    metadata = detectAndClose("test2.xml");
    Assert.assertEquals("application/xml", metadata.getMimeType());
    Assert.assertEquals("some-books", metadata.getProperties().get("rowTag"));

    // Delimited text: MIME type plus the detected delimiter.
    metadata = detectAndClose("MOCK_DATA.commasep.txt");
    Assert.assertEquals("text/csv", metadata.getMimeType());
    Assert.assertEquals(",", metadata.getProperties().get("delimiter"));

    metadata = detectAndClose("MOCK_DATA.pipe.txt");
    Assert.assertEquals("text/csv", metadata.getMimeType());
    Assert.assertEquals("|", metadata.getProperties().get("delimiter"));

    // Remaining formats: MIME type only.
    metadata = detectAndClose("test.parquet");
    Assert.assertEquals("application/parquet", metadata.getMimeType());

    metadata = detectAndClose("books1.json");
    Assert.assertEquals("application/json", metadata.getMimeType());

    metadata = detectAndClose("userdata1.avro");
    Assert.assertEquals("application/avro", metadata.getMimeType());

    metadata = detectAndClose("userdata1.orva");
    Assert.assertEquals("application/avro", metadata.getMimeType());

    metadata = detectAndClose("userdata1_orc");
    Assert.assertEquals("application/orc", metadata.getMimeType());
}

/**
 * Opens the named sample through {@code getFile}, runs detection on its stream, and closes the
 * stream afterwards so the test does not leak one open stream per sample.
 *
 * @param file name of the sample; also passed to the detector as the file name
 * @return the metadata reported by {@code FileMetadataService.detectFromStream}
 * @throws Exception if the sample cannot be opened or detection fails
 */
private FileMetadata detectAndClose(String file) throws Exception {
    try (java.io.InputStream in = getFile(file).getInputStream()) {
        return FileMetadataService.detectFromStream(in, file);
    }
}
Example use of com.thinkbiganalytics.kylo.metadata.file.FileMetadata in the project kylo by Teradata.
From the class SparkFileMetadataExtractor, method parse.
/**
 * Loads each path through the "com.thinkbiganalytics.spark.file.metadata" data source, unions
 * the per-path results with {@code unionAll}, and collects the rows as {@link FileMetadata}
 * beans.
 *
 * @param filePaths the paths to inspect; may be {@code null} or empty
 * @return one list holding the collected metadata for all paths; an empty list when no paths
 *         are supplied
 */
@Override
public List<FileMetadata> parse(String[] filePaths) {
    // Nothing to load: return early instead of failing on a null array or handing an empty
    // list to unionAll.
    if (filePaths == null || filePaths.length == 0) {
        return new ArrayList<FileMetadata>();
    }

    List<Dataset> dataFrameList = new ArrayList<>(filePaths.length);
    for (String path : filePaths) {
        Dataset df = (Dataset) sqlContext.read().format("com.thinkbiganalytics.spark.file.metadata").load(path);
        dataFrameList.add(df);
    }

    Dataset unionDf = unionAll(dataFrameList);

    // Map the unioned rows onto the FileMetadata bean before collecting them.
    Encoder<FileMetadata> encoder = Encoders.bean(FileMetadata.class);
    Dataset fileData = unionDf.as(encoder);
    return fileData.collectAsList();
}
Aggregations