Search in sources :

Example 6 with SparkFileSchemaParser

Use of com.thinkbiganalytics.discovery.parser.SparkFileSchemaParser in project kylo by Teradata.

The method complete of the class FileMetadataTransformResponseModifier.

/**
 * Populates the given response with file data sets built from the rows of {@code response}.
 *
 * <p>Each row of {@code response.getResults().getRows()} is converted with {@code parseRow} and the
 * parsed entries are grouped by {@code getKey()}. Every group becomes one
 * {@link FileMetadataResponse.FileDataSet}, stored in the result map under the file path of the
 * first entry in the group. When {@code findSchemaParserForMimeType} yields a descriptor for the
 * first entry's mime type, the descriptor is attached to the data set, its {@code separatorChar}
 * and {@code rowTag} properties (if present) are set from the first entry, and, if the resolved
 * parser is a {@link SparkFileSchemaParser}, a Spark script covering all file paths in the group
 * is attached as well.
 *
 * <p>{@code response}, {@code parseRow}, {@code findSchemaParserForMimeType} and
 * {@code fileSchemaParser} are declared outside this method.
 *
 * @param modifiedTransformResponse the response to fill; receives the data sets as its results and
 *                                  a status of {@link TransformResponse.Status#SUCCESS}
 * @throws IllegalStateException if two groups share the same first file path (raised by
 *                               {@code Collectors.toMap} on a duplicate key)
 */
public void complete(ModifiedTransformResponse<FileMetadataResponse> modifiedTransformResponse) {
    FileMetadataResponse fileMetadataResponse = new FileMetadataResponse();
    // Rows are parsed exactly once. The previous additional grouping by mime type was never read,
    // doubled the parsing work and failed with a NullPointerException on a null mime type.
    Map<String, List<FileMetadataResponse.ParsedFileMetadata>> groupedMetadata = response.getResults().getRows().stream()
        .map(r -> parseRow(r))
        .collect(Collectors.groupingBy(FileMetadataResponse.ParsedFileMetadata::getKey));
    // create user friendly names from the groupings
    // the key in this new map will be the first file found
    Map<String, FileMetadataResponse.FileDataSet> fileDatasets = groupedMetadata.values().stream().collect(Collectors.toMap(files -> files.get(0).getFilePath(), files -> {
        FileMetadataResponse.FileDataSet dataSet = new FileMetadataResponse.FileDataSet();
        dataSet.setFiles(files);
        // groupingBy never produces an empty group, so the first element always exists
        FileMetadataResponse.ParsedFileMetadata firstFile = files.get(0);
        dataSet.setMimeType(firstFile.getMimeType());
        findSchemaParserForMimeType(firstFile.getMimeType()).ifPresent(schemaParserDescriptor -> {
            dataSet.setSchemaParser(schemaParserDescriptor);
            // if the parser has a delimiter property set it
            // (constant-first equals keeps a property without an object-property name from throwing)
            schemaParserDescriptor.getProperties().stream()
                .filter(property -> "separatorChar".equals(property.getObjectProperty()))
                .findFirst()
                .ifPresent(property -> property.setValue(firstFile.getDelimiter()));
            // likewise for the row tag property
            schemaParserDescriptor.getProperties().stream()
                .filter(property -> "rowTag".equals(property.getObjectProperty()))
                .findFirst()
                .ifPresent(property -> property.setValue(firstFile.getRowTag()));
            // the parser is resolved only after the properties above have been updated on the descriptor
            fileSchemaParser(schemaParserDescriptor)
                .filter(SparkFileSchemaParser.class::isInstance)
                .map(SparkFileSchemaParser.class::cast)
                .ifPresent(sparkParser -> {
                    List<String> paths = files.stream()
                        .map(FileMetadataResponse.ParsedFileMetadata::getFilePath)
                        .collect(Collectors.toList());
                    SampleFileSparkScript sparkScript = sparkParser.getSparkScript(paths);
                    dataSet.setSparkScript(sparkScript);
                });
        });
        return dataSet;
    }));
    fileMetadataResponse.setDatasets(fileDatasets);
    modifiedTransformResponse.setResults(fileMetadataResponse);
    modifiedTransformResponse.setStatus(TransformResponse.Status.SUCCESS);
}
Also used : SampleFileSparkScript(com.thinkbiganalytics.discovery.parser.SampleFileSparkScript) Arrays(java.util.Arrays) FileMetadataTaskService(com.thinkbiganalytics.spark.rest.filemetadata.tasks.FileMetadataTaskService) SchemaParserDescriptor(com.thinkbiganalytics.discovery.model.SchemaParserDescriptor) AbstractTransformResponseModifier(com.thinkbiganalytics.spark.rest.controller.AbstractTransformResponseModifier) ModifiedTransformResponse(com.thinkbiganalytics.spark.rest.model.ModifiedTransformResponse) FileParserFactory(com.thinkbiganalytics.discovery.FileParserFactory) StringUtils(org.apache.commons.lang3.StringUtils) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) SparkFileSchemaParser(com.thinkbiganalytics.discovery.parser.SparkFileSchemaParser) FileMetadataResponse(com.thinkbiganalytics.spark.rest.model.FileMetadataResponse) FileSchemaParser(com.thinkbiganalytics.discovery.parser.FileSchemaParser) TransformResponse(com.thinkbiganalytics.spark.rest.model.TransformResponse) List(java.util.List) Map(java.util.Map) Optional(java.util.Optional) SchemaParserAnnotationTransformer(com.thinkbiganalytics.discovery.rest.controller.SchemaParserAnnotationTransformer) SampleFileSparkScript(com.thinkbiganalytics.discovery.parser.SampleFileSparkScript) SparkFileSchemaParser(com.thinkbiganalytics.discovery.parser.SparkFileSchemaParser) Optional(java.util.Optional) FileMetadataResponse(com.thinkbiganalytics.spark.rest.model.FileMetadataResponse) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

FileSchemaParser (com.thinkbiganalytics.discovery.parser.FileSchemaParser)6 SparkFileSchemaParser (com.thinkbiganalytics.discovery.parser.SparkFileSchemaParser)6 SchemaParserDescriptor (com.thinkbiganalytics.discovery.model.SchemaParserDescriptor)4 SampleFileSparkScript (com.thinkbiganalytics.discovery.parser.SampleFileSparkScript)4 SchemaParserAnnotationTransformer (com.thinkbiganalytics.discovery.rest.controller.SchemaParserAnnotationTransformer)4 FileParserFactory (com.thinkbiganalytics.discovery.FileParserFactory)2 PolicyTransformException (com.thinkbiganalytics.policy.PolicyTransformException)2 AbstractTransformResponseModifier (com.thinkbiganalytics.spark.rest.controller.AbstractTransformResponseModifier)2 FileMetadataResponse (com.thinkbiganalytics.spark.rest.model.FileMetadataResponse)2 ModifiedTransformResponse (com.thinkbiganalytics.spark.rest.model.ModifiedTransformResponse)2 ApiOperation (io.swagger.annotations.ApiOperation)2 ApiResponses (io.swagger.annotations.ApiResponses)2 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 List (java.util.List)2 Map (java.util.Map)2 Optional (java.util.Optional)2 Collectors (java.util.stream.Collectors)2 Consumes (javax.ws.rs.Consumes)2 InternalServerErrorException (javax.ws.rs.InternalServerErrorException)2