Use of com.thinkbiganalytics.discovery.model.SchemaParserDescriptor in the project kylo by Teradata.
Class SchemaDiscoveryRestController, method getFileParsers:
@GET
@Path("/file-parsers")
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("Gets the available file parsers.")
@ApiResponses(@ApiResponse(code = 200, message = "Returns the file parsers.", response = SchemaParserDescriptor.class, responseContainer = "List"))
public Response getFileParsers() {
    // Translate every registered file parser into its UI-facing descriptor model.
    SchemaParserAnnotationTransformer transformer = new SchemaParserAnnotationTransformer();
    List<SchemaParserDescriptor> uiModels = new ArrayList<>();
    FileParserFactory.instance().listSchemaParsers().forEach(parser -> uiModels.add(transformer.toUIModel(parser)));
    return Response.ok(uiModels).build();
}
Use of com.thinkbiganalytics.discovery.model.SchemaParserDescriptor in the project kylo by Teradata.
Class SchemaDiscoveryRestController, method uploadFileSpark:
/**
 * Generates the Spark script that can parse the passed in file using the passed in "parserDescriptor".
 *
 * @param parserDescriptor  metadata about how the file should be parsed (JSON-serialized {@link SchemaParserDescriptor})
 * @param dataFrameVariable the name of the dataframe variable in the generated Spark code
 * @param limit             a number indicating how many rows the script should limit the output; -1 means no limit
 * @param fileInputStream   the file
 * @param fileMetaData      metadata about the file
 * @return an object including the name of the file on disk and the generated Spark script
 * @throws WebApplicationException     if the file cannot be read
 * @throws InternalServerErrorException if the parser cannot be converted or does not support Spark
 */
@POST
@Path("/spark/sample-file")
@Consumes(MediaType.MULTIPART_FORM_DATA)
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("Determines the schema of the provided file.")
@ApiResponses({ @ApiResponse(code = 200, message = "Returns the spark script that parses the sample file.", response = Schema.class), @ApiResponse(code = 500, message = "The schema could not be determined.", response = RestResponseStatus.class) })
public Response uploadFileSpark(@FormDataParam("parser") String parserDescriptor, @FormDataParam("dataFrameVariable") @DefaultValue("df") String dataFrameVariable, @FormDataParam("limit") @DefaultValue("-1") Integer limit, @FormDataParam("file") InputStream fileInputStream, @FormDataParam("file") FormDataContentDisposition fileMetaData) throws Exception {
    SampleFileSparkScript sampleFileSparkScript = null;
    SchemaParserAnnotationTransformer transformer = new SchemaParserAnnotationTransformer();
    try {
        SchemaParserDescriptor descriptor = ObjectMapperSerializer.deserialize(parserDescriptor, SchemaParserDescriptor.class);
        FileSchemaParser p = transformer.fromUiModel(descriptor);
        // Guard the downcast: only Spark-capable parsers can generate a script. A bare
        // ClassCastException here would surface as an unexplained 500 to the caller.
        if (!(p instanceof SparkFileSchemaParser)) {
            log.warn("Parser {} does not support Spark script generation", p.getClass().getName());
            throw new InternalServerErrorException(STRINGS.getString("discovery.transformError"));
        }
        SparkFileSchemaParser sparkFileSchemaParser = (SparkFileSchemaParser) p;
        sparkFileSchemaParser.setDataFrameVariable(dataFrameVariable);
        sparkFileSchemaParser.setLimit(limit);
        sampleFileSparkScript = sparkFileSchemaParser.getSparkScript(fileInputStream);
    } catch (IOException e) {
        // Preserve the cause so the stack trace is not lost in the server log.
        throw new WebApplicationException(e.getMessage(), e);
    } catch (PolicyTransformException e) {
        log.warn("Failed to convert parser", e);
        throw new InternalServerErrorException(STRINGS.getString("discovery.transformError"), e);
    }
    if (sampleFileSparkScript == null) {
        // The parser converted fine but produced no script for this file.
        log.warn("Parser produced no Spark script for the sample file");
        throw new InternalServerErrorException(STRINGS.getString("discovery.transformError"));
    }
    return Response.ok(sampleFileSparkScript).build();
}
Use of com.thinkbiganalytics.discovery.model.SchemaParserDescriptor in the project kylo by Teradata.
Class SchemaDiscoveryRestController, method uploadFile:
/**
 * Determines the Hive schema of the uploaded file using the supplied parser descriptor.
 *
 * @param parserDescriptor metadata about how the file should be parsed (JSON-serialized {@link SchemaParserDescriptor})
 * @param fileInputStream  the file
 * @param fileMetaData     metadata about the file
 * @return the derived {@code Schema}
 * @throws WebApplicationException      if the file cannot be read
 * @throws InternalServerErrorException if the parser descriptor cannot be converted to a parser
 */
@POST
@Path("/hive/sample-file")
@Consumes(MediaType.MULTIPART_FORM_DATA)
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("Determines the schema of the provided file.")
@ApiResponses({ @ApiResponse(code = 200, message = "Returns the schema.", response = Schema.class), @ApiResponse(code = 500, message = "The schema could not be determined.", response = RestResponseStatus.class) })
public Response uploadFile(@FormDataParam("parser") String parserDescriptor, @FormDataParam("file") InputStream fileInputStream, @FormDataParam("file") FormDataContentDisposition fileMetaData) throws Exception {
    Schema schema;
    SchemaParserAnnotationTransformer transformer = new SchemaParserAnnotationTransformer();
    try {
        SchemaParserDescriptor descriptor = ObjectMapperSerializer.deserialize(parserDescriptor, SchemaParserDescriptor.class);
        FileSchemaParser p = transformer.fromUiModel(descriptor);
        // TODO: Detect charset
        schema = p.parse(fileInputStream, Charset.defaultCharset(), TableSchemaType.HIVE);
    } catch (IOException e) {
        // Preserve the cause so the stack trace is not lost in the server log.
        throw new WebApplicationException(e.getMessage(), e);
    } catch (PolicyTransformException e) {
        log.warn("Failed to convert parser", e);
        throw new InternalServerErrorException(STRINGS.getString("discovery.transformError"), e);
    }
    return Response.ok(schema).build();
}
Use of com.thinkbiganalytics.discovery.model.SchemaParserDescriptor in the project kylo by Teradata.
Class SchemaDiscoveryRestControllerTest, method createMockParserDescriptor:
/**
 * Builds a descriptor targeting {@code MockSchemaParser2} with auto-detect
 * enabled and the header-row flag disabled.
 *
 * @return the populated mock parser descriptor
 */
private SchemaParserDescriptor createMockParserDescriptor() {
    FieldRuleProperty autoDetect = new FieldRuleProperty();
    autoDetect.setName("Auto Detect?");
    autoDetect.setObjectProperty("autoDetect");
    autoDetect.setValue("true");

    FieldRuleProperty headerRow = new FieldRuleProperty();
    headerRow.setName("Header?");
    headerRow.setObjectProperty("headerRow");
    headerRow.setValue("false");

    SchemaParserDescriptor mockDescriptor = new SchemaParserDescriptor();
    mockDescriptor.setObjectClassType("com.thinkbiganalytics.discovery.rest.controller.MockSchemaParser2");
    mockDescriptor.setProperties(Arrays.asList(autoDetect, headerRow));
    return mockDescriptor;
}
Use of com.thinkbiganalytics.discovery.model.SchemaParserDescriptor in the project kylo by Teradata.
Class SchemaParserAnnotationTransformer, method buildUiModel:
/**
 * Copies the {@code SchemaParser} annotation metadata and the derived rule
 * properties into a new UI-facing descriptor.
 *
 * @param annotation the parser annotation supplying display metadata and capability flags
 * @param policy     the parser instance; its class name becomes the descriptor's object class type
 * @param properties the UI rule properties derived from the parser's fields
 * @return the populated descriptor
 */
@Override
public SchemaParserDescriptor buildUiModel(SchemaParser annotation, FileSchemaParser policy, List<FieldRuleProperty> properties) {
    SchemaParserDescriptor model = new SchemaParserDescriptor();
    // Identity and display metadata.
    model.setName(annotation.name());
    model.setDescription(annotation.description());
    model.setTags(annotation.tags());
    model.setObjectClassType(policy.getClass().getTypeName());
    // Capability flags taken straight from the annotation.
    model.setGeneratesHiveSerde(annotation.generatesHiveSerde());
    model.setSupportsBinary(annotation.supportsBinary());
    model.setAllowSkipHeader(annotation.allowSkipHeader());
    model.setPrimary(annotation.primary());
    model.setUsesSpark(annotation.usesSpark());
    model.setMimeTypes(annotation.mimeTypes());
    model.setSparkFormat(annotation.sparkFormat());
    model.setProperties(properties);
    return model;
}
Aggregations