Search in sources :

Example 1 with CsvParser

use of com.alibaba.alink.operator.common.io.csv.CsvParser in project Alink by alibaba.

the class MTable method readCsvFromFile.

/**
 * Reads CSV lines from {@code reader} and materializes them as an {@link MTable}.
 *
 * @param reader    source of CSV lines; read to end-of-stream, not closed here (caller owns it).
 * @param schemaStr schema string describing column names and types of each CSV record.
 * @return an MTable holding every parsed row together with the decoded schema.
 * @throws IOException      if reading from {@code reader} fails.
 * @throws RuntimeException if any line cannot be parsed against the schema.
 */
public static MTable readCsvFromFile(BufferedReader reader, String schemaStr) throws IOException {
    TableSchema schema = CsvUtil.schemaStr2Schema(schemaStr);
    // Standard CSV dialect: comma delimiter, double-quote as the quoting character.
    CsvParser parser = new CsvParser(schema.getFieldTypes(), ",", '"');
    List<Row> rows = new ArrayList<>();
    String line;
    while ((line = reader.readLine()) != null) {
        Tuple2<Boolean, Row> parsed = parser.parse(line);
        // f0 is the success flag; fail fast on the first malformed line.
        if (!parsed.f0) {
            throw new RuntimeException("Fail to parse line: \"" + line + "\"");
        }
        rows.add(parsed.f1);
    }
    return new MTable(rows, schema);
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) ArrayList(java.util.ArrayList) CsvParser(com.alibaba.alink.operator.common.io.csv.CsvParser) Row(org.apache.flink.types.Row)

Example 2 with CsvParser

use of com.alibaba.alink.operator.common.io.csv.CsvParser in project Alink by alibaba.

the class LocalPredictorLoader method loadLocalPredictor.

/**
 * Create a local predictor by loading model from a CSV file, without launching Flink job.
 *
 * @param modelPath   Path of the model file in CSV format. Can be local file or remote file.
 * @param inputSchema Test data schema.
 * @return a LocalPredictor chaining all pipeline stages loaded from the model file.
 * @throws Exception
 */
@Deprecated
public static LocalPredictor loadLocalPredictor(String modelPath, TableSchema inputSchema) throws Exception {
    Map<Long, List<Row>> rows = readPipelineModelRowsFromCsvFile(modelPath, LegacyModelExporterUtils.PIPELINE_MODEL_SCHEMA);
    Preconditions.checkState(rows.containsKey(-1L), "can't find meta in model.");
    String meta = (String) rows.get(-1L).get(0).getField(1);
    PipelineStageBase[] transformers = constructPipelineStagesFromMeta(meta, 0L);
    String[] modelSchemaStr = JsonConverter.fromJson(JsonPath.read(meta, "$.schema").toString(), String[].class);
    LocalPredictor predictor = null;
    TableSchema schema = inputSchema;
    for (int i = 0; i < transformers.length; i++) {
        PipelineStageBase transformer = transformers[i];
        LocalPredictor localPredictor;
        if (transformer instanceof MapModel) {
            MapModel<?> mapModel = (MapModel) transformer;
            ModelMapper mapper = mapModel.mapperBuilder.apply(CsvUtil.schemaStr2Schema(modelSchemaStr[i]), schema, mapModel.getParams());
            CsvParser csvParser = new CsvParser(CsvUtil.getColTypes(modelSchemaStr[i]), "^", '\'');
            List<Row> modelRows = rows.get((long) i);
            int s = modelRows.size();
            for (int j = 0; j < s; j++) {
                Row r = modelRows.get(j);
                r = csvParser.parse((String) r.getField(1)).f1;
                modelRows.set(j, r);
            }
            mapper.loadModel(modelRows);
            localPredictor = new LocalPredictor(mapper);
        } else {
            localPredictor = ((LocalPredictable) transformer).collectLocalPredictor(schema);
        }
        schema = localPredictor.getOutputSchema();
        if (predictor == null) {
            predictor = localPredictor;
        } else {
            predictor.merge(localPredictor);
        }
    }
    return predictor;
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) ModelMapper(com.alibaba.alink.common.mapper.ModelMapper) ArrayList(java.util.ArrayList) List(java.util.List) CsvParser(com.alibaba.alink.operator.common.io.csv.CsvParser) Row(org.apache.flink.types.Row)

Example 3 with CsvParser

use of com.alibaba.alink.operator.common.io.csv.CsvParser in project Alink by alibaba.

the class LocalPredictorLoader method loadLocalPredictor.

/**
 * Create a local predictor by loading a model given in {@code List<Row>} format, without launching a Flink job.
 *
 * @param modelRows model rows in {@code List<Row>} format.
 * @return a LocalPredictor chaining all pipeline stages loaded from the model rows.
 * @throws Exception
 */
@Deprecated
public static LocalPredictor loadLocalPredictor(List<Row> modelRows, TableSchema dataSchema) throws Exception {
    Map<Long, List<Row>> mapRows = getMapModelRows(modelRows);
    String meta = (String) mapRows.get(-1L).get(0).getField(1);
    PipelineStageBase[] transformers = constructPipelineStagesFromMeta(meta, 0L);
    String[] modelSchemaStr = JsonConverter.fromJson(JsonPath.read(meta, "$.schema").toString(), String[].class);
    LocalPredictor predictor = null;
    TableSchema schema = dataSchema;
    for (int i = 0; i < transformers.length; i++) {
        PipelineStageBase transformer = transformers[i];
        LocalPredictor localPredictor = null;
        if (transformer instanceof MapModel) {
            MapModel<?> mapModel = (MapModel) transformer;
            ModelMapper mapper = mapModel.mapperBuilder.apply(CsvUtil.schemaStr2Schema(modelSchemaStr[i]), schema, mapModel.getParams());
            CsvParser csvParser = new CsvParser(CsvUtil.getColTypes(modelSchemaStr[i]), "^", '\'');
            List<Row> singleModelRows = mapRows.get((long) i);
            int s = singleModelRows.size();
            for (int j = 0; j < s; j++) {
                Row r = singleModelRows.get(j);
                r = csvParser.parse((String) r.getField(1)).f1;
                singleModelRows.set(j, r);
            }
            mapper.loadModel(singleModelRows);
            localPredictor = new LocalPredictor(mapper);
        } else {
            localPredictor = ((LocalPredictable) transformer).collectLocalPredictor(schema);
        }
        schema = localPredictor.getOutputSchema();
        if (predictor == null) {
            predictor = localPredictor;
        } else {
            predictor.merge(localPredictor);
        }
    }
    return predictor;
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) ModelMapper(com.alibaba.alink.common.mapper.ModelMapper) ArrayList(java.util.ArrayList) List(java.util.List) CsvParser(com.alibaba.alink.operator.common.io.csv.CsvParser) Row(org.apache.flink.types.Row)

Example 4 with CsvParser

use of com.alibaba.alink.operator.common.io.csv.CsvParser in project Alink by alibaba.

the class LegacyModelExporterUtils method unpackBatchOp.

/**
 * Unpack a BatchOperator.
 */
@Deprecated
private static BatchOperator<?> unpackBatchOp(BatchOperator<?> data, TableSchema schema) {
    DataSet<Row> rows = data.getDataSet();
    final TypeInformation<?>[] types = schema.getFieldTypes();
    rows = rows.map(new RichMapFunction<Row, Row>() {

        private static final long serialVersionUID = 7791442624358724472L;

        private transient CsvParser parser;

        @Override
        public void open(Configuration parameters) throws Exception {
            parser = new CsvParser(types, "^", '\'');
        }

        @Override
        public Row map(Row value) throws Exception {
            return parser.parse((String) value.getField(1)).f1;
        }
    });
    return BatchOperator.fromTable(DataSetConversionUtil.toTable(data.getMLEnvironmentId(), rows, schema)).setMLEnvironmentId(data.getMLEnvironmentId());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) CsvParser(com.alibaba.alink.operator.common.io.csv.CsvParser) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation)

Example 5 with CsvParser

use of com.alibaba.alink.operator.common.io.csv.CsvParser in project Alink by alibaba.

the class LocalPredictorLoader method readPipelineModelRowsFromCsvFile.

@Deprecated
static Map<Long, List<Row>> readPipelineModelRowsFromCsvFile(String filePath, TableSchema schema) throws Exception {
    Map<Long, List<Row>> rows = new HashMap<>();
    Path path = new Path(filePath);
    FileSystem fs = FileSystem.get(path.toUri());
    FSDataInputStream stream = fs.open(path);
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    CsvParser csvParser = new CsvParser(schema.getFieldTypes(), ",", '"');
    while (reader.ready()) {
        String line = reader.readLine();
        Tuple2<Boolean, Row> parsed = csvParser.parse(line);
        Preconditions.checkState(parsed.f0, "Fail to parse line: " + line);
        Long id = (Long) parsed.f1.getField(0);
        if (rows.containsKey(id)) {
            rows.get(id).add(parsed.f1);
        } else {
            List<Row> r = new ArrayList<>();
            r.add(parsed.f1);
            rows.put(id, r);
        }
    }
    reader.close();
    stream.close();
    return rows;
}
Also used : JsonPath(com.jayway.jsonpath.JsonPath) FilePath(com.alibaba.alink.common.io.filesystem.FilePath) Path(org.apache.flink.core.fs.Path) InputStreamReader(java.io.InputStreamReader) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FileSystem(org.apache.flink.core.fs.FileSystem) BufferedReader(java.io.BufferedReader) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) ArrayList(java.util.ArrayList) List(java.util.List) CsvParser(com.alibaba.alink.operator.common.io.csv.CsvParser) Row(org.apache.flink.types.Row)

Aggregations

CsvParser (com.alibaba.alink.operator.common.io.csv.CsvParser)5 Row (org.apache.flink.types.Row)5 ArrayList (java.util.ArrayList)4 List (java.util.List)3 TableSchema (org.apache.flink.table.api.TableSchema)3 ModelMapper (com.alibaba.alink.common.mapper.ModelMapper)2 FilePath (com.alibaba.alink.common.io.filesystem.FilePath)1 JsonPath (com.jayway.jsonpath.JsonPath)1 BufferedReader (java.io.BufferedReader)1 InputStreamReader (java.io.InputStreamReader)1 HashMap (java.util.HashMap)1 RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction)1 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)1 Configuration (org.apache.flink.configuration.Configuration)1 FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream)1 FileSystem (org.apache.flink.core.fs.FileSystem)1 Path (org.apache.flink.core.fs.Path)1