Search in sources :

Example 1 with Pair

Use of won.protocol.util.RdfUtils.Pair in the project webofneeds by researchstudio-sat.

From the class TestDataMigrator, method migrateFile.

/**
 * Migrates one RDF conversation file: deletes all 'remote messages', changes
 * msg:isRemoteResponseTo (and msg:isResponseTo) to msg:respondingTo references,
 * renames msg:isResponseToMessageType to msg:respondingToMessageType, drops
 * msg:correspondingRemoteMessage triples and writes the result to
 * {@code outFile}.
 *
 * @param inFile the file to read; must be an existing, readable regular file
 * @param outFile the file to write the migrated data to; must denote a
 * different path than {@code inFile}
 * @throws NullPointerException if {@code inFile} or {@code outFile} is null
 * @throws IllegalArgumentException if {@code inFile} is not a readable file or
 * both parameters denote the same absolute path
 * @throws Exception if reading, transforming or writing the data fails
 */
public void migrateFile(File inFile, File outFile) throws Exception {
    Objects.requireNonNull(inFile);
    Objects.requireNonNull(outFile);
    if (!inFile.isFile()) {
        throw new IllegalArgumentException("input file must be a file, but this isnt one: " + inFile);
    }
    if (!inFile.canRead()) {
        throw new IllegalArgumentException("cannot read input file " + inFile);
    }
    if (inFile.getAbsolutePath().equals(outFile.getAbsolutePath())) {
        throw new IllegalArgumentException("Input file and output file are the same, aborting.");
    }
    // File.getParentFile() is null for a relative path without a directory part
    // (e.g. new File("data.trig")), so resolve the absolute file first.
    logger.debug("migrating {} from {} to {}", new Object[] { inFile.getName(), removeBase(inFile.getAbsoluteFile().getParentFile().getAbsolutePath()), removeBase(outFile.getAbsoluteFile().getParentFile().getAbsolutePath()) });
    Dataset ds = DatasetFactory.createGeneral();
    RDFDataMgr.read(ds, inFile.getAbsolutePath());
    List<Pair<String>> remoteMessages = this.findRemoteMsg.apply(ds);
    // key: remote message uri (second), value: original message uri (first)
    Map<String, String> remoteToOrig = remoteMessages.stream().distinct().collect(Collectors.toMap(Pair::getSecond, Pair::getFirst));
    Map<String, List<Statement>> statementsToMoveFromRemoteMsgToOrig = new HashMap<>();
    Set<String> remoteResponses = findRemoteResponse.apply(ds).stream().collect(Collectors.toSet());
    final String isResponseToUri = WONMSG.getURI() + "isResponseTo";
    final String isRemoteResponseToUri = WONMSG.getURI() + "isRemoteResponseTo";
    final String isResponseToMessageTypeUri = WONMSG.getURI() + "isResponseToMessageType";
    final String correspondingRemoteMessageUri = WONMSG.getURI() + "correspondingRemoteMessage";
    // remove the msg:isResponseTo triple from the remote responses (in the next
    // step, we'll rename the msg:isRemoteResponseTo triples)
    ds = RdfUtils.toQuadStream(ds)
                    .filter(q -> !(q.getSubject().isURI()
                                    && remoteResponses.contains(q.getSubject().getURI())
                                    && isResponseToUri.equals(q.getPredicate().getURI())))
                    .collect(RdfUtils.collectToDataset());
    // rename the response predicates to their new names
    ds = RdfUtils.toQuadStream(ds).map(quad -> {
        Node pred = quad.getPredicate();
        if (pred.isURI()) {
            String predUri = pred.getURI();
            if (predUri.equals(isRemoteResponseToUri) || predUri.equals(isResponseToUri)) {
                pred = WONMSG.respondingTo.asNode();
            } else if (predUri.equals(isResponseToMessageTypeUri)) {
                pred = WONMSG.respondingToMessageType.asNode();
            }
        }
        return new Quad(quad.getGraph(), quad.getSubject(), pred, quad.getObject());
    }).collect(RdfUtils.collectToDataset());
    // remove the correspondingRemoteMessage triples
    ds = RdfUtils.toQuadStream(ds)
                    .filter(q -> !correspondingRemoteMessageUri.equals(q.getPredicate().getURI()))
                    .collect(RdfUtils.collectToDataset());
    // replace remote message uris in prev references with the orig msgs and
    // remember those triples (grouped by orig msg) so they can be moved to the
    // orig msg envelopes; the predicate of the quad is kept unchanged
    RdfUtils.toQuadStream(ds).forEach(q -> {
        Node pred = q.getPredicate();
        if (!pred.isURI() || !pred.getURI().equals(WONMSG.previousMessage.getURI())) {
            return;
        }
        Node obj = q.getObject();
        if (!obj.isURI()) {
            return;
        }
        String orig = remoteToOrig.get(obj.getURI());
        if (orig == null) {
            return;
        }
        Statement stmt = new StatementImpl(new ResourceImpl(q.getSubject().getURI()), new PropertyImpl(pred.getURI()), new ResourceImpl(orig));
        statementsToMoveFromRemoteMsgToOrig.computeIfAbsent(orig, key -> new ArrayList<>()).add(stmt);
    });
    // remove remote messages (their graphs)
    Set<String> toRemove = remoteMessages.stream().map(Pair::getSecond).collect(Collectors.toSet());
    ds = removeMessages(ds, toRemove);
    // now add the triples we saved from the remote messages to the original
    // messages:
    // for each original message: find the quad that holds the [msg] msg:messageType
    // [msgType] triple. insert the set of statements in that graph
    // this creates a stream of quads, which in the end we add to the ds
    final Dataset finalDsNow = ds;
    List<Quad> moveddata = statementsToMoveFromRemoteMsgToOrig.entrySet().stream().flatMap(entry -> {
        Optional<Quad> quad = RdfUtils.toQuadStream(finalDsNow).filter(q -> q.getSubject().isURI() && entry.getKey().equals(q.getSubject().getURI()) && WONMSG.messageType.getURI().equals(q.getPredicate().getURI())).findFirst();
        if (!quad.isPresent()) {
            // the original message is not part of this dataset: nothing to move
            return Stream.<Quad> empty();
        }
        Node graph = quad.get().getGraph();
        return entry.getValue().stream().map(stmt -> new Quad(graph, new ResourceImpl(entry.getKey()).asNode(), stmt.getPredicate().asNode(), stmt.getObject().asNode()));
    }).collect(Collectors.toList());
    ds = Stream.concat(RdfUtils.toQuadStream(ds), moveddata.stream()).collect(RdfUtils.collectToDataset());
    ds = removeMessages(ds, listFromExternalMsgs.apply(ds).stream().collect(Collectors.toSet()));
    logger.debug("writing rsult to {}", outFile);
    // close the stream deterministically so the output is flushed and the file
    // handle is released even if writing fails
    try (FileOutputStream out = new FileOutputStream(outFile)) {
        RDFDataMgr.write(out, Prefixer.setPrefixes(ds), Lang_WON.TRIG_WON_CONVERSATION);
    }
}
Also used : ResourceImpl(org.apache.jena.rdf.model.impl.ResourceImpl) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) WONMSG(won.protocol.vocabulary.WONMSG) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Statement(org.apache.jena.rdf.model.Statement) StatementImpl(org.apache.jena.rdf.model.impl.StatementImpl) QuerySolution(org.apache.jena.query.QuerySolution) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Quad(org.apache.jena.sparql.core.Quad) DatasetFactory(org.apache.jena.query.DatasetFactory) Map(java.util.Map) Dataset(org.apache.jena.query.Dataset) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) PropertyImpl(org.apache.jena.rdf.model.impl.PropertyImpl) FileOutputStream(java.io.FileOutputStream) Set(java.util.Set) Pair(won.protocol.util.RdfUtils.Pair) Collectors(java.util.stream.Collectors) File(java.io.File) Objects(java.util.Objects) List(java.util.List) Level(ch.qos.logback.classic.Level) Stream(java.util.stream.Stream) Node(org.apache.jena.graph.Node) RDFDataMgr(org.apache.jena.riot.RDFDataMgr) Lang_WON(won.protocol.util.pretty.Lang_WON) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) Quad(org.apache.jena.sparql.core.Quad) HashMap(java.util.HashMap) Dataset(org.apache.jena.query.Dataset) Statement(org.apache.jena.rdf.model.Statement) Node(org.apache.jena.graph.Node) PropertyImpl(org.apache.jena.rdf.model.impl.PropertyImpl) ResourceImpl(org.apache.jena.rdf.model.impl.ResourceImpl) StatementImpl(org.apache.jena.rdf.model.impl.StatementImpl) FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) List(java.util.List) Pair(won.protocol.util.RdfUtils.Pair)

Aggregations

Level (ch.qos.logback.classic.Level)1 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Optional (java.util.Optional)1 Set (java.util.Set)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 Pattern (java.util.regex.Pattern)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 Node (org.apache.jena.graph.Node)1 Dataset (org.apache.jena.query.Dataset)1 DatasetFactory (org.apache.jena.query.DatasetFactory)1 QuerySolution (org.apache.jena.query.QuerySolution)1