Search in sources :

Example 1 with ExtractedVertexType

use of au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexTypeUtilities.ExtractedVertexType in project constellation by constellation-app.

From the class ExtractTypesFromTextPlugin, method query.

/**
 * Builds a record store of seed vertices from the types extracted out of the
 * text parameter.
 * <p>
 * The text is handed to {@code SchemaVertexTypeUtilities.extractVertexTypes};
 * each distinct identifier it reports becomes one source-vertex record
 * carrying the identifier, its type and a seed flag of {@code "true"}. When
 * the same identifier is reported more than once, the type of the last
 * occurrence is the one recorded.
 *
 * @param query the incoming record store (not read by this method)
 * @param interaction used to report progress
 * @param parameters plugin parameters; the text is read from
 * {@code TEXT_PARAMETER_ID}
 * @return a new record store with one record per distinct identifier
 * @throws InterruptedException declared by the plugin contract
 * @throws PluginException if the text parameter has no value
 */
@Override
protected RecordStore query(final RecordStore query, final PluginInteraction interaction, final PluginParameters parameters) throws InterruptedException, PluginException {
    final RecordStore result = new GraphRecordStore();
    interaction.setProgress(0, 0, "Importing...", true);

    final PluginParameter<?> textParameter = parameters.getParameters().get(TEXT_PARAMETER_ID);
    final String text = textParameter.getStringValue();
    if (text == null) {
        throw new PluginException(PluginNotificationLevel.ERROR, "No text provided from which to extract types.");
    }

    // Collapse the extracted types by identifier so that each identifier yields a single record.
    final Map<String, SchemaVertexType> typeByIdentifier = new HashMap<>();
    for (final ExtractedVertexType extracted : SchemaVertexTypeUtilities.extractVertexTypes(text)) {
        typeByIdentifier.put(extracted.getIdentifier(), extracted.getType());
    }

    final String identifierKey = GraphRecordStoreUtilities.SOURCE + VisualConcept.VertexAttribute.IDENTIFIER;
    final String typeKey = GraphRecordStoreUtilities.SOURCE + AnalyticConcept.VertexAttribute.TYPE;
    final String seedKey = GraphRecordStoreUtilities.SOURCE + AnalyticConcept.VertexAttribute.SEED;
    for (final Map.Entry<String, SchemaVertexType> entry : typeByIdentifier.entrySet()) {
        result.add();
        result.set(identifierKey, entry.getKey());
        result.set(typeKey, entry.getValue());
        result.set(seedKey, "true");
    }

    ConstellationLoggerHelper.createPropertyBuilder(this, result.getAll(identifierKey), ConstellationLoggerHelper.SUCCESS);
    interaction.setProgress(1, 0, "Completed successfully - imported " + result.size() + " entities.", true);
    return result;
}
Also used : SchemaVertexType(au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexType) HashMap(java.util.HashMap) RecordStore(au.gov.asd.tac.constellation.graph.processing.RecordStore) GraphRecordStore(au.gov.asd.tac.constellation.graph.processing.GraphRecordStore) PluginException(au.gov.asd.tac.constellation.plugins.PluginException) GraphRecordStore(au.gov.asd.tac.constellation.graph.processing.GraphRecordStore) PluginParameter(au.gov.asd.tac.constellation.plugins.parameters.PluginParameter) ExtractedVertexType(au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexTypeUtilities.ExtractedVertexType)

Example 2 with ExtractedVertexType

use of au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexTypeUtilities.ExtractedVertexType in project constellation by constellation-app.

From the class ExtractWordsFromTextPlugin, method edit.

/**
 * Extracts words (and optionally schema types) from the content attribute of
 * transactions and adds each one to the graph as a new vertex linked by a
 * new {@code REFERENCED} transaction.
 * <p>
 * Two modes are supported:
 * <ul>
 * <li><b>Regex only</b>: every non-empty line of the words parameter is
 * compiled as-is into a regular expression. If a pattern has capture groups,
 * the captured groups are extracted; otherwise the whole match is. Matches
 * are de-duplicated per transaction.</li>
 * <li><b>Standard</b>: patterns are built by {@code patternsFromWords}; when
 * no words are supplied, the content is split on spaces and every token of at
 * least the minimum length is used. Schema types are optionally extracted
 * first, and words equal (ignoring case) to an extracted identifier are
 * skipped.</li>
 * </ul>
 * In both modes, transactions that are unselected (when "selected only" is
 * set), have blank content, or already have the {@code REFERENCED} type are
 * ignored. New transactions copy the datetime and content of the transaction
 * they were derived from.
 *
 * @param wg the graph to read transactions from and write new elements to
 * @param interaction used to report progress
 * @param parameters the plugin parameters controlling the extraction
 * @throws InterruptedException declared by the plugin contract
 * @throws PluginException if the in/out parameter is neither
 * {@code OUTGOING} nor {@code INCOMING}
 */
@Override
public void edit(final GraphWriteMethods wg, final PluginInteraction interaction, final PluginParameters parameters) throws InterruptedException, PluginException {
    interaction.setProgress(0, 0, "Extracting...", true);

    /*
     Retrieving attributes
     */
    final Map<String, PluginParameter<?>> extractEntityParameters = parameters.getParameters();
    final String contentAttribute = extractEntityParameters.get(ATTRIBUTE_PARAMETER_ID).getStringValue();
    final String rawWords = extractEntityParameters.get(WORDS_PARAMETER_ID).getStringValue();
    final String words = rawWords == null ? null : rawWords.trim();
    final boolean useRegex = extractEntityParameters.get(USE_REGEX_PARAMETER_ID).getBooleanValue();
    final boolean wholeWordOnly = extractEntityParameters.get(WHOLE_WORDS_ONLY_PARAMETER_ID).getBooleanValue();
    final int wordLength = extractEntityParameters.get(MIN_WORD_LENGTH_PARAMETER_ID).getIntegerValue();
    final boolean removeSpecialChars = extractEntityParameters.get(REMOVE_SPECIAL_CHARS_PARAMETER_ID).getBooleanValue();
    final boolean toLowerCase = extractEntityParameters.get(LOWER_CASE_PARAMETER_ID).getBooleanValue();
    final boolean types = extractEntityParameters.get(SCHEMA_TYPES_PARAMETER_ID).getBooleanValue();
    final String inOrOut = extractEntityParameters.get(IN_OR_OUT_PARAMETER_ID).getStringValue();
    final boolean selectedOnly = extractEntityParameters.get(SELECTED_ONLY_PARAMETER_ID).getBooleanValue();
    final boolean regexOnly = extractEntityParameters.get(REGEX_ONLY_PARAMETER_ID).getBooleanValue();

    if (!OUTGOING.equals(inOrOut) && !INCOMING.equals(inOrOut)) {
        // Name the parameter that is actually being validated (the in/out choice).
        final String msg = String.format("Parameter %s must be '%s' or '%s'", IN_OR_OUT_PARAMETER_ID, OUTGOING, INCOMING);
        throw new PluginException(PluginNotificationLevel.ERROR, msg);
    }
    final boolean outgoing = OUTGOING.equals(inOrOut);

    /*
     Retrieving attribute IDs
     */
    final int vertexIdentifierAttributeId = VisualConcept.VertexAttribute.IDENTIFIER.ensure(wg);
    final int vertexTypeAttributeId = AnalyticConcept.VertexAttribute.TYPE.ensure(wg);
    final int transactionTypeAttributeId = AnalyticConcept.TransactionAttribute.TYPE.ensure(wg);
    final int transactionDatetimeAttributeId = TemporalConcept.TransactionAttribute.DATETIME.ensure(wg);
    final int transactionContentAttributeId = wg.getAttribute(GraphElementType.TRANSACTION, contentAttribute);
    final int transactionSelectedAttributeId = VisualConcept.TransactionAttribute.SELECTED.ensure(wg);

    // The content attribute is looked up rather than ensured: if it is missing there is
    // nothing to extract from, so warn the user and leave the graph untouched.
    if (transactionContentAttributeId == Graph.NOT_FOUND) {
        final NotifyDescriptor nd = new NotifyDescriptor.Message(String.format("The specified attribute %s does not exist.", contentAttribute), NotifyDescriptor.WARNING_MESSAGE);
        DialogDisplayer.getDefault().notify(nd);
        return;
    }

    // Captured before any transactions are added, so the loops below only visit
    // positions below this count.
    final int transactionCount = wg.getTransactionCount();

    if (regexOnly) {
        // This choice ignores several other parameters, so is a bit simpler
        // even if there are code commonalities, but combining the if/else
        // code would make things even more complex.
        //
        // The input words are treated as trusted regular expressions,
        // so the caller has to know what they're doing.
        // This is power-user mode.
        //
        // Each line of the input words is a regex.
        // Use them as-is for the power users.
        //
        final List<Pattern> patterns = new ArrayList<>();
        if (StringUtils.isNotBlank(words)) {
            for (final String line : words.split(SeparatorConstants.NEWLINE)) {
                final String regex = line.strip();
                if (!regex.isEmpty()) {
                    patterns.add(Pattern.compile(regex));
                }
            }
        }

        if (!patterns.isEmpty()) {
            for (int transactionPosition = 0; transactionPosition < transactionCount; transactionPosition++) {
                final int transactionId = wg.getTransaction(transactionPosition);

                final boolean selectedTx = wg.getBooleanValue(transactionSelectedAttributeId, transactionId);
                if (selectedOnly && !selectedTx) {
                    continue;
                }

                /*
                 Does the transaction have content?
                 */
                final String content = wg.getStringValue(transactionContentAttributeId, transactionId);
                if (StringUtils.isBlank(content)) {
                    continue;
                }

                /*
                 Ignore other "referenced" transactions because that's not useful
                 */
                final Object transactionType = wg.getObjectValue(transactionTypeAttributeId, transactionId);
                if (transactionType != null && transactionType.equals(AnalyticConcept.TransactionType.REFERENCED)) {
                    continue;
                }

                // Use a set to hold the words.
                // If a word is found multiple times, there's no point adding multiple nodes.
                // The set is created per transaction so that words matched in one
                // transaction are not attached to the transactions that follow it.
                final Set<String> matched = new HashSet<>();
                for (final Pattern pattern : patterns) {
                    final var matcher = pattern.matcher(content);
                    while (matcher.find()) {
                        if (matcher.groupCount() == 0) {
                            // The regex doesn't have an explicit capture group, so capture the lot.
                            final String g = matcher.group();
                            matched.add(toLowerCase ? g.toLowerCase() : g);
                        } else {
                            for (int i = 1; i <= matcher.groupCount(); i++) {
                                final String g = matcher.group(i);
                                // A group that did not take part in the match (e.g. one side
                                // of an alternation) is reported as null; skip it.
                                if (g != null) {
                                    matched.add(toLowerCase ? g.toLowerCase() : g);
                                }
                            }
                        }
                    }
                }

                if (!matched.isEmpty()) {
                    /*
                     Retrieving information needed to create new transactions
                     */
                    final int sourceVertexId = wg.getTransactionSourceVertex(transactionId);
                    final int destinationVertexId = wg.getTransactionDestinationVertex(transactionId);
                    final ZonedDateTime datetime = wg.getObjectValue(transactionDatetimeAttributeId, transactionId);

                    for (final String word : matched) {
                        final int newVertexId = wg.addVertex();
                        wg.setStringValue(vertexIdentifierAttributeId, newVertexId, word);
                        wg.setObjectValue(vertexTypeAttributeId, newVertexId, AnalyticConcept.VertexType.WORD);

                        final int newTransactionId = outgoing ? wg.addTransaction(sourceVertexId, newVertexId, true) : wg.addTransaction(newVertexId, destinationVertexId, true);
                        wg.setObjectValue(transactionDatetimeAttributeId, newTransactionId, datetime);
                        wg.setObjectValue(transactionTypeAttributeId, newTransactionId, AnalyticConcept.TransactionType.REFERENCED);
                        wg.setStringValue(transactionContentAttributeId, newTransactionId, content);
                    }
                }
            }
        }
    // End of regexOnly.
    } else {
        // The original logic.
        final List<Pattern> patterns = patternsFromWords(words, useRegex, wholeWordOnly);

        /*
         Iterating over all the transactions in the graph
         */
        for (int transactionPosition = 0; transactionPosition < transactionCount; transactionPosition++) {
            final int transactionId = wg.getTransaction(transactionPosition);

            final boolean selectedTx = wg.getBooleanValue(transactionSelectedAttributeId, transactionId);
            if (selectedOnly && !selectedTx) {
                continue;
            }

            /*
             Does the transaction have content?
             */
            final String content = wg.getStringValue(transactionContentAttributeId, transactionId);
            if (StringUtils.isBlank(content)) {
                continue;
            }

            /*
             Ignore other "referenced" transactions because that's not useful
             */
            final Object transactionType = wg.getObjectValue(transactionTypeAttributeId, transactionId);
            if (transactionType != null && transactionType.equals(AnalyticConcept.TransactionType.REFERENCED)) {
                continue;
            }

            /*
             Retrieving information needed to create new transactions
             */
            final int sourceVertexId = wg.getTransactionSourceVertex(transactionId);
            final int destinationVertexId = wg.getTransactionDestinationVertex(transactionId);
            final ZonedDateTime datetime = wg.getObjectValue(transactionDatetimeAttributeId, transactionId);

            // Lower-cased identifiers of the schema types added for this transaction,
            // used below to avoid adding the same text again as a plain word.
            final Set<String> typesExtracted = new HashSet<>();

            /*
             Extracting Schema Types
             */
            if (types) {
                final List<ExtractedVertexType> extractedTypes = SchemaVertexTypeUtilities.extractVertexTypes(content);
                final Map<String, SchemaVertexType> identifiers = new HashMap<>();
                for (final ExtractedVertexType extractedType : extractedTypes) {
                    identifiers.put(extractedType.getIdentifier(), extractedType.getType());
                }

                for (final Map.Entry<String, SchemaVertexType> entry : identifiers.entrySet()) {
                    final String identifier = entry.getKey();

                    final int newVertexId = wg.addVertex();
                    wg.setStringValue(vertexIdentifierAttributeId, newVertexId, identifier);
                    wg.setObjectValue(vertexTypeAttributeId, newVertexId, entry.getValue());

                    final int newTransactionId = outgoing ? wg.addTransaction(sourceVertexId, newVertexId, true) : wg.addTransaction(newVertexId, destinationVertexId, true);
                    wg.setObjectValue(transactionDatetimeAttributeId, newTransactionId, datetime);
                    wg.setObjectValue(transactionTypeAttributeId, newTransactionId, AnalyticConcept.TransactionType.REFERENCED);
                    wg.setStringValue(transactionContentAttributeId, newTransactionId, content);

                    typesExtracted.add(identifier.toLowerCase());
                }
            }

            // Words found in this transaction only; duplicates are kept, so a word that
            // occurs several times produces several vertices.
            final List<String> foundWords = new ArrayList<>();
            if (StringUtils.isBlank(words)) {
                /*
                 Extracting all words of the specified length if no word list has been provided
                 */
                for (String word : content.split(" ")) {
                    if (toLowerCase) {
                        word = word.toLowerCase();
                    }
                    if (removeSpecialChars) {
                        word = word.replaceAll("\\W", "");
                    }
                    // The length check is applied after the optional clean-up above.
                    if (word.length() < wordLength) {
                        continue;
                    }
                    foundWords.add(word);
                }
            } else {
                for (final Pattern pattern : patterns) {
                    final var matcher = pattern.matcher(content);
                    while (matcher.find()) {
                        final String g = matcher.group();
                        foundWords.add(toLowerCase ? g.toLowerCase() : g);
                    }
                }
            }

            /*
             Add words to graph
             */
            for (final String word : foundWords) {
                if (types && typesExtracted.contains(word.toLowerCase())) {
                    continue;
                }

                final int newVertexId = wg.addVertex();
                wg.setStringValue(vertexIdentifierAttributeId, newVertexId, word);
                wg.setObjectValue(vertexTypeAttributeId, newVertexId, AnalyticConcept.VertexType.WORD);

                final int newTransactionId = outgoing ? wg.addTransaction(sourceVertexId, newVertexId, true) : wg.addTransaction(newVertexId, destinationVertexId, true);
                wg.setObjectValue(transactionDatetimeAttributeId, newTransactionId, datetime);
                wg.setObjectValue(transactionTypeAttributeId, newTransactionId, AnalyticConcept.TransactionType.REFERENCED);
                wg.setStringValue(transactionContentAttributeId, newTransactionId, content);
            }
        }
    }

    // Run the COMPLETE_SCHEMA plugin followed by RESET_VIEW on the modified graph.
    PluginExecutor.startWith(VisualSchemaPluginRegistry.COMPLETE_SCHEMA).followedBy(InteractiveGraphPluginRegistry.RESET_VIEW).executeNow(wg);
    interaction.setProgress(1, 0, "Completed successfully", true);
}
Also used : ReadableGraph(au.gov.asd.tac.constellation.graph.ReadableGraph) StringParameterType(au.gov.asd.tac.constellation.plugins.parameters.types.StringParameterType) SingleChoiceParameterType(au.gov.asd.tac.constellation.plugins.parameters.types.SingleChoiceParameterType) ZonedDateTime(java.time.ZonedDateTime) StringAttributeDescription(au.gov.asd.tac.constellation.graph.attribute.StringAttributeDescription) PluginType(au.gov.asd.tac.constellation.plugins.PluginType) StringUtils(org.apache.commons.lang3.StringUtils) DataAccessPlugin(au.gov.asd.tac.constellation.views.dataaccess.plugins.DataAccessPlugin) Map(java.util.Map) PluginExecutor(au.gov.asd.tac.constellation.plugins.PluginExecutor) StringParameterValue(au.gov.asd.tac.constellation.plugins.parameters.types.StringParameterValue) InteractiveGraphPluginRegistry(au.gov.asd.tac.constellation.graph.interaction.InteractiveGraphPluginRegistry) SeparatorConstants(au.gov.asd.tac.constellation.utilities.text.SeparatorConstants) BooleanParameterValue(au.gov.asd.tac.constellation.plugins.parameters.types.BooleanParameterType.BooleanParameterValue) Set(java.util.Set) PluginNotificationLevel(au.gov.asd.tac.constellation.plugins.PluginNotificationLevel) PluginInfo(au.gov.asd.tac.constellation.plugins.PluginInfo) List(java.util.List) NotifyDescriptor(org.openide.NotifyDescriptor) IntegerParameterType(au.gov.asd.tac.constellation.plugins.parameters.types.IntegerParameterType) VisualSchemaPluginRegistry(au.gov.asd.tac.constellation.graph.schema.visual.VisualSchemaPluginRegistry) Pattern(java.util.regex.Pattern) Messages(org.openide.util.NbBundle.Messages) GraphWriteMethods(au.gov.asd.tac.constellation.graph.GraphWriteMethods) SchemaVertexTypeUtilities(au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexTypeUtilities) IntegerParameterValue(au.gov.asd.tac.constellation.plugins.parameters.types.IntegerParameterType.IntegerParameterValue) ParameterChange(au.gov.asd.tac.constellation.plugins.parameters.ParameterChange) 
HashMap(java.util.HashMap) VisualConcept(au.gov.asd.tac.constellation.graph.schema.visual.concept.VisualConcept) ArrayList(java.util.ArrayList) Graph(au.gov.asd.tac.constellation.graph.Graph) HashSet(java.util.HashSet) Plugin(au.gov.asd.tac.constellation.plugins.Plugin) PluginInteraction(au.gov.asd.tac.constellation.plugins.PluginInteraction) ServiceProviders(org.openide.util.lookup.ServiceProviders) PluginParameter(au.gov.asd.tac.constellation.plugins.parameters.PluginParameter) ServiceProvider(org.openide.util.lookup.ServiceProvider) PluginTags(au.gov.asd.tac.constellation.plugins.templates.PluginTags) PluginParameters(au.gov.asd.tac.constellation.plugins.parameters.PluginParameters) ContentConcept(au.gov.asd.tac.constellation.graph.schema.analytic.concept.ContentConcept) GraphElementType(au.gov.asd.tac.constellation.graph.GraphElementType) DialogDisplayer(org.openide.DialogDisplayer) ExtractedVertexType(au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexTypeUtilities.ExtractedVertexType) PluginException(au.gov.asd.tac.constellation.plugins.PluginException) BooleanParameterType(au.gov.asd.tac.constellation.plugins.parameters.types.BooleanParameterType) SchemaVertexType(au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexType) AnalyticConcept(au.gov.asd.tac.constellation.graph.schema.analytic.concept.AnalyticConcept) TemporalConcept(au.gov.asd.tac.constellation.graph.schema.analytic.concept.TemporalConcept) SimpleQueryPlugin(au.gov.asd.tac.constellation.plugins.templates.SimpleQueryPlugin) DataAccessPluginCoreType(au.gov.asd.tac.constellation.views.dataaccess.plugins.DataAccessPluginCoreType) SingleChoiceParameterValue(au.gov.asd.tac.constellation.plugins.parameters.types.SingleChoiceParameterType.SingleChoiceParameterValue) Pattern(java.util.regex.Pattern) SchemaVertexType(au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexType) HashMap(java.util.HashMap) PluginException(au.gov.asd.tac.constellation.plugins.PluginException) 
ArrayList(java.util.ArrayList) ExtractedVertexType(au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexTypeUtilities.ExtractedVertexType) NotifyDescriptor(org.openide.NotifyDescriptor) ZonedDateTime(java.time.ZonedDateTime) PluginParameter(au.gov.asd.tac.constellation.plugins.parameters.PluginParameter) HashSet(java.util.HashSet)

Aggregations

SchemaVertexType (au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexType)2 ExtractedVertexType (au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexTypeUtilities.ExtractedVertexType)2 PluginException (au.gov.asd.tac.constellation.plugins.PluginException)2 PluginParameter (au.gov.asd.tac.constellation.plugins.parameters.PluginParameter)2 HashMap (java.util.HashMap)2 Graph (au.gov.asd.tac.constellation.graph.Graph)1 GraphElementType (au.gov.asd.tac.constellation.graph.GraphElementType)1 GraphWriteMethods (au.gov.asd.tac.constellation.graph.GraphWriteMethods)1 ReadableGraph (au.gov.asd.tac.constellation.graph.ReadableGraph)1 StringAttributeDescription (au.gov.asd.tac.constellation.graph.attribute.StringAttributeDescription)1 InteractiveGraphPluginRegistry (au.gov.asd.tac.constellation.graph.interaction.InteractiveGraphPluginRegistry)1 GraphRecordStore (au.gov.asd.tac.constellation.graph.processing.GraphRecordStore)1 RecordStore (au.gov.asd.tac.constellation.graph.processing.RecordStore)1 AnalyticConcept (au.gov.asd.tac.constellation.graph.schema.analytic.concept.AnalyticConcept)1 ContentConcept (au.gov.asd.tac.constellation.graph.schema.analytic.concept.ContentConcept)1 TemporalConcept (au.gov.asd.tac.constellation.graph.schema.analytic.concept.TemporalConcept)1 SchemaVertexTypeUtilities (au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexTypeUtilities)1 VisualSchemaPluginRegistry (au.gov.asd.tac.constellation.graph.schema.visual.VisualSchemaPluginRegistry)1 VisualConcept (au.gov.asd.tac.constellation.graph.schema.visual.concept.VisualConcept)1 Plugin (au.gov.asd.tac.constellation.plugins.Plugin)1