use of au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexTypeUtilities.ExtractedVertexType in project constellation by constellation-app.
the class ExtractTypesFromTextPlugin method query.
/**
 * Extracts schema vertex types from the text parameter and returns one seeded source record per
 * distinct identifier found.
 *
 * @param query the incoming record store (not read by this implementation)
 * @param interaction used to report progress to the user
 * @param parameters the plugin parameters; the text is read from {@code TEXT_PARAMETER_ID}
 * @return a record store holding the identifier, type and seed flag of every extracted entity
 * @throws InterruptedException declared by the overridden method
 * @throws PluginException if the text parameter has no value
 */
@Override
protected RecordStore query(final RecordStore query, final PluginInteraction interaction, final PluginParameters parameters) throws InterruptedException, PluginException {
    final RecordStore result = new GraphRecordStore();

    interaction.setProgress(0, 0, "Importing...", true);

    final String text = parameters.getParameters().get(TEXT_PARAMETER_ID).getStringValue();
    if (text == null) {
        throw new PluginException(PluginNotificationLevel.ERROR, "No text provided from which to extract types.");
    }

    // Key the extracted types by identifier: a repeated identifier keeps only the
    // type of its last occurrence, so each identifier yields exactly one record.
    final Map<String, SchemaVertexType> typeByIdentifier = new HashMap<>();
    for (final ExtractedVertexType extracted : SchemaVertexTypeUtilities.extractVertexTypes(text)) {
        typeByIdentifier.put(extracted.getIdentifier(), extracted.getType());
    }

    final String identifierKey = GraphRecordStoreUtilities.SOURCE + VisualConcept.VertexAttribute.IDENTIFIER;
    final String typeKey = GraphRecordStoreUtilities.SOURCE + AnalyticConcept.VertexAttribute.TYPE;
    final String seedKey = GraphRecordStoreUtilities.SOURCE + AnalyticConcept.VertexAttribute.SEED;

    for (final Map.Entry<String, SchemaVertexType> entry : typeByIdentifier.entrySet()) {
        result.add();
        result.set(identifierKey, entry.getKey());
        result.set(typeKey, entry.getValue());
        result.set(seedKey, "true");
    }

    ConstellationLoggerHelper.createPropertyBuilder(this, result.getAll(identifierKey), ConstellationLoggerHelper.SUCCESS);

    interaction.setProgress(1, 0, "Completed successfully - imported " + result.size() + " entities.", true);
    return result;
}
use of au.gov.asd.tac.constellation.graph.schema.type.SchemaVertexTypeUtilities.ExtractedVertexType in project constellation by constellation-app.
the class ExtractWordsFromTextPlugin method edit.
/**
 * Extracts words (and optionally schema types) from the content attribute of the graph's
 * transactions. Every extracted item becomes a new vertex, connected by a new transaction of type
 * {@code REFERENCED} that copies the datetime and content of the transaction it was found in.
 * <p>
 * Transactions are skipped when they are unselected (if "selected only" is set), have blank
 * content, or are already of type {@code REFERENCED}.
 * <p>
 * Two modes exist:
 * <ul>
 * <li>regex-only: each non-empty line of the words parameter is compiled as a regular expression;
 * the whole match is used when the expression has no capture groups, otherwise each capture group
 * that took part in the match is used;</li>
 * <li>default: patterns come from {@code patternsFromWords}; if no words were supplied, the content
 * is split on spaces and every token of at least the minimum length is used. Schema types may
 * additionally be extracted, and words equal (ignoring case) to an extracted type identifier are
 * not added again.</li>
 * </ul>
 *
 * @param wg the graph being edited
 * @param interaction used to report progress to the user
 * @param parameters the plugin parameters controlling the extraction
 * @throws InterruptedException declared by the overridden method
 * @throws PluginException if the in/out parameter is neither {@code OUTGOING} nor {@code INCOMING}
 */
@Override
public void edit(final GraphWriteMethods wg, final PluginInteraction interaction, final PluginParameters parameters) throws InterruptedException, PluginException {
    interaction.setProgress(0, 0, "Extracting...", true);

    /*
     Retrieving attributes
     */
    final Map<String, PluginParameter<?>> extractEntityParameters = parameters.getParameters();
    final String contentAttribute = extractEntityParameters.get(ATTRIBUTE_PARAMETER_ID).getStringValue();
    final String rawWords = extractEntityParameters.get(WORDS_PARAMETER_ID).getStringValue();
    final String words = rawWords == null ? null : rawWords.trim();
    final boolean useRegex = extractEntityParameters.get(USE_REGEX_PARAMETER_ID).getBooleanValue();
    final boolean wholeWordOnly = extractEntityParameters.get(WHOLE_WORDS_ONLY_PARAMETER_ID).getBooleanValue();
    final int wordLength = extractEntityParameters.get(MIN_WORD_LENGTH_PARAMETER_ID).getIntegerValue();
    final boolean removeSpecialChars = extractEntityParameters.get(REMOVE_SPECIAL_CHARS_PARAMETER_ID).getBooleanValue();
    final boolean toLowerCase = extractEntityParameters.get(LOWER_CASE_PARAMETER_ID).getBooleanValue();
    final boolean types = extractEntityParameters.get(SCHEMA_TYPES_PARAMETER_ID).getBooleanValue();
    final String inOrOut = extractEntityParameters.get(IN_OR_OUT_PARAMETER_ID).getStringValue();
    final boolean selectedOnly = extractEntityParameters.get(SELECTED_ONLY_PARAMETER_ID).getBooleanValue();
    final boolean regexOnly = extractEntityParameters.get(REGEX_ONLY_PARAMETER_ID).getBooleanValue();

    if (!OUTGOING.equals(inOrOut) && !INCOMING.equals(inOrOut)) {
        // Report the parameter that is actually being validated here.
        final String msg = String.format("Parameter %s must be '%s' or '%s'", IN_OR_OUT_PARAMETER_ID, OUTGOING, INCOMING);
        throw new PluginException(PluginNotificationLevel.ERROR, msg);
    }
    final boolean outgoing = OUTGOING.equals(inOrOut);

    /*
     Retrieving attribute IDs
     */
    final int vertexIdentifierAttributeId = VisualConcept.VertexAttribute.IDENTIFIER.ensure(wg);
    final int vertexTypeAttributeId = AnalyticConcept.VertexAttribute.TYPE.ensure(wg);
    final int transactionTypeAttributeId = AnalyticConcept.TransactionAttribute.TYPE.ensure(wg);
    final int transactionDatetimeAttributeId = TemporalConcept.TransactionAttribute.DATETIME.ensure(wg);
    final int transactionContentAttributeId = wg.getAttribute(GraphElementType.TRANSACTION, contentAttribute);
    final int transactionSelectedAttributeId = VisualConcept.TransactionAttribute.SELECTED.ensure(wg);

    // Without the content attribute there is nothing to extract from: warn and stop.
    if (transactionContentAttributeId == Graph.NOT_FOUND) {
        final NotifyDescriptor nd = new NotifyDescriptor.Message(String.format("The specified attribute %s does not exist.", contentAttribute), NotifyDescriptor.WARNING_MESSAGE);
        DialogDisplayer.getDefault().notify(nd);
        return;
    }

    // Captured before any transaction is added, so the loops below only visit
    // positions that existed when the plugin started.
    final int transactionCount = wg.getTransactionCount();

    if (regexOnly) {
        // This choice ignores several other parameters, so is a bit simpler
        // even if there are code commonalities, but combining the if/else
        // code would make things even more complex.
        //
        // The input words are treated as trusted regular expressions,
        // so the caller has to know what they're doing.
        // This is power-user mode.
        //
        // Each line of the input words is a regex.
        // Use them as-is for the power users.
        //
        final List<Pattern> patterns = new ArrayList<>();
        if (StringUtils.isNotBlank(words)) {
            for (final String line : words.split(SeparatorConstants.NEWLINE)) {
                final String regex = line.strip();
                if (!regex.isEmpty()) {
                    patterns.add(Pattern.compile(regex));
                }
            }
        }

        if (!patterns.isEmpty()) {
            for (int transactionPosition = 0; transactionPosition < transactionCount; transactionPosition++) {
                final int transactionId = wg.getTransaction(transactionPosition);

                final boolean selectedTx = wg.getBooleanValue(transactionSelectedAttributeId, transactionId);
                if (selectedOnly && !selectedTx) {
                    continue;
                }

                final String content = wg.getStringValue(transactionContentAttributeId, transactionId);

                /*
                 Does the transaction have content?
                 */
                if (StringUtils.isBlank(content)) {
                    continue;
                }

                /*
                 Ignore other "referenced" transactions because that's not useful
                 */
                final Object transactionType = wg.getObjectValue(transactionTypeAttributeId, transactionId);
                if (transactionType != null && transactionType.equals(AnalyticConcept.TransactionType.REFERENCED)) {
                    continue;
                }

                // Use a set to hold the words.
                // If a word is found multiple times, there's no point adding multiple nodes.
                // The set is created per transaction so that words matched in an earlier
                // transaction are not attached to this one as well.
                final Set<String> matched = new HashSet<>();
                for (final Pattern pattern : patterns) {
                    final var matcher = pattern.matcher(content);
                    while (matcher.find()) {
                        if (matcher.groupCount() == 0) {
                            // The regex doesn't have an explicit capture group, so capture the lot.
                            final String g = matcher.group();
                            matched.add(toLowerCase ? g.toLowerCase() : g);
                        } else {
                            for (int i = 1; i <= matcher.groupCount(); i++) {
                                // group(i) is null when the group did not take part in the
                                // match (e.g. an optional group or an unused alternative).
                                final String g = matcher.group(i);
                                if (g != null) {
                                    matched.add(toLowerCase ? g.toLowerCase() : g);
                                }
                            }
                        }
                    }
                }

                if (!matched.isEmpty()) {
                    /*
                     Retrieving information needed to create new transactions
                     */
                    final int sourceVertexId = wg.getTransactionSourceVertex(transactionId);
                    final int destinationVertexId = wg.getTransactionDestinationVertex(transactionId);
                    final ZonedDateTime datetime = wg.getObjectValue(transactionDatetimeAttributeId, transactionId);

                    for (final String word : matched) {
                        final int newVertexId = wg.addVertex();
                        wg.setStringValue(vertexIdentifierAttributeId, newVertexId, word);
                        wg.setObjectValue(vertexTypeAttributeId, newVertexId, AnalyticConcept.VertexType.WORD);

                        final int newTransactionId = outgoing
                                ? wg.addTransaction(sourceVertexId, newVertexId, true)
                                : wg.addTransaction(newVertexId, destinationVertexId, true);
                        wg.setObjectValue(transactionDatetimeAttributeId, newTransactionId, datetime);
                        wg.setObjectValue(transactionTypeAttributeId, newTransactionId, AnalyticConcept.TransactionType.REFERENCED);
                        wg.setStringValue(transactionContentAttributeId, newTransactionId, content);
                    }
                }
            }
        }
        // End of regexOnly.
    } else {
        // The original logic.
        final List<Pattern> patterns = patternsFromWords(words, useRegex, wholeWordOnly);

        /*
         Iterating over all the transactions in the graph
         */
        for (int transactionPosition = 0; transactionPosition < transactionCount; transactionPosition++) {
            final int transactionId = wg.getTransaction(transactionPosition);

            final boolean selectedTx = wg.getBooleanValue(transactionSelectedAttributeId, transactionId);
            if (selectedOnly && !selectedTx) {
                continue;
            }

            final String content = wg.getStringValue(transactionContentAttributeId, transactionId);

            /*
             Does the transaction have content?
             */
            if (StringUtils.isBlank(content)) {
                continue;
            }

            /*
             Ignore other "referenced" transactions because that's not useful
             */
            final Object transactionType = wg.getObjectValue(transactionTypeAttributeId, transactionId);
            if (transactionType != null && transactionType.equals(AnalyticConcept.TransactionType.REFERENCED)) {
                continue;
            }

            /*
             Retrieving information needed to create new transactions
             */
            final int sourceVertexId = wg.getTransactionSourceVertex(transactionId);
            final int destinationVertexId = wg.getTransactionDestinationVertex(transactionId);
            final ZonedDateTime datetime = wg.getObjectValue(transactionDatetimeAttributeId, transactionId);

            // Lower-cased identifiers of the schema types added for this transaction,
            // used below to avoid adding the same text again as a plain word.
            final Set<String> typesExtracted = new HashSet<>();

            /*
             Extracting Schema Types
             */
            if (types) {
                final Map<String, SchemaVertexType> identifiers = new HashMap<>();
                for (final ExtractedVertexType extractedType : SchemaVertexTypeUtilities.extractVertexTypes(content)) {
                    identifiers.put(extractedType.getIdentifier(), extractedType.getType());
                }

                for (final Map.Entry<String, SchemaVertexType> entry : identifiers.entrySet()) {
                    final String identifier = entry.getKey();

                    final int newVertexId = wg.addVertex();
                    wg.setStringValue(vertexIdentifierAttributeId, newVertexId, identifier);
                    wg.setObjectValue(vertexTypeAttributeId, newVertexId, entry.getValue());

                    final int newTransactionId = outgoing
                            ? wg.addTransaction(sourceVertexId, newVertexId, true)
                            : wg.addTransaction(newVertexId, destinationVertexId, true);
                    wg.setObjectValue(transactionDatetimeAttributeId, newTransactionId, datetime);
                    wg.setObjectValue(transactionTypeAttributeId, newTransactionId, AnalyticConcept.TransactionType.REFERENCED);
                    wg.setStringValue(transactionContentAttributeId, newTransactionId, content);

                    typesExtracted.add(identifier.toLowerCase());
                }
            }

            // Words found in this transaction only; duplicates are kept, as before.
            final List<String> foundWords = new ArrayList<>();
            if (StringUtils.isBlank(words)) {
                /*
                 Extracting all words of the specified length if no word list has been provided
                 */
                for (final String token : content.split(" ")) {
                    String word = token;
                    if (toLowerCase) {
                        word = word.toLowerCase();
                    }
                    if (removeSpecialChars) {
                        word = word.replaceAll("\\W", "");
                    }
                    // The length check applies to the word after the clean-up above.
                    if (word.length() >= wordLength) {
                        foundWords.add(word);
                    }
                }
            } else {
                for (final Pattern pattern : patterns) {
                    final var matcher = pattern.matcher(content);
                    while (matcher.find()) {
                        final String g = matcher.group();
                        foundWords.add(toLowerCase ? g.toLowerCase() : g);
                    }
                }
            }

            /*
             Add words to graph
             */
            for (final String word : foundWords) {
                if (types && typesExtracted.contains(word.toLowerCase())) {
                    continue;
                }

                final int newVertexId = wg.addVertex();
                wg.setStringValue(vertexIdentifierAttributeId, newVertexId, word);
                wg.setObjectValue(vertexTypeAttributeId, newVertexId, AnalyticConcept.VertexType.WORD);

                final int newTransactionId = outgoing
                        ? wg.addTransaction(sourceVertexId, newVertexId, true)
                        : wg.addTransaction(newVertexId, destinationVertexId, true);
                wg.setObjectValue(transactionDatetimeAttributeId, newTransactionId, datetime);
                wg.setObjectValue(transactionTypeAttributeId, newTransactionId, AnalyticConcept.TransactionType.REFERENCED);
                wg.setStringValue(transactionContentAttributeId, newTransactionId, content);
            }
        }
    }

    PluginExecutor.startWith(VisualSchemaPluginRegistry.COMPLETE_SCHEMA).followedBy(InteractiveGraphPluginRegistry.RESET_VIEW).executeNow(wg);

    interaction.setProgress(1, 0, "Completed successfully", true);
}
Aggregations