Search in sources :

Example 1 with Statement

use of dna.dataStructures.Statement in project dna by leifeld.

the class Exporter method computeTimeWindowMatrices.

/**
 * Create a series of one-mode or two-mode networks using a moving time window.
 *
 * <p>The window initially spans {@code timeUnits} units starting at {@code start} and is then
 * shifted forward by one unit per iteration until its end lies after {@code stop}. In
 * event-based mode ("using events"), the window end is the date of the statement at the
 * current position and the window start is the date of the statement {@code timeUnits}
 * positions earlier; the window stops moving after the last statement.</p>
 *
 * <p>As a side effect, the fields {@code timeWindowMatrices} and {@code timeLabels} are
 * replaced; {@code timeLabels} receives the end date of each window.</p>
 *
 * @param statements            A (potentially filtered) {@link ArrayList} of {@link Statement}s.
 * @param documents             An {@link ArrayList} of {@link Document}s which contain the statements.
 * @param statementType         The {@link StatementType} corresponding to the statements.
 * @param var1                  {@link String} denoting the first variable (containing the row values).
 * @param var2                  {@link String} denoting the second variable (containing the columns values).
 * @param var1Document          {@code boolean} indicating whether the first variable is a document-level variable.
 * @param var2Document          {@code boolean} indicating whether the second variable is a document-level variable.
 * @param names1                {@link String} array containing the row labels. Recomputed per window if {@code includeIsolates} is false.
 * @param names2                {@link String} array containing the column labels. Recomputed per window if {@code includeIsolates} is false.
 * @param qualifier             {@link String} denoting the name of the qualifier variable.
 * @param qualifierAggregation  {@link String} indicating how different levels of the qualifier variable are aggregated. Valid values are "ignore", "subtract", and "combine".
 * @param normalization         {@link String} indicating what type of normalization will be used. Valid values are "no", "average activity", "Jaccard", and "cosine".
 * @param twoMode               Create two-mode networks? If false, one-mode networks are created.
 * @param start                 Start date of the time range over which the time window moves.
 * @param stop                  End date of the time range over which the time window moves.
 * @param unitType              {@link String} indicating the kind of temporal unit used for the moving window. Valid values are "using seconds", "using minutes", "using hours", "using days", "using weeks", "using months", "using years", and "using events".
 * @param timeUnits             How large is the time window? E.g., 100 days, where "days" are defined in the unit type argument.
 * @param includeIsolates       Boolean indicating whether all nodes should be present at all times
 * @return                      {@link ArrayList} of {@link Matrix} objects, one per time window, in chronological order.
 * @throws IllegalArgumentException if {@code unitType} is not one of the valid values.
 */
private ArrayList<Matrix> computeTimeWindowMatrices(ArrayList<Statement> statements, ArrayList<Document> documents, StatementType statementType, String var1, String var2, boolean var1Document, boolean var2Document, String[] names1, String[] names2, String qualifier, String qualifierAggregation, String normalization, boolean twoMode, Date start, Date stop, String unitType, int timeUnits, boolean includeIsolates) {
    timeWindowMatrices = new ArrayList<Matrix>();
    timeLabels = new ArrayList<Date>();

    // Resolve the unit once, up front: an unsupported unit type would otherwise never
    // advance the window and the loop below would not terminate.
    boolean eventBased = unitType.equals("using events");
    int calendarField = eventBased ? -1 : timeWindowCalendarField(unitType);

    // Without statements there are no events, hence no event-based windows.
    if (eventBased && statements.isEmpty()) {
        return timeWindowMatrices;
    }

    int statementIterator = timeUnits;
    GregorianCalendar stopCalendar = new GregorianCalendar();
    stopCalendar.setTime(stop);
    GregorianCalendar currentStop = new GregorianCalendar();
    currentStop.setTime(start);
    GregorianCalendar currentStart = (GregorianCalendar) currentStop.clone();

    // initial window end
    if (eventBased) {
        // fall back to the last statement if the window is larger than the number of statements
        int initialIndex = Math.min(statementIterator, statements.size() - 1);
        currentStop.setTime(statements.get(initialIndex).getDate());
    } else {
        currentStop.add(calendarField, timeUnits);
    }

    while (!currentStop.after(stopCalendar)) {
        // collect the statements inside the current window (both bounds inclusive)
        long windowStart = currentStart.getTimeInMillis();
        long windowStop = currentStop.getTimeInMillis();
        ArrayList<Statement> currentStatements = new ArrayList<Statement>();
        for (Statement statement : statements) {
            long time = statement.getDate().getTime();
            if (time >= windowStart && time <= windowStop) {
                currentStatements.add(statement);
            }
        }

        // without isolates, the node set is restricted to what occurs in the current window
        if (!includeIsolates) {
            names1 = extractLabels(currentStatements, statements, documents, var1, var1Document, statementType.getId(), includeIsolates);
            names2 = extractLabels(currentStatements, statements, documents, var2, var2Document, statementType.getId(), includeIsolates);
        }

        if (twoMode) {
            timeWindowMatrices.add(computeTwoModeMatrix(currentStatements, documents, statementType, var1, var2, var1Document, var2Document, names1, names2, qualifier, qualifierAggregation, normalization, true));
        } else {
            timeWindowMatrices.add(computeOneModeMatrix(currentStatements, documents, statementType, var1, var2, var1Document, var2Document, names1, names2, qualifier, qualifierAggregation, normalization));
        }
        timeLabels.add(currentStop.getTime());

        // move the window forward by one unit
        if (eventBased) {
            if (statementIterator + 1 >= statements.size()) {
                // Last event reached. Leave the loop explicitly; merely pushing the window end
                // one year ahead does not terminate the loop if "stop" is further away than that.
                break;
            }
            statementIterator = statementIterator + 1;
            currentStop.setTime(statements.get(statementIterator).getDate());
            currentStart.setTime(statements.get(statementIterator - timeUnits).getDate());
        } else {
            currentStart.add(calendarField, 1);
            currentStop.add(calendarField, 1);
        }
    }
    return timeWindowMatrices;
}

/**
 * Translate a time-based unit type label into the {@link Calendar} field used to move the time window.
 *
 * @param unitType  One of "using seconds", "using minutes", "using hours", "using days", "using weeks", "using months", or "using years".
 * @return          The corresponding {@link Calendar} field constant.
 * @throws IllegalArgumentException if the unit type is not one of the listed values.
 */
private int timeWindowCalendarField(String unitType) {
    if (unitType.equals("using seconds")) {
        return Calendar.SECOND;
    } else if (unitType.equals("using minutes")) {
        return Calendar.MINUTE;
    } else if (unitType.equals("using hours")) {
        return Calendar.HOUR_OF_DAY;
    } else if (unitType.equals("using days")) {
        return Calendar.DAY_OF_MONTH;
    } else if (unitType.equals("using weeks")) {
        return Calendar.WEEK_OF_YEAR;
    } else if (unitType.equals("using months")) {
        return Calendar.MONTH;
    } else if (unitType.equals("using years")) {
        return Calendar.YEAR;
    }
    throw new IllegalArgumentException("Unsupported time window unit type: \"" + unitType + "\".");
}
Also used : Statement(dna.dataStructures.Statement) GregorianCalendar(java.util.GregorianCalendar) ArrayList(java.util.ArrayList) Date(java.util.Date)

Example 2 with Statement

use of dna.dataStructures.Statement in project dna by leifeld.

the class Exporter method retrieveValues.

/**
 * Retrieve the values across statements/documents given the name of the variable.
 * E.g., provide a list of statements, a list of documents, a variable name, and
 * information on whether the variable is defined at the document level (e.g.,
 * author or section) or at the statement level (e.g., organization), and return
 * a one-dimensional array of values (e.g., the organization names or authors for
 * all statements provided).
 *
 * <p>The documents are only consulted for document-level variables. For a document-level
 * variable other than "author", "source", "section", or "type", the returned array
 * contains {@code null} entries.</p>
 *
 * @param statements            A (potentially filtered) {@link ArrayList} of {@link Statement}s.
 * @param documents             An {@link ArrayList} of {@link Document}s which contain the statements.
 * @param variable              {@link String} denoting the variable whose values should be retrieved.
 * @param documentLevel         {@code boolean} indicating whether the variable is a document-level variable.
 * @return                      String array of values, one per statement, in the order of {@code statements}.
 */
private String[] retrieveValues(ArrayList<Statement> statements, ArrayList<Document> documents, String variable, boolean documentLevel) {
    String[] values = new String[statements.size()];

    // statement-level variable: the documents are not needed at all
    if (!documentLevel) {
        for (int i = 0; i < values.length; i++) {
            values[i] = (String) statements.get(i).getValues().get(variable);
        }
        return values;
    }

    // HashMap for fast lookup of document indices by ID
    HashMap<Integer, Integer> docMap = new HashMap<Integer, Integer>();
    for (int i = 0; i < documents.size(); i++) {
        docMap.put(documents.get(i).getId(), i);
    }

    // document-level variable: look up each statement's document once and pick the requested field
    for (int i = 0; i < values.length; i++) {
        Document document = documents.get(docMap.get(statements.get(i).getDocumentId()));
        if (variable.equals("author")) {
            values[i] = document.getAuthor();
        } else if (variable.equals("source")) {
            values[i] = document.getSource();
        } else if (variable.equals("section")) {
            values[i] = document.getSection();
        } else if (variable.equals("type")) {
            values[i] = document.getType();
        }
    }
    return values;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Statement(dna.dataStructures.Statement)

Example 3 with Statement

use of dna.dataStructures.Statement in project dna by leifeld.

the class SqlConnection method getAllData.

/**
 * Load the complete contents of the database into a {@link Data} object.
 *
 * <p>Settings, documents, coders, coder relations, regexes, statement links, statement
 * types, and attributes are obtained from the respective {@code getAll...} methods. The
 * statements are read from the STATEMENTS table, and for each variable of the statement's
 * type the value is read from the matching DATA... table.</p>
 *
 * <p>If no value is stored for a variable of a statement, a default value (empty string for
 * text variables, 0 otherwise) is written to the database and used in the returned data,
 * and a notice is printed to {@code System.err}.</p>
 *
 * <p>If an {@link SQLException} occurs, its stack trace is printed and the statements read
 * up to that point are returned.</p>
 *
 * @return     Data object.
 */
public Data getAllData() {
    Data data = new Data();
    data.setSettings(getAllSettings());
    data.setDocuments(getAllDocuments());
    data.setCoders(getAllCoders());
    data.setCoderRelations(getAllCoderRelations());
    data.setRegexes(getAllRegexes());
    data.setStatementLinks(getAllStatementLinks());
    data.setStatementTypes(getAllStatementTypes());
    ArrayList<Statement> statements = new ArrayList<Statement>();
    String myQuery = "SELECT * FROM STATEMENTS";
    // try-with-resources: statements and result sets are closed even if a query fails midway
    try (PreparedStatement preStatement = connection.prepareStatement(myQuery);
            ResultSet result = preStatement.executeQuery()) {
        while (result.next()) {
            int id = result.getInt("ID");
            int documentId = result.getInt("DocumentId");
            int start = result.getInt("Start");
            int stop = result.getInt("Stop");
            int statementTypeId = result.getInt("StatementTypeId");
            int coder = result.getInt("Coder");
            Date date = data.getDocument(documentId).getDate();
            StatementType st = data.getStatementTypeById(statementTypeId);
            LinkedHashMap<String, Object> values = new LinkedHashMap<String, Object>();
            for (String key : st.getVariables().keySet()) {
                String value = st.getVariables().get(key);
                boolean textVariable = value.equals("short text") || value.equals("long text");

                // the data type of the variable determines the table in which its values are stored
                String tableExtension = "";
                if (value.equals("boolean")) {
                    tableExtension = "BOOLEAN";
                } else if (value.equals("integer")) {
                    tableExtension = "INTEGER";
                } else if (value.equals("short text")) {
                    tableExtension = "SHORTTEXT";
                } else if (value.equals("long text")) {
                    tableExtension = "LONGTEXT";
                }

                // The table name cannot be a bind parameter, but it only ever takes one of the fixed
                // values above. All values are bound, so variable names with quotes cannot break the query.
                String myQuery2 = "SELECT * FROM DATA" + tableExtension + " WHERE StatementId = ? AND VariableId = (SELECT ID FROM VARIABLES WHERE StatementTypeId = ? AND Variable = ?)";
                try (PreparedStatement preStatement2 = connection.prepareStatement(myQuery2)) {
                    preStatement2.setInt(1, id);
                    preStatement2.setInt(2, statementTypeId);
                    preStatement2.setString(3, key);
                    try (ResultSet result2 = preStatement2.executeQuery()) {
                        while (result2.next()) {
                            values.put(key, result2.getObject("Value"));
                        }
                    }
                }

                if (values.get(key) == null) {
                    // Fix errors here if no statement contents available
                    System.err.print("Statement " + id + ": variable \"" + key + "\" was not saved... ");
                    // The helper methods take complete SQL strings, so quotes in the variable name are
                    // escaped by doubling them. NOTE(review): assumes standard SQL quoting rules apply
                    // to the connected database -- confirm for backslash handling.
                    String escapedKey = key.replace("'", "''");
                    String query = "SELECT ID FROM VARIABLES WHERE (StatementTypeId = " + statementTypeId + " AND Variable = '" + escapedKey + "')";
                    int varId = (int) executeQueryForObject(query);
                    String replacementValue = textVariable ? "''" : "0";
                    String statement = "INSERT INTO DATA" + tableExtension + " (StatementId, VariableId, StatementTypeId, Value) " + "Values (" + id + ", " + varId + ", " + statementTypeId + ", " + replacementValue + ")";
                    executeStatement(statement);
                    if (textVariable) {
                        values.put(key, "");
                    } else {
                        values.put(key, 0);
                    }
                    System.err.println("The problem has been fixed. Please review this statement.");
                }
            }
            Statement statement = new Statement(id, documentId, start, stop, date, statementTypeId, coder, values);
            statements.add(statement);
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
    data.setStatements(statements);
    data.setAttributes(getAllAttributes());
    return data;
}
Also used : SQLException(java.sql.SQLException) PreparedStatement(java.sql.PreparedStatement) Statement(dna.dataStructures.Statement) ArrayList(java.util.ArrayList) Data(dna.dataStructures.Data) PreparedStatement(java.sql.PreparedStatement) Date(java.util.Date) LinkedHashMap(java.util.LinkedHashMap) StatementType(dna.dataStructures.StatementType) ResultSet(java.sql.ResultSet)

Example 4 with Statement

use of dna.dataStructures.Statement in project dna by leifeld.

the class DocStatsPanel method computeStats.

/**
 * Recompute the summary statistics and display them in the text field.
 *
 * <p>The summary contains the number of documents, the number of rows in the statement
 * table, and, for each statement type, the number of distinct values found for each of
 * its variables among the statements of that type.</p>
 */
public void computeStats() {
    clear();
    int numDocuments = Dna.data.getDocuments().size();
    int numStatements = Dna.dna.gui.rightPanel.statementPanel.ssc.getRowCount();
    StringBuilder statText = new StringBuilder();
    statText.append("Documents: ").append(numDocuments).append("\n");
    statText.append("Statements: ").append(numStatements).append("\n");
    for (StatementType st : Dna.data.getStatementTypes()) {
        statText.append("\n\"").append(st.getLabel()).append("\" Variables:\n");
        ArrayList<Statement> typeStatements = Dna.data.getStatementsByStatementTypeId(st.getId());
        for (String variable : st.getVariables().keySet()) {
            // A hash set counts distinct values in linear time. The class name is fully qualified
            // because java.util.HashSet may not be among this file's imports.
            java.util.HashSet<Object> distinctValues = new java.util.HashSet<Object>();
            for (Statement statement : typeStatements) {
                distinctValues.add(statement.getValues().get(variable));
            }
            statText.append("     ").append(variable).append(": ").append(distinctValues.size()).append("\n");
        }
    }
    // the text field is kept read-only except while its contents are replaced
    tf.setEditable(true);
    tf.setText(statText.toString());
    tf.setEditable(false);
}
Also used : Statement(dna.dataStructures.Statement) StatementType(dna.dataStructures.StatementType) ArrayList(java.util.ArrayList)

Example 5 with Statement

use of dna.dataStructures.Statement in project dna by leifeld.

the class Exporter method filter.

/**
 * Return a filtered list of {@link Statement}s based on the settings in the GUI.
 *
 * <p>The supplied statement list is sorted in place before filtering. A statement is kept
 * if it lies within the date range, has the given statement type, is not excluded by any of
 * the document-level or statement-level exclusion lists, passes the empty-field check, and
 * is not a duplicate of an already selected statement under the given duplicate setting.</p>
 *
 * @param statements          {@link ArrayList} of {@link Statement}s to be filtered. The list is sorted in place.
 * @param documents           {@link ArrayList} of {@link Document}s which contain the statements.
 * @param startDate           {@link Date} object indicating the start date
 * @param stopDate            {@link Date} object indicating the end date
 * @param statementType       {@link StatementType} to which the export is restricted
 * @param var1                {@link String} indicating the first variable used for network construction, e.g., "organization"
 * @param var2                {@link String} indicating the second variable used for network construction, e.g., "concept"
 * @param var1Document        {@code boolean} indicating if the var1 variable is a document-level variable (as opposed to statement-level)
 * @param var2Document        {@code boolean} indicating if the var2 variable is a document-level variable (as opposed to statement-level)
 * @param qualifierName       {@link String} indicating the qualifier variable, e.g., "agreement"
 * @param ignoreQualifier     {@code boolean} indicating whether the qualifier variable should be ignored
 * @param duplicateSetting    {@link String} indicating how to handle duplicates; valid settings include "include all duplicates", "ignore per document", "ignore per calendar week", "ignore per calendar month", "ignore per calendar year", or "ignore across date range"
 * @param excludeAuthor       {@link ArrayList} with {@link String}s containing document authors to exclude
 * @param excludeSource       {@link ArrayList} with {@link String}s containing document sources to exclude
 * @param excludeSection      {@link ArrayList} with {@link String}s containing document sections to exclude
 * @param excludeType         {@link ArrayList} with {@link String}s containing document types to exclude
 * @param excludeValues       {@link HashMap} with {@link String}s as keys (indicating the variable for which entries should be excluded from export) and {@link ArrayList}s of {@link String}s (containing variable entries to exclude from network export)
 * @param filterEmptyFields   {@code boolean} indicating whether empty fields (i.e., "") should be excluded
 * @param verbose             {@code boolean} indicating whether the excluded values should be reported on the standard output
 * @return                    {@link ArrayList} of filtered {@link Statement}s
 * @throws NullPointerException if a key of {@code excludeValues} is not a variable of the statement type
 */
private ArrayList<Statement> filter(ArrayList<Statement> statements, ArrayList<Document> documents, Date startDate, Date stopDate, StatementType statementType, String var1, String var2, boolean var1Document, boolean var2Document, String qualifierName, boolean ignoreQualifier, String duplicateSetting, ArrayList<String> excludeAuthor, ArrayList<String> excludeSource, ArrayList<String> excludeSection, ArrayList<String> excludeType, HashMap<String, ArrayList<String>> excludeValues, boolean filterEmptyFields, boolean verbose) {
    // sort statements by date and time
    Collections.sort(statements);

    // reporting
    if (verbose) {
        for (String key : excludeValues.keySet()) {
            for (String value : excludeValues.get(key)) {
                System.out.println("[Excluded] " + key + ": " + value);
            }
        }
        for (String author : excludeAuthor) {
            System.out.println("[Excluded] author: " + author);
        }
        for (String source : excludeSource) {
            System.out.println("[Excluded] source: " + source);
        }
        for (String section : excludeSection) {
            System.out.println("[Excluded] section: " + section);
        }
        for (String type : excludeType) {
            System.out.println("[Excluded] type: " + type);
        }
    }

    // HashMap for fast lookup of document indices by ID
    HashMap<Integer, Integer> docMap = new HashMap<Integer, Integer>();
    for (int i = 0; i < documents.size(); i++) {
        docMap.put(documents.get(i).getId(), i);
    }

    // Create arrays with variable values (aligned with the sorted statement list)
    String[] values1 = retrieveValues(statements, documents, var1, var1Document);
    String[] values2 = retrieveValues(statements, documents, var2, var2Document);

    // process and exclude statements
    boolean checkDuplicates = !duplicateSetting.equals("include all duplicates");
    ArrayList<Statement> al = new ArrayList<Statement>();
    // positions of the selected statements in the statement list, parallel to "al";
    // used to look up their var1/var2 values in the arrays above
    ArrayList<Integer> selectedIndices = new ArrayList<Integer>();
    for (int i = 0; i < statements.size(); i++) {
        Statement s = statements.get(i);
        Document document = documents.get(docMap.get(s.getDocumentId()));
        boolean select = true;

        // step 1: restrict to date range and statement type
        if (s.getDate().before(startDate) || s.getDate().after(stopDate) || s.getStatementTypeId() != statementType.getId()) {
            select = false;
        }

        // step 2: check against excluded values
        if (excludeAuthor.contains(document.getAuthor())
                || excludeSource.contains(document.getSource())
                || excludeSection.contains(document.getSection())
                || excludeType.contains(document.getType())) {
            select = false;
        }
        if (select) {
            for (String key : excludeValues.keySet()) {
                String dataType = statementType.getVariables().get(key);
                if (dataType == null) {
                    throw new NullPointerException("'" + key + "' is not a statement-level variable and cannot be excluded.");
                }
                String string;
                if (dataType.equals("boolean") || dataType.equals("integer")) {
                    string = String.valueOf(s.getValues().get(key));
                } else {
                    string = (String) s.getValues().get(key);
                }
                if (excludeValues.get(key).contains(string)) {
                    select = false;
                }
            }
        }

        // step 3: check against empty fields
        if (select && (values1[i].equals("") || values2[i].equals("")) && filterEmptyFields) {
            select = false;
        }

        // step 4: check for duplicates among the statements selected so far
        if (select && checkDuplicates) {
            for (int j = al.size() - 1; j >= 0; j--) {
                Statement previous = al.get(j);
                int previousIndex = selectedIndices.get(j);
                // ignoreQualifier is tested first so that a missing qualifier value is never dereferenced when it is ignored
                if (s.getStatementTypeId() == previous.getStatementTypeId()
                        && values1[i].equals(values1[previousIndex])
                        && values2[i].equals(values2[previousIndex])
                        && inSameDuplicateScope(duplicateSetting, s, previous)
                        && (ignoreQualifier || s.getValues().get(qualifierName).equals(previous.getValues().get(qualifierName)))) {
                    select = false;
                    break;
                }
            }
        }

        // step 5: add only if the statement passed all checks
        if (select) {
            al.add(s);
            selectedIndices.add(i);
        }
    }
    return al;
}

/**
 * Check whether two statements fall into the same scope for duplicate detection.
 *
 * @param duplicateSetting  The duplicate setting, e.g., "ignore per document" or "ignore per calendar month".
 * @param current           The statement currently being checked.
 * @param previous          A statement that was already selected.
 * @return                  {@code true} if the two statements belong to the same document, calendar year, calendar month, or calendar week (depending on the setting), or if the setting is "ignore across date range"; {@code false} for any other setting.
 */
private boolean inSameDuplicateScope(String duplicateSetting, Statement current, Statement previous) {
    if (duplicateSetting.equals("ignore per document")) {
        return current.getDocumentId() == previous.getDocumentId();
    }
    if (duplicateSetting.equals("ignore across date range")) {
        return true;
    }
    Calendar cal = Calendar.getInstance();
    cal.setTime(current.getDate());
    Calendar calPrevious = Calendar.getInstance();
    calPrevious.setTime(previous.getDate());
    boolean sameYear = cal.get(Calendar.YEAR) == calPrevious.get(Calendar.YEAR);
    if (duplicateSetting.equals("ignore per calendar year")) {
        return sameYear;
    }
    if (duplicateSetting.equals("ignore per calendar month")) {
        // the month number alone would also match the same month of a different year
        return sameYear && cal.get(Calendar.MONTH) == calPrevious.get(Calendar.MONTH);
    }
    if (duplicateSetting.equals("ignore per calendar week")) {
        // A calendar week can straddle two years, so the year is not compared. Instead, equal week
        // numbers must be less than eight days apart; the same week number in a different year is
        // many weeks away.
        long eightDays = 8L * 24L * 60L * 60L * 1000L;
        long distance = Math.abs(current.getDate().getTime() - previous.getDate().getTime());
        return cal.get(Calendar.WEEK_OF_YEAR) == calPrevious.get(Calendar.WEEK_OF_YEAR) && distance < eightDays;
    }
    return false;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Statement(dna.dataStructures.Statement) GregorianCalendar(java.util.GregorianCalendar) Calendar(java.util.Calendar) ArrayList(java.util.ArrayList)

Aggregations

Statement (dna.dataStructures.Statement)6 ArrayList (java.util.ArrayList)5 LinkedHashMap (java.util.LinkedHashMap)3 StatementType (dna.dataStructures.StatementType)2 Date (java.util.Date)2 GregorianCalendar (java.util.GregorianCalendar)2 HashMap (java.util.HashMap)2 Data (dna.dataStructures.Data)1 PreparedStatement (java.sql.PreparedStatement)1 ResultSet (java.sql.ResultSet)1 SQLException (java.sql.SQLException)1 Calendar (java.util.Calendar)1 DefaultMutableTreeNode (javax.swing.tree.DefaultMutableTreeNode)1