import json
from app.metrics import CitationNetwork, TreeCitationNetwork
from app.munge import QueryMunger, RootMunger
from app.config import Config
from app.regression import test_regression, test_forecasting
"""
Official API endpoint script. All non-test invocations should run through here for convenience.
"""
def root_test_single(patent, depth, weighting_keys, bin_size=20):
    """
    The root endpoint constructs a descendant citation tree for a single patent
    and calculates metrics for the root.

    :param patent: the patent number
    :type patent: str
    :param depth: the graph search depth
    :type depth: int
    :param weighting_keys: the weighting keys to use for knowledge calculation
    :type weighting_keys: list
    :param bin_size: the bin size in weeks
    :type bin_size: int
    """
    munger = RootMunger(patent, depth=depth, limit=Config.DOC_LIMIT)
    cn = TreeCitationNetwork(munger.get_network(), patent, weighting_methods=weighting_keys)
    # Evaluate the tree's metrics over time bins and plot the resulting series.
    cn.eval_binned(bin_size, plot=True)
    cn.write_graphml("{}_{}".format(patent, depth))
def root_test_multiple(query_json_file, limit, weighting_keys, k_depth, discount, bin_size=20, prefix="TIME-DATA"):
    """
    The root endpoint constructs a descendant citation tree for one or more patents
    and calculates metrics for each root.

    :param query_json_file: path to a JSON file containing the query to be queried
    :type query_json_file: str
    :param limit: the maximum number of docs to munge
    :type limit: int
    :param weighting_keys: the weighting keys to use for knowledge calculation
    :type weighting_keys: list
    :param k_depth: the maximum depth to evaluate k
    :type k_depth: int
    :param discount: the discount rate
    :type discount: float
    :param bin_size: bin size in weeks; may be None to skip time-series binning
        (see ``feature_test``)
    :type bin_size: int or None
    :param prefix: prefix for the final storage file name
    :type prefix: str
    """
    # TODO: build this as a function of the full network, handling empty root networks automatically
    #  - then build a full dataframe and save to file
    munger = get_query_munger(query_json_file, limit=limit)
    G = munger.get_network(limit=limit)
    cn = CitationNetwork(
        G, custom_centrality=False, weighting_methods=weighting_keys, k_depth=k_depth, discount=discount
    )
    cn.root_analysis(
        k_depth,
        munger.make_filename(prefix="{}_{}".format(prefix, limit)),
        limit=limit,
        bin_size=bin_size
    )
def query_test(query_json_file, limit, weighting_keys, k_depth, discount, write_graph=False):
    """
    The query endpoint collects patents for a query, constructs a citation network,
    and conducts metric calculations breadth-wise.

    :param query_json_file: path to a JSON file containing the query to be queried
    :type query_json_file: str
    :param limit: the maximum number of docs to munge
    :type limit: int
    :param weighting_keys: the weighting keys to use for knowledge calculation
    :type weighting_keys: list
    :param k_depth: the maximum depth to evaluate k
    :type k_depth: int
    :param discount: the discount rate
    :type discount: float
    :param write_graph: whether or not to write the network to a GraphML file
    :type write_graph: bool
    """
    munger = get_query_munger(query_json_file, limit=limit)
    eval_and_sum(munger, discount=discount, k_depth=k_depth, weighting_keys=weighting_keys, write_graph=write_graph)
def feature_test(query_json_file, limit, weighting_keys, discount, k_depth):
    """
    The feature endpoint constructs descendant trees for a series of roots from a single
    query, but does not conduct time series analysis (``bin_size=None``). It also collects
    additional observable features for use as controls in multiple regression.

    :param query_json_file: path to a JSON file containing the query to be queried
    :type query_json_file: str
    :param limit: the maximum number of docs to munge
    :type limit: int
    :param weighting_keys: the weighting keys to use for knowledge calculation
    :type weighting_keys: list
    :param discount: the discount rate
    :type discount: float
    :param k_depth: the maximum depth to evaluate k
    :type k_depth: int
    """
    # Delegates to root_test_multiple; bin_size=None disables time-series binning.
    root_test_multiple(query_json_file, limit, weighting_keys, k_depth, discount, bin_size=None, prefix="FEATURE")
def eval_and_sum(munger, weighting_keys, k_depth, discount, write_graph=False):
    """
    Evaluate all metrics and summarize using the graph output from a munger.

    :param munger: the munger to analyze
    :param weighting_keys: the weighting keys to use for knowledge calculation
    :type weighting_keys: list
    :param k_depth: the maximum depth to evaluate k
    :type k_depth: int
    :param discount: the discount rate
    :type discount: float
    :param write_graph: whether or not to write the network to a GraphML file;
        when True, knowledge calculation is skipped
    :type write_graph: bool
    """
    G = munger.get_network()
    # knowledge is disabled when writing the graph out, per the original behavior.
    cn = CitationNetwork(
        G,
        k_depth=k_depth,
        discount=discount,
        custom_centrality=False,
        knowledge=(not write_graph),
        weighting_methods=weighting_keys,
    )
    # NOTE(review): .strip(" ") only trims leading/trailing spaces, so interior
    # spaces in the list repr (e.g. "['a', 'b']") survive into the filename —
    # .replace(" ", "") may have been intended; confirm before changing.
    filename = munger.make_filename(prefix="METRICS_{}".format(str(weighting_keys).strip(" ")))
    cn.eval_all(file_early=filename)
    cn.summary()
    cn.file_custom_metrics(filename)
    if write_graph:
        cn.write_graphml(munger.make_filename(dirname="graph"))
def get_query_munger(query_file, limit=Config.DOC_LIMIT, cache=Config.USE_CACHED_QUERIES):
    """
    Construct a query munger for a given query, stored in a JSON file.

    :param query_file: the path to the query file
    :param limit: the maximum number of docs to query
    :param cache: whether to reuse cached query results
    :return: a QueryMunger with this configuration
    """
    with open(query_file, 'r') as f:
        query = json.load(f)
    return QueryMunger(query, limit=limit, cache=cache)
def regression():
    """Run the regression test suite (delegates to app.regression.test_regression)."""
    test_regression()
def forecasting(forecast_type, relative_series=False):
    """
    Run forecasting (delegates to app.regression.test_forecasting).

    :param forecast_type: the type of forecast to run, passed through unchanged
    :param relative_series: whether to use a relative series
    :type relative_series: bool
    """
    test_forecasting(forecast_type, relative_series=relative_series)