import json
from app.metrics import CitationNetwork, TreeCitationNetwork
from app.munge import QueryMunger, RootMunger
from app.config import Config
from app.regression import test_regression, test_forecasting
"""
Official API endpoint script. All non-test invocations should run through here for convenience.
"""
def root_test_single(patent, depth, weighting_keys, bin_size=20):
    """
    The root endpoint constructs a descendant citation tree for a single patent
    and calculates metrics for the root.

    :param patent: the patent number
    :type patent: str
    :param depth: the graph search depth
    :type depth: int
    :param weighting_keys: the weighting keys to use for knowledge calculation
    :type weighting_keys: list
    :param bin_size: the bin size in weeks
    :type bin_size: int
    """
    munger = RootMunger(patent, depth=depth, limit=Config.DOC_LIMIT)
    cn = TreeCitationNetwork(munger.get_network(), patent, weighting_methods=weighting_keys)
    # Evaluate the tree's metrics over time bins and plot the resulting series.
    cn.eval_binned(bin_size, plot=True)
    cn.write_graphml("{}_{}".format(patent, depth))
def root_test_multiple(query_json_file, limit, weighting_keys, k_depth, discount, bin_size=20, prefix="TIME-DATA"):
    """
    The root endpoint constructs a descendant citation tree for one or more patents
    and calculates metrics for each root.

    :param query_json_file: path to a JSON file containing the query to be queried
    :type query_json_file: str
    :param limit: the maximum number of docs to munge
    :type limit: int
    :param weighting_keys: the weighting keys to use for knowledge calculation
    :type weighting_keys: list
    :param k_depth: the maximum depth to evaluate k
    :type k_depth: int
    :param discount: the discount rate
    :type discount: float
    :param bin_size: bin size in weeks; may be None to skip time-series binning
        (see ``feature_test``)
    :type bin_size: int or None
    :param prefix: prefix for the final storage file name
    :type prefix: str
    """
    # TODO: build this as a function of the full network, handling empty root networks automatically
    #  - then build a full dataframe and save to file
    munger = get_query_munger(query_json_file, limit=limit)
    G = munger.get_network(limit=limit)
    cn = CitationNetwork(
        G, custom_centrality=False, weighting_methods=weighting_keys, k_depth=k_depth, discount=discount
    )
    cn.root_analysis(
        k_depth,
        munger.make_filename(prefix="{}_{}".format(prefix, limit)),
        limit=limit,
        bin_size=bin_size
    )
def query_test(query_json_file, limit, weighting_keys, k_depth, discount, write_graph=False):
    """
    The query endpoint collects patents for a query, constructs a citation network,
    and conducts metric calculations breadth-wise.

    :param query_json_file: path to a JSON file containing the query to be queried
    :type query_json_file: str
    :param limit: the maximum number of docs to munge
    :type limit: int
    :param weighting_keys: the weighting keys to use for knowledge calculation
    :type weighting_keys: list
    :param k_depth: the maximum depth to evaluate k
    :type k_depth: int
    :param discount: the discount rate
    :type discount: float
    :param write_graph: whether or not to write the network to a GraphML file
    :type write_graph: bool
    """
    munger = get_query_munger(query_json_file, limit=limit)
    eval_and_sum(munger, discount=discount, k_depth=k_depth, weighting_keys=weighting_keys, write_graph=write_graph)
def feature_test(query_json_file, limit, weighting_keys, discount, k_depth):
    """
    The feature endpoint constructs descendant trees for a series of roots from a single
    query, but does not conduct time series analysis (``bin_size=None``). It also collects
    additional observable features for use as controls in multiple regression.

    :param query_json_file: path to a JSON file containing the query to be queried
    :type query_json_file: str
    :param limit: the maximum number of docs to munge
    :type limit: int
    :param weighting_keys: the weighting keys to use for knowledge calculation
    :type weighting_keys: list
    :param discount: the discount rate
    :type discount: float
    :param k_depth: the maximum depth to evaluate k
    :type k_depth: int
    """
    # Delegates to root_test_multiple; bin_size=None disables time-series binning.
    root_test_multiple(query_json_file, limit, weighting_keys, k_depth, discount, bin_size=None, prefix="FEATURE")
def eval_and_sum(munger, weighting_keys, k_depth, discount, write_graph=False):
    """
    Evaluate all metrics and summarize using the graph output from a munger.

    :param munger: the munger to analyze
    :param weighting_keys: the weighting keys to use for knowledge calculation
    :type weighting_keys: list
    :param k_depth: the maximum depth to evaluate k
    :type k_depth: int
    :param discount: the discount rate
    :type discount: float
    :param write_graph: whether or not to write the network to a GraphML file;
        when True, knowledge calculation is skipped
    :type write_graph: bool
    """
    G = munger.get_network()
    # knowledge is disabled when writing the graph out, per the original behavior.
    cn = CitationNetwork(
        G,
        k_depth=k_depth,
        discount=discount,
        custom_centrality=False,
        knowledge=(not write_graph),
        weighting_methods=weighting_keys,
    )
    # NOTE(review): .strip(" ") only trims leading/trailing spaces, so interior
    # spaces in the list repr (e.g. "['a', 'b']") survive into the filename —
    # .replace(" ", "") may have been intended; confirm before changing.
    filename = munger.make_filename(prefix="METRICS_{}".format(str(weighting_keys).strip(" ")))
    cn.eval_all(file_early=filename)
    cn.summary()
    cn.file_custom_metrics(filename)
    if write_graph:
        cn.write_graphml(munger.make_filename(dirname="graph"))
def get_query_munger(query_file, limit=Config.DOC_LIMIT, cache=Config.USE_CACHED_QUERIES):
    """
    Construct a query munger for a given query, stored in a JSON file.

    :param query_file: the path to the query file
    :param limit: the maximum number of docs to query
    :param cache: whether to reuse cached query results
    :return: a QueryMunger with this configuration
    """
    with open(query_file, 'r') as f:
        query = json.load(f)
    return QueryMunger(query, limit=limit, cache=cache)
def regression():
    """Run the regression test suite (delegates to app.regression.test_regression)."""
    test_regression()
def forecasting(forecast_type, relative_series=False):
    """
    Run forecasting (delegates to app.regression.test_forecasting).

    :param forecast_type: the type of forecast to run, passed through unchanged
    :param relative_series: whether to use a relative series
    :type relative_series: bool
    """
    test_forecasting(forecast_type, relative_series=relative_series)