Coverage for Python_files/hawkes_tools.py: 88%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
The aim of this code is to provide statistical tools for estimating the parameters of a cascade.
"""
5import numpy as np
6from numpy.core.arrayprint import array2string
7import scipy.optimize as optim
9################################################
10####### Stats part ########
11################################################
13def loglikelihood(params, history, t):
14 """
15 Returns the loglikelihood of a Hawkes process with exponential kernel
16 computed with a linear time complexity
18 params -- parameter tuple (p,beta) of the Hawkes process
19 history -- (n,2) numpy array containing marked time points (t_i,m_i)
20 t -- current time (i.e end of observation window)
21 """
23 if not isinstance(t, (float, int) ) or t < 0:
24 raise Exception(" n must be an float or int greater than 0")
26 if not isinstance(params, np.ndarray):
27 raise Exception(" params must be a np.darray")
29 if not isinstance(params[0], (np.floating,float,int)):
30 raise Exception ("p must be a int or float")
32 if not isinstance(params[1], (np.floating,float, int)) and params[1]<0 :
33 raise Exception ("beta must be a int or float greater than 0")
35 if not isinstance(history, np.ndarray) or history.shape[1]!=2 :
36 raise Exception(" history must be an np.array with following shape : (n,2)")
38 p,beta = params
40 if p <= 0 or p >= 1 or beta <= 0.: return -np.inf
42 n = len(history)
43 tis = history[:,0]
44 mis = history[:,1]
46 LL = (n-1) * np.log(p * beta)
47 logA = -np.inf
48 prev_ti, prev_mi = history[0]
50 i = 0
51 for ti,mi in history[1:]:
52 if(prev_mi + np.exp(logA) <= 0):
53 print("Bad value", prev_mi + np.exp(logA))
55 logA = np.log(prev_mi + np.exp(logA)) - beta * (ti - prev_ti)
56 LL += logA
57 prev_ti,prev_mi = ti,mi
58 i += 1
60 logA = np.log(prev_mi + np.exp(logA)) - beta * (t - prev_ti)
61 LL -= p * (np.sum(mis) - np.exp(logA))
63 return LL
def compute_MAP(history, t, alpha, mu,
                prior_params = (0.02, 0.0002, 0.01, 0.001, -0.1),
                max_n_star = 1, display=False):
    """
    Return the pair (estimated log-density a posteriori, parameters as a numpy array).

    history      -- (n,2) numpy array containing marked time points (t_i, m_i)
    t            -- current time (i.e. end of the observation window)
    alpha        -- power parameter of the power-law mark distribution
    mu           -- min value parameter of the power-law mark distribution
    prior_params -- (mu_p, mu_beta, sig_p, sig_beta, corr) hyper-parameters of
                    the prior, where:
                    mu_p     -- prior mean value of p
                    mu_beta  -- prior mean value of beta
                    sig_p    -- prior standard deviation of p
                    sig_beta -- prior standard deviation of beta
                    corr     -- correlation coefficient between p and beta
    max_n_star   -- maximum authorized value of the branching factor
                    (defines the upper bound on p)
    display      -- verbose flag to display optimization iterations
                    (see 'disp' option of scipy.optimize.minimize)
    """
    if not isinstance(t, (float, int)) or t < 0:
        raise Exception(" t must be an float or int greater than 0")

    if not isinstance(history, np.ndarray) or history.shape[1] != 2:
        raise Exception(" history must be an np.array with following shape : (n,2)")

    # Compute prior moments.
    mu_p, mu_beta, sig_p, sig_beta, corr = prior_params
    sample_mean = np.array([mu_p, mu_beta])
    cov_p_beta = corr * sig_p * sig_beta
    Q = np.array([[sig_p ** 2, cov_p_beta], [cov_p_beta, sig_beta ** 2]])

    # Method of moments: convert the prior mean/covariance into the
    # parameters of a log-normal prior on (p, beta).
    cov_prior = np.log(Q / sample_mean.reshape((-1, 1)) / sample_mean.reshape((1, -1)) + 1)
    mean_prior = np.log(sample_mean) - np.diag(cov_prior) / 2.

    # Precision matrix, computed once for all evaluations of `target`.
    # Bug fix: np.asmatrix(...).I uses np.matrix, which is deprecated and
    # removed in NumPy 2.0; np.linalg.inv on a plain ndarray is equivalent.
    inv_cov_prior = np.linalg.inv(cov_prior)

    def target(params):
        """Minus the log a-posteriori density at `params` (to be minimized)."""
        log_params = np.log(params)
        if np.any(np.isnan(log_params)):
            # Non-positive parameters have zero posterior density.
            return np.inf
        dparams = log_params - mean_prior
        # Gaussian (in log-space) prior term: -1/2 * d^T * Sigma^-1 * d.
        prior_term = -0.5 * dparams @ inv_cov_prior @ dparams
        logLL = loglikelihood(params, history, t)
        return -(prior_term + logLL)

    # Expected mark value of the power-law mark distribution (requires alpha > 2).
    EM = mu * (alpha - 1) / (alpha - 2)
    eps = 1.E-8

    # Realistic bounds: the branching factor n_star = p * EM must stay below
    # max_n_star; beta between 1/(10 days) and 1/(1 minute).
    p_min, p_max = eps, max_n_star / EM - eps
    beta_min, beta_max = 1 / (3600. * 24 * 10), 1 / (60. * 1)

    # Bounds on p (first column) and beta (second column).
    bounds = optim.Bounds(
        np.array([p_min, beta_min]),
        np.array([p_max, beta_max])
    )

    # Run the optimization from the prior mean.
    res = optim.minimize(
        target, sample_mean,
        method='Powell',
        bounds=bounds,
        options={'xtol': 1e-8, 'disp': display}
    )

    # res.x contains the MAP estimates of p and beta.
    return (-res.fun, res.x)