Coverage for Python_files/hawkes_tools.py: 88%

60 statements  

1""" 

2The aim of this code is to provide statistical tools for cascade's parameters estimation.  

3""" 

4 

5import numpy as np 

6from numpy.core.arrayprint import array2string 

7import scipy.optimize as optim 

8 

9################################################ 

10####### Stats part ######## 

11################################################ 

12 

def loglikelihood(params, history, t):
    """
    Returns the log-likelihood of a Hawkes process with an exponential kernel,
    computed with linear time complexity.

    params  -- parameter tuple (p, beta) of the Hawkes process
    history -- (n,2) numpy array containing marked time points (t_i, m_i)
    t       -- current time (i.e. end of the observation window)
    """

    if not isinstance(t, (float, int)) or t < 0:
        raise Exception("t must be a float or int greater than 0")

    if not isinstance(params, np.ndarray):
        raise Exception("params must be a np.ndarray")

    if not isinstance(params[0], (np.floating, float, int)):
        raise Exception("p must be an int or float")

    if not isinstance(params[1], (np.floating, float, int)) or params[1] < 0:
        raise Exception("beta must be an int or float greater than 0")

    if not isinstance(history, np.ndarray) or history.shape[1] != 2:
        raise Exception("history must be a np.ndarray with shape (n,2)")

    p, beta = params

    if p <= 0 or p >= 1 or beta <= 0.:
        return -np.inf

    n = len(history)
    tis = history[:, 0]
    mis = history[:, 1]

    # The intensity at t_i is lambda(t_i) = p * beta * A_i with
    # A_i = sum_{j<i} m_j * exp(-beta * (t_i - t_j)); A_i obeys the recursion
    # A_i = (m_{i-1} + A_{i-1}) * exp(-beta * (t_i - t_{i-1})), which is what
    # makes the whole computation linear in the number of events.
    LL = (n - 1) * np.log(p * beta)
    logA = -np.inf
    prev_ti, prev_mi = history[0]

    for ti, mi in history[1:]:
        if prev_mi + np.exp(logA) <= 0:
            print("Bad value", prev_mi + np.exp(logA))

        logA = np.log(prev_mi + np.exp(logA)) - beta * (ti - prev_ti)
        LL += logA
        prev_ti, prev_mi = ti, mi

    # Compensator term: integral of the intensity over the observation window.
    logA = np.log(prev_mi + np.exp(logA)) - beta * (t - prev_ti)
    LL -= p * (np.sum(mis) - np.exp(logA))

    return LL


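# A minimal, hypothetical usage sketch (not part of the original module): the
# event times, marks and parameter values below are made up for illustration.
def _example_loglikelihood():
    """
    Evaluates loglikelihood on a tiny synthetic cascade.  All numbers are
    illustrative only; real cascades and parameter values will differ.
    """
    # Three made-up events (t_i, m_i): times in seconds and positive marks.
    history = np.array([[0.0, 1000.0],
                        [60.0, 500.0],
                        [300.0, 2000.0]])
    params = np.array([0.02, 1.0 / 3600.0])  # (p, beta), beta in 1/seconds
    # The observation window ends one hour after the first event.
    return loglikelihood(params, history, t=3600.0)

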

def compute_MAP(history, t, alpha, mu,
                prior_params=[0.02, 0.0002, 0.01, 0.001, -0.1],
                max_n_star=1, display=False):
    """
    Returns the pair (estimated log-density of the a posteriori, estimated
    parameters as a numpy array).

    history      -- (n,2) numpy array containing marked time points (t_i, m_i)
    t            -- current time (i.e. end of the observation window)
    alpha        -- power parameter of the power-law mark distribution
    mu           -- min-value parameter of the power-law mark distribution
    prior_params -- list (mu_p, mu_beta, sig_p, sig_beta, corr) of hyperparameters of the prior
                    where:
                    mu_p:     prior mean value of p
                    mu_beta:  prior mean value of beta
                    sig_p:    prior standard deviation of p
                    sig_beta: prior standard deviation of beta
                    corr:     correlation coefficient between p and beta
    max_n_star   -- maximum authorized value of the branching factor (defines the upper bound of p)
    display      -- verbose flag to display optimization iterations (see the 'disp' option of scipy.optimize.minimize)
    """
    if not isinstance(t, (float, int)) or t < 0:
        raise Exception("t must be a float or int greater than 0")

    if not isinstance(history, np.ndarray) or history.shape[1] != 2:
        raise Exception("history must be a np.ndarray with shape (n,2)")

    # Compute prior moments
    mu_p, mu_beta, sig_p, sig_beta, corr = prior_params
    sample_mean = np.array([mu_p, mu_beta])
    cov_p_beta = corr * sig_p * sig_beta
    Q = np.array([[sig_p ** 2, cov_p_beta], [cov_p_beta, sig_beta ** 2]])

    # Apply the method of moments: the prior on (p, beta) is log-normal, so
    # these formulas recover the mean and covariance of the underlying
    # Gaussian on (log p, log beta) from the requested mean and covariance.
    cov_prior = np.log(Q / sample_mean.reshape((-1, 1)) / sample_mean.reshape((1, -1)) + 1)
    mean_prior = np.log(sample_mean) - np.diag(cov_prior) / 2.

    # Compute the covariance inverse (precision matrix) once and for all
    inv_cov_prior = np.linalg.inv(cov_prior)

    # Define the target function to minimize as minus the log of the a posteriori density
    def target(params):
        log_params = np.log(params)

        if np.any(np.isnan(log_params)):
            return np.inf
        else:
            dparams = log_params - mean_prior
            prior_term = -0.5 * float(dparams @ inv_cov_prior @ dparams)
            logLL = loglikelihood(params, history, t)
            return -(prior_term + logLL)

    # Expected value of a mark under the power-law mark distribution
    EM = mu * (alpha - 1) / (alpha - 2)
    eps = 1.E-8

    # Set realistic bounds on p and beta
    p_min, p_max = eps, max_n_star / EM - eps
    beta_min, beta_max = 1 / (3600. * 24 * 10), 1 / (60. * 1)

    # Define the bounds on p (first column) and beta (second column)
    bounds = optim.Bounds(
        np.array([p_min, beta_min]),
        np.array([p_max, beta_max])
    )

    # Run the optimization
    res = optim.minimize(
        target, sample_mean,
        method='Powell',
        bounds=bounds,
        options={'xtol': 1e-8, 'disp': display}
    )
    # Return the log a posteriori density and the estimated parameters
    return (-res.fun, res.x)  # res.x contains p and beta


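# A minimal, hypothetical usage sketch (not part of the original module): the
# cascade below and the power-law parameters alpha and mu are made up, and the
# default prior and bounds of compute_MAP are used as-is.
def _example_compute_MAP():
    """
    Runs the MAP estimation on a tiny synthetic cascade and unpacks the
    result.  All numbers are illustrative only.
    """
    history = np.array([[0.0, 1000.0],
                        [60.0, 500.0],
                        [300.0, 2000.0],
                        [900.0, 150.0]])
    # alpha > 2 so that the expected mark EM is finite; mu is the minimum mark.
    log_density, params = compute_MAP(history, t=3600.0, alpha=2.4, mu=10.0)
    p_est, beta_est = params
    return log_density, p_est, beta_est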