From: Al Haji-Ali
Subject: Patch to avoid superfluous output directories
Date: Fri, 23 Jul 2021 18:13:13 +0100
User-agent: mu4e 1.5.13; emacs 27.2
Hello all,

I noticed a problem with the output-dir feature that I would like to patch. In my setup I set `TeX-output-dir` to `build` by default using `setq-default`, and set the TeX-master as a file-local variable. As a result, when `latex-mode` is applied (after a file is opened), `TeX-region-file` is called before the local variables are hacked, and during this call the wrong output directory is created (since that is where the TeX-region file will be placed). If the output directory is a sub-directory relative to the master, this directory is useless and remains empty.

The attached patch fixes this by making `TeX--master-output-dir` accept an optional argument `ensure` which controls the creation of the directory. The directory is created only when `TeX--master-output-dir` is called from `TeX--output-dir-arg`, not when it is called from `TeX-region-file`.

Best regards,
-- Al
In [1]:
import numpy as np
import matplotlib.pyplot as plt
import random
from math import pi
from numpy.linalg import inv
np.random.seed(1)
In [2]:
def prior(x1, x2, mu, ambda, sigma_k, samples):
    # Define the kernel function k(x1, x2) with hyper-parameters:
    # length scale `ambda` and output scale `sigma_k`.
    matrix_norm = np.sum(x1**2, 1).reshape(-1, 1) + np.sum(x2**2, 1) - 2 * np.dot(x1, x2.T)
    k = sigma_k**2 * np.exp(-1 / ambda**2 * matrix_norm)
    # Draw `samples` random paths from a multivariate normal distribution
    # with mean `mu` and covariance `k`.
    f_prior = np.random.multivariate_normal(mu.ravel(), k, samples)
    # Pointwise standard deviation and 95% confidence interval.
    sd = np.sqrt(np.diag(k))
    ci_low = mu.ravel() - 1.96 * sd
    ci_high = mu.ravel() + 1.96 * sd
    # Plot the sample paths.
    plt.figure(figsize=(10, 4), dpi=80)
    ax1 = plt.subplot(121)
    plt.plot(x1, f_prior.T)
    plt.grid()
    plt.xlabel("input, x")
    plt.ylabel("output, f(x)")
    plt.title("Sample paths from the Gaussian prior")
    # Plot the mean and the 95% band.
    ax2 = plt.subplot(122)
    plt.grid()
    ax2.plot(x1, mu, color='black', label='Mean')
    plt.fill_between(x1.ravel(), ci_high, ci_low, facecolor='grey', alpha=0.5)
    plt.legend()
    return f_prior
In [3]:
# Generate training inputs with sample size N = 10.
N = 10
X = np.linspace(start=-pi, stop=pi, num=N).reshape(-1, 1)
# Generate test inputs.
X_s = np.linspace(start=-pi, stop=pi, num=int(0.8 * N)).reshape(-1, 1)
# Assume a zero mean function.
mu = np.zeros(X_s.shape)
# Draw 6 sample paths with hyper-parameters ambda = 1 and sigma_k = 1.
f_prior = prior(X_s, X_s, mu, 1.0, 1.0, 6)
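For denser input grids the kernel matrix built in `prior` can be numerically singular, and `np.random.multivariate_normal` may warn about a non-positive-semidefinite covariance. A common remedy, shown here as a sketch (not part of the original notebook; the jitter value `1e-8` is an arbitrary choice), is to add a small jitter to the diagonal and sample through a Cholesky factor:

In [ ]:
def sample_prior_stable(x, mu, ambda, sigma_k, samples, jitter=1e-8):
    # Kernel matrix with a small diagonal jitter for numerical stability.
    d2 = np.sum(x**2, 1).reshape(-1, 1) + np.sum(x**2, 1) - 2 * np.dot(x, x.T)
    k = sigma_k**2 * np.exp(-d2 / ambda**2) + jitter * np.eye(len(x))
    # f = mu + L z with L L^T = k and z ~ N(0, I).
    L = np.linalg.cholesky(k)
    z = np.random.randn(len(x), samples)
    return (mu.reshape(-1, 1) + L.dot(z)).T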
In [4]:
# Define the kernel function k(x1, x2).
def kernel(x1, x2, ambda, sigma_k):
    matrix_norm = np.sum(x1**2, 1).reshape(-1, 1) + np.sum(x2**2, 1) - 2 * np.dot(x1, x2.T)
    k = sigma_k**2 * np.exp(-1 / ambda**2 * matrix_norm)
    return k
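A quick sanity check (an addition for illustration, not a cell from the original notebook): the matrix returned by `kernel` should be symmetric with `sigma_k**2` on the diagonal, since the squared distance of a point to itself is zero.

In [ ]:
K = kernel(X, X, 1.0, 0.5)
print(np.allclose(K, K.T))            # symmetric: True
print(np.allclose(np.diag(K), 0.25))  # diagonal equals sigma_k**2 = 0.25: True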
In [5]:
# Test inputs X_s, training inputs X, noise level noise_sigma.
def posterior(X_s, X, ambda, sigma_k, noise_sigma, samples):
    k_train = kernel(X, X, ambda, sigma_k)      # K(X, X)
    k_prior = kernel(X_s, X_s, ambda, sigma_k)  # K(X_star, X_star)
    k_post = kernel(X_s, X, ambda, sigma_k)     # K(X_star, X)
    k_pt = kernel(X, X_s, ambda, sigma_k)       # K(X, X_star) = k_post.T
    # Training outputs Y = f(X) + epsilon with f(X) = sin(X) and
    # epsilon ~ N(0, noise_sigma^2).
    epsilon = np.random.normal(loc=0, scale=noise_sigma, size=X.shape)
    Y = np.sin(X) + epsilon
    # Posterior mean and covariance via equation (18) in the dissertation.
    k_noise_inv = inv(k_train + noise_sigma**2 * np.eye(len(X)))
    mu_s = np.dot(np.dot(k_post, k_noise_inv), Y)
    k_s = k_prior - np.dot(np.dot(k_post, k_noise_inv), k_pt)
    f_posterior = np.random.multivariate_normal(mu_s[:, 0], k_s, samples)
    # Pointwise standard deviation and 95% confidence interval.
    SD = np.sqrt(np.diag(k_s))
    CI_low = mu_s.ravel() - 1.96 * SD
    CI_high = mu_s.ravel() + 1.96 * SD
    # Plot the sample paths; `i` is the loop index set by the caller below.
    plt.figure(figsize=(10, 4), dpi=80)
    plt.subplot(4, 2, i + 1)
    plt.subplots_adjust(top=2)
    plt.plot(X_s, f_posterior.T, alpha=0.5)
    plt.errorbar(X, Y, yerr=noise_sigma, color='black', fmt='o')
    plt.grid()
    plt.xlabel(r"input x, $k=\sigma_k^2 \exp(-\Vert X-X'\Vert_2^2 \, / \, \lambda^2)$")
    plt.ylabel("output, f(x) = sin x")
    plt.title(rf'$\lambda={ambda}$, $\sigma_k={sigma_k}$')
    # Plot the mean and the 95% band.
    plt.figure(figsize=(10, 4), dpi=80)
    plt.subplot(4, 2, i + 1)
    plt.subplots_adjust(top=2)
    plt.grid()
    plt.errorbar(X, Y, yerr=noise_sigma, color='black', fmt='o')
    plt.plot(X_s, mu_s.flat, color='black', label='Mean')
    plt.fill(np.concatenate([X_s, X_s[::-1]]),
             np.concatenate([CI_low, CI_high[::-1]]), alpha=0.5, fc='grey')
    plt.legend()
    return f_posterior
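Explicitly inverting the kernel matrix, as in `posterior` above, is numerically fragile and costs O(n^3) with a poor constant. A standard alternative (a sketch under the same model, not code from the original notebook; note it takes the training outputs `Y` as an argument instead of generating them internally) solves the linear systems through a Cholesky factorization:

In [ ]:
from scipy.linalg import cho_factor, cho_solve

def posterior_chol(X_s, X, Y, ambda, sigma_k, noise_sigma):
    # Same posterior mean and covariance as `posterior`, computed with a
    # Cholesky solve instead of an explicit matrix inverse.
    K = kernel(X, X, ambda, sigma_k) + noise_sigma**2 * np.eye(len(X))
    K_s = kernel(X_s, X, ambda, sigma_k)
    c, low = cho_factor(K)
    mu_s = K_s.dot(cho_solve((c, low), Y))
    k_s = kernel(X_s, X_s, ambda, sigma_k) - K_s.dot(cho_solve((c, low), K_s.T))
    return mu_s, k_s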
In [6]:
hype_params = [
    (0.1, 0.2),
    (0.4, 0.2),
    (0.7, 0.4),
    (1.0, 0.4),
]
for i, (ambda, sigma_k) in enumerate(hype_params):
    f_posterior = posterior(X_s, X, ambda=ambda, sigma_k=sigma_k, noise_sigma=1e-4, samples=6)
In [7]:
# Estimate the hyper-parameters of the non-standard RBF kernel via
# maximum likelihood estimation (MLE).
from scipy.optimize import minimize

class MLE:
    def __init__(self, optimization=True):
        self.is_fit = False
        self.X, self.Y = None, None
        self.hyparams = {"ambda": 0.5, "sigma_k": 0.2, "noise_sigma": 1e-4}
        self.optimization = optimization

    def fitting(self, X, Y):
        # Store the training data.
        self.X = np.array(X)
        self.Y = np.array(Y)

        # Negative log marginal likelihood as a function of (ambda, sigma_k).
        def Negative_Log_Likelihood(hyparams):
            self.hyparams["ambda"], self.hyparams["sigma_k"] = hyparams[0], hyparams[1]
            Kyy = kernel(self.X, self.X, self.hyparams["ambda"], self.hyparams["sigma_k"]) \
                + (self.hyparams["noise_sigma"]**2) * np.eye(len(self.X))
            NLL = 0.5 * self.Y.T.dot(np.linalg.inv(Kyy)).dot(self.Y) \
                + 0.5 * np.linalg.slogdet(Kyy)[1] \
                + 0.5 * len(self.X) * np.log(2 * np.pi)
            return NLL.ravel()[0]

        # Optimize the hyper-parameters.
        if self.optimization:
            res = minimize(Negative_Log_Likelihood,
                           [self.hyparams["ambda"], self.hyparams["sigma_k"]],
                           bounds=((1e-4, 1e4), (1e-4, 1e4)),
                           method='L-BFGS-B')
            self.hyparams["ambda"], self.hyparams["sigma_k"] = res.x[0], res.x[1]
        self.is_fit = True

    def predict(self, X_s):
        if not self.is_fit:
            print("GPR model not fit yet.")
            return
        X_s = np.asarray(X_s)
        k_train = kernel(self.X, self.X, self.hyparams["ambda"], self.hyparams["sigma_k"])  # K(X, X)
        k_prior = kernel(X_s, X_s, self.hyparams["ambda"], self.hyparams["sigma_k"])        # K(X_star, X_star)
        k_post = kernel(X_s, self.X, self.hyparams["ambda"], self.hyparams["sigma_k"])      # K(X_star, X)
        k_pt = kernel(self.X, X_s, self.hyparams["ambda"], self.hyparams["sigma_k"])        # K(X, X_star)
        k_train_inv = np.linalg.inv(k_train + (self.hyparams["noise_sigma"]**2) * np.eye(len(self.X)))
        mu_s = np.dot(np.dot(k_post, k_train_inv), self.Y)
        k_s = k_prior - np.dot(np.dot(k_post, k_train_inv), k_pt)
        # Pointwise standard deviation and 95% confidence interval.
        SD = np.sqrt(np.diag(k_s))
        CI_low = mu_s.ravel() - 1.96 * SD
        CI_high = mu_s.ravel() + 1.96 * SD
        return mu_s, k_s, CI_low, CI_high

def y(x, noise_sigma):
    # Noisy observations y = sin(x) + epsilon, epsilon ~ N(0, noise_sigma^2).
    x = np.asarray(x)
    epsilon = np.random.normal(loc=0, scale=noise_sigma, size=x.shape)
    return (np.sin(x) + epsilon).tolist()

Y = y(X, noise_sigma=1e-4)
gpr = MLE(optimization=True)
gpr.fitting(X, Y)
mu_s, k_s, CI_low, CI_high = gpr.predict(X_s)
Y_s = mu_s.ravel()
plt.figure()
plt.grid()
plt.title(r"$\lambda$=%.1f, $\sigma_k$=%.1f" % (gpr.hyparams["ambda"], gpr.hyparams["sigma_k"]))
plt.fill_between(X_s.ravel(), CI_high, CI_low, alpha=0.1, label='95% confidence interval')
plt.plot(X_s, Y_s, label="predict")
plt.scatter(X, Y, label="train points", c="black", marker="o")
plt.xlabel(r"test input X_s, $k=\sigma_k^2 \exp(-\Vert X-X'\Vert_2^2 \, / \, \lambda^2)$")
plt.ylabel(r"test output, Y_s = sin X_s + $\epsilon$")
plt.legend()
Out[7]:
<matplotlib.legend.Legend at 0x7fee8b94c5e0>
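The closure `Negative_Log_Likelihood` is only visible inside `fitting`, but it can be useful to see the surface the optimizer works on. The sketch below is an illustrative addition, not a cell from the original notebook; `nll` re-implements the same formula at module level and evaluates it on a few length scales:

In [ ]:
def nll(ambda, sigma_k, X, Y, noise_sigma=1e-4):
    # Negative log marginal likelihood; same formula as in MLE.fitting.
    Y = np.array(Y)
    Kyy = kernel(X, X, ambda, sigma_k) + noise_sigma**2 * np.eye(len(X))
    return (0.5 * Y.T.dot(np.linalg.inv(Kyy)).dot(Y)
            + 0.5 * np.linalg.slogdet(Kyy)[1]
            + 0.5 * len(X) * np.log(2 * np.pi)).item()

for a in (0.2, 0.5, 1.0, 2.0):
    print(a, nll(a, gpr.hyparams["sigma_k"], X, Y))
In [8]: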
# Method 2: hyper-parameter optimization via the scikit-learn library
# (MLE fitting).
from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

def y(X, noise_sigma):
    X = np.asarray(X)
    epsilon = np.random.normal(loc=0, scale=noise_sigma, size=X.shape)
    return (np.sin(X) + epsilon).tolist()

Y = y(X, noise_sigma=1e-4)
# Instantiate a Gaussian process model to estimate the hyper-parameters.
# scikit-learn's kernel is ConstantKernel * RBF(length_scale), i.e.
# c * exp(-||X - X'||^2 / (2 * length_scale^2)), whereas ours is
# sigma_k^2 * exp(-||X - X'||^2 / lambda^2). Matching the two forms gives
# c = sigma_k^2 and ambda = sqrt(2) * length_scale; the initial values and
# bounds below are chosen to be roughly comparable to Method 1. (The
# variable is named gp_kernel so it does not shadow the kernel() function
# defined earlier.)
gp_kernel = C(constant_value=0.25, constant_value_bounds=(1e-4, 1e4)) * RBF(length_scale=0.3, length_scale_bounds=(1e-4, 1e4))
gpr = GPR(kernel=gp_kernel, n_restarts_optimizer=2)
# Fit the data via MLE.
gpr.fit(X, Y)
mu_s, k_s = gpr.predict(X_s, return_cov=True)
Y_s = mu_s.ravel()
# Pointwise standard deviation and 95% confidence interval.
SD = np.sqrt(np.diag(k_s))
CI_low = Y_s - 1.96 * SD
CI_high = Y_s + 1.96 * SD
# Plotting.
plt.figure()
plt.grid()
plt.title(r"$\lambda$=%.1f, $\sigma_k$=%.1f" % ((2**0.5) * gpr.kernel_.k2.length_scale, gpr.kernel_.k1.constant_value**0.5))
plt.fill_between(X_s.ravel(), CI_high, CI_low, alpha=0.1, label='95% confidence interval')
plt.plot(X_s, Y_s, label="predict")
plt.xlabel(r"test input X_s, $k=\sigma_k^2 \exp(-\Vert X-X'\Vert_2^2 \, / \, \lambda^2)$")
plt.ylabel(r"test output, Y_s = sin X_s + $\epsilon$")
plt.scatter(X, Y, label="train", c="black", marker="o")
plt.legend()
Out[8]:
<matplotlib.legend.Legend at 0x7fee8ab92c10>
In [ ]:
## The two methods above estimate the optimal hyper-parameters and give
## almost the same results.
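As a check on the kernel correspondence used in Method 2 (an illustrative addition, not a cell from the original notebook; the test values are arbitrary), the two parameterizations can be compared entry by entry:

In [ ]:
# sigma_k^2 * exp(-||x - x'||^2 / lambda^2) should equal
# C(sigma_k^2) * RBF(length_scale = lambda / sqrt(2)).
ambda_chk, sigma_k_chk = 0.7, 0.4
sk = C(constant_value=sigma_k_chk**2) * RBF(length_scale=ambda_chk / np.sqrt(2))
print(np.allclose(kernel(X_s, X_s, ambda_chk, sigma_k_chk), sk(X_s)))  # expected: True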