In [8]:
import numpy as np
import matplotlib.pyplot as plt
====== 基本関数 ======
In [5]:
def bernoulli_pmf(x, p):
    """
    Bernoulli probability mass function: P(X=x) = p^x * (1-p)^(1-x).

    Parameters
    ----------
    x : int or array-like
        Outcome(s); every element must be 0 or 1.
    p : float
        Success probability, expected to satisfy 0 <= p <= 1.

    Returns
    -------
    numpy scalar or ndarray
        pmf value for each element of ``x``.

    Raises
    ------
    ValueError
        If any element of ``x`` is not 0/1, or ``p`` lies outside [0, 1].
    """
    outcomes = np.asarray(x)
    # Reject anything that is not exactly 0 or 1 (NaN also fails this check).
    if not np.all((outcomes == 0) | (outcomes == 1)):
        raise ValueError("x は 0/1 のみ許可")
    # `not (0 <= p <= 1)` also rejects NaN, since NaN comparisons are False.
    if not (0.0 <= p <= 1.0):
        raise ValueError("p は [0,1] 範囲")
    return (p ** outcomes) * ((1 - p) ** (1 - outcomes))
In [6]:
def log_likelihood_bernoulli(data, p):
    """
    Bernoulli log-likelihood: sum_i [ x_i*log(p) + (1-x_i)*log(1-p) ].

    Parameters
    ----------
    data : array-like
        0/1 observations.
    p : float
        Candidate parameter value.

    Returns
    -------
    float
        The log-likelihood of ``data`` under Bernoulli(p). Returns -inf
        when the data have zero probability under ``p`` (or when ``p``
        lies outside [0, 1]).

    Notes
    -----
    Bug fix vs. the original: the endpoints p=0 and p=1 previously always
    returned -inf, but the log-likelihood there is exactly 0 when the data
    are all zeros (p=0) or all ones (p=1). We apply the standard
    0*log(0) = 0 convention (as in scipy.special.xlogy) at the endpoints.
    """
    data = np.asarray(data)
    x_sum = data.sum()
    n = data.size
    # Out-of-range p (including NaN) has zero likelihood, as before.
    if not (0.0 <= p <= 1.0):
        return -np.inf
    # Endpoints: likelihood is 1 (log = 0) iff the data are compatible,
    # otherwise an impossible observation occurred (log = -inf).
    if p == 0.0:
        return 0.0 if x_sum == 0 else -np.inf
    if p == 1.0:
        return 0.0 if x_sum == n else -np.inf
    return x_sum * np.log(p) + (n - x_sum) * np.log(1 - p)
In [7]:
def mle_bernoulli(data):
    """
    Maximum-likelihood estimate of the Bernoulli parameter.

    For i.i.d. 0/1 observations the MLE is simply the sample mean:
    p_hat = (1/n) * sum_i x_i.

    Parameters
    ----------
    data : array-like
        0/1 observations.

    Returns
    -------
    numpy scalar
        The sample mean of ``data``.
    """
    return np.asarray(data).mean()
In [11]:
# Example: simulate 100 trials with true p = 0.3.
# NOTE: module-level names below (rng, data, p_hat, grid, ll, ...) are
# reused by later cells, so they are kept unchanged.
rng = np.random.default_rng(0)
true_p = 0.3
n = 100
# 0/1 observations: Binomial(1, p) draws are Bernoulli(p).
data = rng.binomial(1, true_p, size=n)

# pmf sanity check at both support points.
print("P(X=1) with p=0.3:", bernoulli_pmf(1, 0.3))
print("P(X=0) with p=0.3:", bernoulli_pmf(0, 0.3))

# Maximum-likelihood estimate (= sample mean).
p_hat = mle_bernoulli(data)
print("MLE p̂:", p_hat)

# Log-likelihood over a grid of p values; its argmax should sit at
# (approximately) p_hat, confirming the closed-form MLE.
grid = np.linspace(0.01, 0.99, 199)
ll = np.array([log_likelihood_bernoulli(data, p) for p in grid])
p_at_max = grid[ll.argmax()]
print("argmax of log-likelihood on grid:", p_at_max)

# Theoretical moments vs. their sample counterparts.
theo_mean = true_p
theo_var = true_p * (1 - true_p)
samp_mean = data.mean()
samp_var = data.var(ddof=0)  # population-variance estimator (ddof=0)
print(f"Theoretical E[X]={theo_mean:.3f}, Var[X]={theo_var:.3f}")
print(f"Sample E[X]={samp_mean:.3f}, Var[X]={samp_var:.3f}")
P(X=1) with p=0.3: 0.3 P(X=0) with p=0.3: 0.7 MLE p̂: 0.37 argmax of log-likelihood on grid: 0.3713131313131313 Theoretical E[X]=0.300, Var[X]=0.210 Sample E[X]=0.370, Var[X]=0.233
In [12]:
# ====== Figure 1: theoretical pmf (bars) vs. sample relative frequency ======
fig, ax = plt.subplots(figsize=(6, 4))
xs = np.array([0, 1])
pmf_vals = bernoulli_pmf(xs, true_p)

# Theoretical pmf, shifted left so the two bar groups sit side by side.
ax.bar(xs - 0.15, pmf_vals, width=0.3, label="Theoretical pmf")

# Observed relative frequencies, shifted right.
freq_0 = np.mean(data == 0)
freq_1 = np.mean(data == 1)
ax.bar(xs + 0.15, [freq_0, freq_1], width=0.3, label="Sample frequency")

ax.set_xticks([0, 1])
ax.set_xticklabels(["0", "1"])
ax.set_ylim(0, 1)
ax.set_xlabel("x")
ax.set_ylabel("Probability / Frequency")
ax.set_title("Bernoulli pmf vs Sample Frequency")
ax.legend()
ax.grid(True)
plt.show()
In [13]:
# ====== Figure 2: log-likelihood curve with the true p and the MLE marked ======
fig, ax = plt.subplots(figsize=(7, 4.5))
ax.plot(grid, ll, label="log-likelihood")
# Dashed line at the MLE, dotted line at the true parameter.
ax.axvline(x=p_hat, linestyle="--", label=f"MLE p̂={p_hat:.3f}")
ax.axvline(x=true_p, linestyle=":", label=f"True p={true_p:.2f}")
ax.set_xlabel("p")
ax.set_ylabel("log-likelihood")
ax.set_title("Bernoulli Log-Likelihood")
ax.legend()
ax.grid(True)
plt.show()
In [14]:
# ====== Figure 3: convergence of the MLE (running mean) with 95% CI ======
# Running estimates p̂_n for n = 1..N on a fresh, larger sample.
N = 1000  # more trials so the convergence is visible
data2 = rng.binomial(n=1, p=true_p, size=N)
cumsum = np.cumsum(data2)
ns = np.arange(1, N + 1)
p_hat_seq = cumsum / ns

# Wald 95% confidence interval: p̂ ± z * sqrt(p̂(1-p̂)/n), z = 1.96.
z = 1.96
# Clip the variance term away from 0 so sqrt never sees a negative/zero
# value when p̂ hits 0 or 1 early on.
se_seq = np.sqrt(np.clip(p_hat_seq * (1 - p_hat_seq) / ns, 1e-15, None))
lo_seq = np.clip(p_hat_seq - z * se_seq, 0.0, 1.0)
hi_seq = np.clip(p_hat_seq + z * se_seq, 0.0, 1.0)

fig, ax = plt.subplots(figsize=(8, 4.8))
ax.plot(ns, p_hat_seq, label="p̂ (cumulative mean)")
ax.fill_between(ns, lo_seq, hi_seq, alpha=0.2, label="95% CI (Wald)")
ax.axhline(y=true_p, linestyle="--", label=f"True p={true_p:.2f}")
ax.set_xlabel("number of trials n")
ax.set_ylabel("estimate / interval")
ax.set_title("Convergence of MLE and 95% Confidence Interval")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()
In [ ]: