Template fits: comparing two chi-square distributed test statistics
from iminuit import cost, Minuit
import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import chi2
from IPython.display import display
# Histogram range shared by the toy data sample and both templates.
xr = (0, 2)

# Seeded generator so that repeated runs draw the same toy experiments.
rng = np.random.default_rng(1)

# Events drawn per template component, and number of toy experiments.
nmc, trials = 1000, 1000

# Result accumulators keyed by bin count: `data` collects the cost-function
# minimum of every fit, `data2` the corresponding sum of squared pulls.
data, data2 = {}, {}

# Stays True until the first fit result has been displayed.
first = True
# Toy study: for every trial and every binning, generate a data sample and two
# Monte Carlo templates, fit the templates to the data, and record two
# goodness-of-fit statistics of the fit.
for trial in range(trials):
    for bins in (20, 200):
        # Toy data: a normal peak plus an exponential component of equal
        # sample size, keeping only values strictly inside xr.
        xdata = rng.normal(1, 0.1, size=1000)
        ydata = rng.exponential(size=len(xdata))
        xmix = np.append(xdata, ydata)
        xmix = xmix[(xr[0] < xmix) & (xmix < xr[1])]
        n, xe = np.histogram(xmix, bins=bins, range=xr)

        # Templates: independent samples of size nmc from the same two
        # distributions, histogrammed with the same binning as the data.
        x = rng.normal(1, 0.1, size=nmc)
        y = rng.exponential(size=nmc)
        t = [
            np.histogram(x, bins=bins, range=xr)[0],
            np.histogram(y, bins=bins, range=xr)[0],
        ]

        # Fit the two template yields, both starting at 1.
        c = cost.Template(n, xe, t)
        m = Minuit(c, 1, 1)
        m.migrad()
        # Sanity checks: stop the study if any single fit did not converge.
        assert m.valid
        assert m.accurate

        # Statistic 1: value of the cost function at the minimum.
        data.setdefault(bins, []).append(m.fmin.fval)
        # Statistic 2: sum of squared pulls; nansum skips NaN pulls
        # (presumably bins without a defined pull -- confirm in iminuit docs).
        data2.setdefault(bins, []).append(np.nansum(c.pulls(m.values) ** 2))

        # display one example fit
        if first:
            display(m)
            first = False
# Convert the per-binning result lists into NumPy arrays so they can be
# passed to vectorised functions. Both dicts share the same keys because
# they are filled together.
for key in tuple(data):
    data[key] = np.array(data[key])
    data2[key] = np.array(data2[key])
# One figure per binning. If a statistic follows a chi-square distribution
# with (bins - 2) degrees of freedom -- the number of bins minus the two
# fitted yields -- its CDF values are uniform on (0, 1), so a flat histogram
# indicates agreement with that distribution.
for bins in data:
    plt.figure()
    plt.title(f"bins = {bins}")
    reference = chi2(bins - 2)
    plt.hist(
        reference.cdf(data[bins]),
        bins=10,
        range=(0, 1),
        label="cost function",
    )
    plt.hist(
        reference.cdf(data2[bins]),
        bins=10,
        range=(0, 1),
        alpha=0.5,
        label="sum of pulls squared",
    )
    plt.legend()


本文使用 iminuit 库,在模板拟合(Template fits)中对比了两种近似服从 χ² 分布的检验统计量:成本函数的最小值和 pulls 平方和。通过生成随机数据并进行拟合,作者展示了如何计算并可视化这两种统计量在不同 bin 数量(20 和 200)下的分布。
607

被折叠的 条评论
为什么被折叠?



