# Statistical overview
!pip install -q -r requirements.txt
^C
import numpy as np
import pandas as pd
# Interactive parameters (Colab form fields): sample sizes of the two groups.
number1 = 319 #@param {type:"slider", min:3, max:1000, step:1}
number2 = 321 #@param {type:"slider", min:3, max:1000, step:1}
#@markdown ---
# Means of the two groups.
mu1 = 0 #@param {type:"slider", min:-10, max:10, step:0.01}
mu2 = 0.14 #@param {type:"slider", min:-10, max:10, step:0.01}
#@title
# Standard deviation of the normal draws.
sigma = 0.1

# Group 1: `number1` draws from a normal distribution with mean `mu1` and
# standard deviation `sigma`, stored in column 'Area' and tagged with the
# category label '1'.
s1 = np.random.normal(loc=mu1, scale=sigma, size=number1)
d1 = pd.DataFrame({'Area': s1, 'Label': '1'})
d1.head()
| | Area | Label |
---|---|---|
0 | 0.036129 | 1 |
1 | -0.064217 | 1 |
2 | -0.085285 | 1 |
3 | -0.100871 | 1 |
4 | 0.078571 | 1 |
# Group 2: `number2` draws from a normal distribution with mean `mu2` and
# standard deviation `sigma`, stored in column 'Area' and tagged with the
# category label '2'.
s2 = np.random.normal(loc=mu2, scale=sigma, size=number2)
d2 = pd.DataFrame({'Area': s2, 'Label': '2'})
d2.head()
| | Area | Label |
---|---|---|
0 | 0.272011 | 2 |
1 | 0.077220 | 2 |
2 | -0.007429 | 2 |
3 | 0.097255 | 2 |
4 | 0.124592 | 2 |
# Stack both groups into one long-format table (one row per observation,
# with 'Label' identifying the group).
# ignore_index=True gives the combined rows a fresh 0..n-1 index; without it
# the row labels of d1 and d2 are both kept, so every label that occurs in
# both frames would appear twice in `result`.
result = pd.concat([d1, d2], ignore_index=True)
result.head()
| | Area | Label |
---|---|---|
0 | 0.036129 | 1 |
1 | -0.064217 | 1 |
2 | -0.085285 | 1 |
3 | -0.100871 | 1 |
4 | 0.078571 | 1 |
import bokeh.plotting
import bokeh.io
#!pip install --upgrade bokeh-catplot
import bokeh_catplot
# Send Bokeh output to the notebook.
bokeh.io.output_notebook()

# Histogram of the 'Area' values, grouped by the 'Label' column.
p = bokeh_catplot.histogram(data=result, cats='Label', val='Area')
bokeh.io.show(p)
# Keyword arguments shared by both layers of the combined plot.
layer_args = dict(data=result, cats='Label', val='Area', horizontal=True)
black_line = dict(line_color='black', line_width=2)

# Layer 1: jittered, semi-transparent strip plot of the 'Area' values.
p = bokeh_catplot.strip(
    jitter=True,
    height=250,
    marker_kwargs={'alpha': 0.5},
    **layer_args,
)

# Layer 2: box plot added to the figure created above (passed in via p=p),
# with unfilled black boxes and the individual points switched off.
p = bokeh_catplot.box(
    p=p,
    whisker_caps=True,
    display_points=False,
    outlier_marker='diamond',
    box_kwargs={'fill_color': None, 'line_color': 'black'},
    median_kwargs=dict(black_line),
    whisker_kwargs=dict(black_line),
    **layer_args,
)
bokeh.io.show(p)
from scipy.stats import normaltest
# Normality check for each group: run scipy's normaltest on the 'Area'
# column and reject the null hypothesis (data are normally distributed)
# when the p-value falls below the significance level `alpha`.
alpha = 1e-3
for series, hypothesis in (
    (d1['Area'], 'null hypothesis: Data1 comes from a normal distribution'),
    (d2['Area'], 'null hypothesis: Data2 from a normal distribution'),
):
    k2, p = normaltest(series)
    print("p = {:g}".format(p))
    print(hypothesis)
    print(
        "The null hypothesis can be rejected"
        if p < alpha
        else "The null hypothesis cannot be rejected"
    )
p = 0.343844
null hypothesis: Data1 comes from a normal distribution
The null hypothesis cannot be rejected
p = 0.402473
null hypothesis: Data2 from a normal distribution
The null hypothesis cannot be rejected
# t-test
from numpy.random import seed
from numpy.random import randn
from scipy.stats import ttest_ind
# import random
from random import sample
# Two-sample t-test on a reproducible random subset of each group.
# The subset size is capped at the group's own length: Series.sample() raises
# ValueError when asked for more rows than exist (sampling without
# replacement), and the group-size sliders allow groups as small as 3.
max_subset = 100
data1 = d1['Area'].sample(n=min(max_subset, len(d1)), random_state=1)
data2 = d2['Area'].sample(n=min(max_subset, len(d2)), random_state=1)
print('null hypothesis: data sets are from the same distribution')
# compare samples
# NOTE: ttest_ind compares the means of the two samples, so "same
# distribution" in the messages is shorthand for "equal means" here.
stat, p = ttest_ind(data1, data2)
# %g keeps very small p-values readable (scientific notation) instead of
# rounding them to 0.0000000000000000 as a fixed-point format does.
print('Statistics=%.3f, p=%g' % (stat, p))
# interpret
alpha = 0.05  # significance level
if p > alpha:
    print('Same distribution (fail to reject H0)')
else:
    print('Different distribution (reject H0)')
null hypothesis: data sets are from the same distribution
Statistics=-9.765, p=0.0000000000000000
Different distribution (reject H0)
# Mann-Whitney U test
from numpy.random import seed
from numpy.random import randn
from scipy.stats import mannwhitneyu
# import random
from random import sample
# Mann-Whitney U test on a reproducible random subset of each group.
# The subset size is capped at the group's own length: Series.sample() raises
# ValueError when asked for more rows than exist (sampling without
# replacement), and the group-size sliders allow groups as small as 3.
max_subset = 100
data1 = d1['Area'].sample(n=min(max_subset, len(d1)), random_state=1)
data2 = d2['Area'].sample(n=min(max_subset, len(d2)), random_state=1)
print('null hypothesis: data sets are from the same distribution')
# compare samples
stat, p = mannwhitneyu(data1, data2)
# %g keeps very small p-values readable (scientific notation) instead of
# rounding them towards 0.0000000000000000 as a fixed-point format does.
print('Statistics=%.3f, p=%g' % (stat, p))
# interpret
alpha = 0.05  # significance level
if p > alpha:
    print('Same distribution (fail to reject H0)')
else:
    print('Different distribution (reject H0)')
null hypothesis: data sets are from the same distribution
Statistics=1682.000, p=0.0000000000000005
Different distribution (reject H0)
from watermark import watermark
# Record the environment the notebook was run in.
# watermark() returns its report as a string, so every call is printed; a
# bare call that is not the last expression of a cell displays nothing, which
# silently dropped the imported-versions report.
print(watermark(iversions=True, globals_=globals()))
print(watermark())
print(watermark(packages="watermark,numpy,scipy,pandas,matplotlib,bokeh,statannotations"))
Last updated: 2023-01-05T13:43:49.111960+01:00
Python implementation: CPython
Python version : 3.9.15
IPython version : 8.8.0
Compiler : MSC v.1929 64 bit (AMD64)
OS : Windows
Release : 10
Machine : AMD64
Processor : Intel64 Family 6 Model 85 Stepping 7, GenuineIntel
CPU cores : 40
Architecture: 64bit
watermark : 2.3.1
numpy : 1.23.5
scipy : 1.10.0
pandas : 1.5.2
matplotlib : 3.6.2
bokeh : 3.0.3
statannotations: 0.5.0