-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdistfreeest.py
172 lines (147 loc) · 5.74 KB
/
distfreeest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import scipy.optimize
import numpy as np
import pandas as pd
def length(x):
    """Return how many values ``x`` represents.

    Parameters
    ----------
    x : scalar or sized container
        A Python ``int``/``float``, a NumPy numeric scalar, or any object
        that supports ``len``.

    Returns
    -------
    int
        ``1`` for a numeric scalar, otherwise ``len(x)``.

    Raises
    ------
    TypeError
        If ``x`` is neither a numeric scalar nor an object with a length.
    """
    # isinstance (rather than an exact type comparison) also accepts NumPy
    # scalars such as np.float64 / np.int64, which have no len().
    if isinstance(x, (int, float, np.number)):
        return 1
    return len(x)
def f(n, alpha, P):
    """Evaluate ``alpha - (n * P**(n - 1) - (n - 1) * P**n)``.

    The function is written as a residual so that it can be handed to a
    root finder: it is zero when ``alpha`` equals the bracketed expression
    in ``n`` and ``P``.

    Parameters
    ----------
    n : number
        Sample size (may be non-integer while a root finder is searching).
    alpha : float
        Target value the bracketed expression is compared against.
    P : float
        Proportion used as the base of the powers.

    Returns
    -------
    float
        The signed difference between ``alpha`` and the expression.
    """
    bracketed = n * P**(n - 1) - (n - 1) * P**n
    return alpha - bracketed
def bisection(a, b, n, alpha, tol=1e-8):
    """Find a root in ``P`` of ``f(n, alpha, P)`` on ``[a, b]`` by bisection.

    Parameters
    ----------
    a, b : float
        End points of the search interval. The method assumes ``f`` changes
        sign between them; if it does not, the search drifts towards ``b``.
    n : number
        Passed through to ``f`` as ``n``.
    alpha : float
        Passed through to ``f`` as ``alpha``.
    tol : float, optional
        The search stops once the bracket is narrower than this width.
        The default is 1e-8.

    Returns
    -------
    float
        The last midpoint examined (or an end point / midpoint that is an
        exact root).
    """
    lo, hi = a, b

    # A bracket that is already narrower than ``tol`` needs no iterations;
    # return its midpoint instead of falling through with nothing computed.
    if abs(hi - lo) < tol:
        return (lo + hi) / 2

    f_lo = f(n=n, alpha=alpha, P=lo)
    if f_lo == 0:
        return lo

    mid = lo
    while abs(hi - lo) >= tol:
        mid = (lo + hi) / 2
        f_mid = f(n=n, alpha=alpha, P=mid)
        if f_mid == 0:
            return mid
        # Compare signs directly. The sign test must be against zero, not
        # against ``tol``: a small positive product still means both values
        # lie on the same side of the root.
        if (f_lo > 0) == (f_mid > 0):
            # Same sign: the root lies to the right of ``mid``. Reuse f_mid
            # as the new left-end value rather than re-evaluating ``f``.
            lo, f_lo = mid, f_mid
        else:
            hi = mid
    return mid
def distfreeest2(n=None, alpha=None, P=None, side=1):
    """Solve for whichever one of ``n``, ``alpha`` or ``P`` is left as None.

    Scalar worker: exactly two of the three quantities must be supplied.

    Parameters
    ----------
    n : number, optional
        Sample size.
    alpha : float, optional
        1 minus the confidence level.
    P : float, optional
        Proportion of the population to be covered.
    side : 1 or 2, optional
        Whether the 1-sided or 2-sided formulas are used. The default is 1.

    Returns
    -------
    int, float or str
        * ``n`` missing: the root rounded up to an ``int``.
        * ``P`` missing: the proportion, rounded to 4 decimals (1-sided) or
          rounded up at the 4th decimal (2-sided).
        * ``alpha`` missing: ``1 - P**n`` (1-sided), or 1 minus the 2-sided
          expression rounded up at the 4th decimal.
        * An explanatory message string when the arguments are invalid
          (kept as a return value, not an exception, for existing callers).
    """
    # Exactly one quantity must be unknown. Supplying all three previously
    # fell through every branch and failed on an unassigned result.
    unknown_count = sum(arg is None for arg in (n, alpha, P))
    if unknown_count != 1:
        return 'Must specify values for any two of n, alpha, and P'
    if side != 1 and side != 2:
        return 'Must specify a 1-sided or 2-sided interval'

    if side == 1:
        if n is None:
            return int(np.ceil(np.log(alpha) / np.log(P)))
        if P is None:
            proportion = np.exp(np.log(alpha) / n)
            # Round to 4 decimal places via fixed-point formatting.
            return float(f'{proportion:.4f}')
        return 1 - P**n

    # side == 2
    if alpha is None:
        return 1 - (np.ceil((n * P**(n - 1) - (n - 1) * P**n) * 10000)) / 10000
    if n is None:
        # ``f`` takes n as its first argument, so brentq searches over n.
        root = scipy.optimize.brentq(
            f, a=0, b=1e100, args=(alpha, P), maxiter=1000
        )
        return int(np.ceil(root))
    return np.ceil(bisection(0, 1, alpha=alpha, n=n, tol=1e-8) * 10000) / 10000
def _as_value_list(values):
    """Return ``values`` as a list, wrapping a lone scalar in a list."""
    # np.ndim is 0 for Python and NumPy scalars alike, so ints, floats and
    # NumPy scalar types are all wrapped consistently.
    if np.ndim(values) == 0:
        return [values]
    return list(values)


def distfreeest(n=None, alpha=None, P=None, side=1):
    """
    Estimating Various Quantities for Distribution-Free Tolerance Intervals

    Description
        When providing two of the three quantities n, alpha, and P, this
        function solves for the third quantity in the context of
        distribution-free tolerance intervals.

    Usage
        distfreeest(n = None, alpha = None, P = None, side = 1)

    Parameters
    ----------
    n : int or list of ints, optional
        The necessary sample size to cover a proportion P of the population
        with confidence 1-alpha. Can be a vector. The default is None.
    alpha : float or list of floats, optional
        1 minus the confidence level attained when it is desired to cover a
        proportion P of the population and a sample size n is provided. Can
        be a vector. The default is None.
    P : float or list of floats, optional
        The proportion of the population to be covered with confidence
        1-alpha when a sample size n is provided. Can be a vector. The
        default is None.
    side : 1 or 2, optional
        Whether a 1-sided or 2-sided tolerance interval is assumed
        (determined by side = 1 or side = 2, respectively). The default
        is 1.

    Returns
    -------
    out : dataframe
        A table of the solved quantity, stored as floats, with one cell per
        combination of the supplied values:

        * n omitted: rows are alpha values, columns are P values.
        * alpha omitted: rows are n values, columns are P values.
        * P omitted: rows are alpha values, columns are n values.

    Raises
    ------
    ValueError
        If the number of omitted quantities among n, alpha, and P is not
        exactly one, or if side is neither 1 nor 2.

    References
        Natrella, M. G. (1963), Experimental Statistics: National Bureau of
        Standards - Handbook No. 91, United States Government Printing
        Office, Washington, D.C.

    Examples
        ## Solving for 1 minus the confidence level.
        distfreeest(n = 59, P = 0.95, side = 1)
        ## Solving for the sample size.
        distfreeest(alpha = 0.05, P = 0.95, side = 1)
        ## Solving for the proportion of the population to cover.
        distfreeest(n = 59, alpha = 0.05, side = 1)
        ## Solving for sample sizes for many tolerance specifications.
        distfreeest(alpha = [0.01,0.02,0.05], P = [0.95,0.99], side = 2)
    """
    # Validate up front so bad calls fail with a clear message rather than
    # an incidental error deep inside the table-building code.
    if sum(arg is None for arg in (n, alpha, P)) != 1:
        raise ValueError('Must specify values for any two of n, alpha, and P')
    if side not in (1, 2):
        raise ValueError('Must specify a 1-sided or 2-sided interval')

    # Decide which supplied quantity labels the rows and which the columns.
    if n is None:
        row_key, row_values, col_key, col_values = 'alpha', alpha, 'P', P
    elif alpha is None:
        row_key, row_values, col_key, col_values = 'n', n, 'P', P
    else:
        row_key, row_values, col_key, col_values = 'alpha', alpha, 'n', n

    row_values = _as_value_list(row_values)
    col_values = _as_value_list(col_values)

    matrix = np.zeros((len(row_values), len(col_values)))
    for i, row_value in enumerate(row_values):
        for j, col_value in enumerate(col_values):
            matrix[i, j] = distfreeest2(
                side=side, **{row_key: row_value, col_key: col_value}
            )

    # Labels are float arrays, matching the float-typed axis labels the
    # table has always carried.
    return pd.DataFrame(
        matrix,
        columns=np.asarray(col_values, dtype=float),
        index=np.asarray(row_values, dtype=float),
    )
# print(distfreeest(n = 59, P = 0.95, side = 1))
# print(distfreeest(alpha = 0.05, P = 0.95, side = 1))
# print(distfreeest(n = 59, alpha = 0.05, side = 1))
#print(distfreeest(alpha = [0.01,0.02,0.05], P = [0.95,0.99],side = 2))