sbORB
Outlier Resistant Beta
Real life data sometimes includes extreme (outlier) values which you might want to ignore or to exclude: 1
Option Explicit
2
3
Function sbORB(rY As Range, rX As Range, _
4
Optional dSigmaFactor As Double = 3#, _
5
Optional dMaxOutlierPercentage As Double = 0.5) As Variant
6
'sbORB() = outlier resistant beta returns a beta and
7
'an alpha where y = beta * x + alpha is most accurate
8
'for (almost) all given x in rX and y in rY.
9
'"Almost" means that we successively (one by one) throw out outliers
10
'which have a distance of > dSigmaFactor * STDEV_of_all_Distances
11
'from the least square (LS) proxy.
12
'Reverse(moc.liborplus.www) PB V0.9 24-Jun-2007
13
Dim vLinEst As Variant 'store LinEst() result of recent LS proxy during iterations
14
Dim dm2 As Double 'ortogonal slope to recent LS proxy
15
Dim dc As Double 'constant c in: y2 = m2 * x2 + c which is ortogonal to LS proxy through a given point
16
Dim dx2 As Double 'x2 in: y2 = m2 * x2 + c which is ortogonal to LS proxy through a given point
17
Dim dy2 As Double 'y2 in: y2 = m2 * x2 + c which is ortogonal to LS proxy through a given point
18
Dim i As Long, j As Long
19
Dim lcount As Long 'holds current number of live points
20
Dim lcount_orig As Long 'original (starting) number of points
21
Dim lcount_old As Long 'holds number of live points of previous iteration
22
Dim daverage As Double 'average of distances to LS proxy of current iterations' live points
23
Dim dstdev As Double 'standard deviation of distances to LS proxy of current iterations' live points
24
Dim dDistMax As Double
25
Dim lDistMaxIdx As Long
26
27
lcount = rX.Rows.Count
28
If rX.Columns.Count > lcount Then
29
lcount = rX.Columns.Count
30
End If
31
lcount_orig = lcount
32
lcount_old = lcount + 1
33
34
ReDim dDist(1 To lcount) As Double 'store distances of live points to recent LS proxy (line)
35
ReDim dX(1 To lcount) As Double
36
ReDim dY(1 To lcount) As Double 'store coordinates of "live" points during iterations
37
38
39
If rX.Rows.Count > rX.Columns.Count Then
40
For i = 1 To lcount
41
dX(i) = rX.Cells(i, 1)
42
dY(i) = rY.Cells(i, 1)
43
Next i
44
Else
45
For i = 1 To lcount
46
dX(i) = rX.Cells(1, i)
47
dY(i) = rY.Cells(1, i)
48
Next i
49
End If
50
51
Do
52
lcount_old = lcount
53
ReDim Preserve dDist(1 To lcount) As Double 'store distances of live points to recent LS proxy (line)
54
ReDim Preserve dX(1 To lcount) As Double
55
ReDim Preserve dY(1 To lcount) As Double 'store coordinates of "live" points during iterations
56
vLinEst = Application.WorksheetFunction.LinEst(dY, dX, True, True)
57
dDistMax = 0#
58
lDistMaxIdx = 1
59
For i = 1 To lcount
60
'Calculate distances of live points to recent LS proxy
61
dm2 = -1# / vLinEst(1, 1)
62
dc = dY(i) - dX(i) * dm2
63
dx2 = (dc - vLinEst(1, 2)) / (vLinEst(1, 1) - dm2)
64
dy2 = dm2 * dx2 + dc
65
dDist(i) = Sqr((dX(i) - dx2) * (dX(i) - dx2) + (dY(i) - dy2) * (dY(i) - dy2))
66
'remember largest distance and its index
67
If dDist(i) > dDistMax Then
68
dDistMax = dDist(i)
69
lDistMaxIdx = i
70
End If
71
Next i
72
'calculate average and standard deviation of live points' distances to LS proxy
73
daverage = Application.WorksheetFunction.Average(dDist)
74
dstdev = Application.WorksheetFunction.StDev(dDist)
75
' 'kill points with distance > dSigmaFactor * dstdev 'Attention: might erase too many points
76
' j = 1
77
' For i = 1 To lcount
78
' If dDist(i) <= dstdev * dSigmaFactor Then
79
' dX(j) = dX(i)
80
' dY(j) = dY(i)
81
' j = j + 1
82
' Else
83
' Debug.Print "Lcount: " & lcount & ". Throwing out (" & dX(i) & ";" & dY(i) & ")"
84
' End If
85
' Next i
86
' lcount = j - 1
87
'kill point with largest distance > dSigmaFactor * dstdev
88
If dDist(lDistMaxIdx) >= dstdev * dSigmaFactor Then
89
Debug.Print "Lcount: " & lcount & ". Throwing out (" & dX(lDistMaxIdx) & _
90
";" & dY(lDistMaxIdx) & ")"
91
dX(lDistMaxIdx) = dX(lcount)
92
dY(lDistMaxIdx) = dY(lcount)
93
lcount = lcount - 1
94
End If
95
Loop While lcount_old > lcount And lcount / lcount_orig > 1# - dMaxOutlierPercentage
96
97
If lcount < lcount_old Then
98
vLinEst = Application.WorksheetFunction.LinEst(dY, dX, True, True)
99
End If
100
101
sbORB = vLinEst
102
103
End Function
Copied!