fboehlandt
06-09-2009, 06:09 AM
Hi everyone,
I'm currently looking for a forward stepwise regression algorithm that I can use in Excel 2007. Ideally, I would like the
code to be replicable for large data ranges. I need to write a macro that automatically conducts multiple stepwise
regression analysis for several hundert data ranges. All online add-ins such as statistiXL allow no macro-recordings.
Thus, i have come up with my own algorithm to calculate specific regression output variables. Unfortunately, I'm no
VBA expert so the code is certainnly not optimal. I takes a couple of seconds to calculate the results. Can anyone
please help me streamline the code below? Or alternatively, does anybody know where I can find an appropriate
script that calculates regression statistics for me? Thanks in advance
Option Explicit
Function STEPREG(rangeY As Range, rangeX As Range, Optional Fcrit1, Optional Fcrit2, Optional tolerance)
'This regression model selects predictor variables to enter (forward regression)
'The relevant statistics are F for X(1) and partial F for X(n)
'Variabels are tested for correlation
'Default F-to-enter = 3.84; F-to-leave = 2.71; tolerance = 0.01
'Note that fixed F-to-enter/F-to-leave limits have no probabilistic meaning
'They correspond roughly to a level of significance of .05
'Input: a one-dimensional range of y, at least one range of predictor variables
'Output: regression statistics
'Comment: all ranges must include a label in the first row
'Comment: ata must be arranged in columns
'Comment: the predictor variables have to be adjacent
'Florian Boehlandt, December 2008
Dim i As Integer, j As Integer, counter As Integer, k As Integer
Dim nrow As Integer, ncol As Integer, iter As Integer
Dim SumSqReg As Single, SumSqErr As Single, MeanSqErr As Single, MeanSqReg As Single
Dim dfR As Integer, dfE As Integer, enter As Integer, leave As Integer, varnum As Integer
Dim result As Variant
nrow = rangeX.Rows.Count
ncol = rangeX.Columns.Count
ReDim dummyX(1 To nrow, 1 To ncol)
ReDim dummyY(1 To nrow, 1 To 1)
ReDim dummyXX(1 To nrow, 1 To ncol)
ReDim dummyXY(1 To nrow, 1 To 1)
ReDim cormat(1 To ncol)
ReDim Fstat(1 To ncol)
ReDim SSE(1 To ncol)
ReDim MSR(1 To ncol)
ReDim MSE(1 To ncol)
ReDim maxmse(1 To ncol)
ReDim Fenter(1 To ncol)
ReDim Fleave(1 To ncol)
ReDim reg(1 To 1)
ReDim mincor(2 To 2)
ReDim out(1 To 1)
ReDim maxFp(1 To 1)
ReDim minFp(1 To 1)
ReDim SSER(1 To 1)
'default critical values
If IsMissing(Fcrit1) Then
Fcrit1 = 3.84
Else
End If
If IsMissing(Fcrit2) Then
Fcrit2 = 2.71
Else
End If
If IsMissing(tolerance) Then
tolerance = 0.01
Else
End If
iter = 1000
'first variable to enter
For j = 1 To ncol
For i = 1 To nrow
dummyX(i, j) = rangeX(j).Rows(i).Value
dummyY(i, 1) = rangeY.Rows(i).Value
Next i
Fstat(j) = REGOUTPUT(dummyY, dummyX, 6, 1)
MSR(j) = REGOUTPUT(dummyY, dummyX, 11, 1)
SSE(j) = REGOUTPUT(dummyY, dummyX, 8, 1)
For i = 1 To nrow
dummyX(i, j) = ""
Next i
Next j
maxFp(1) = WorksheetFunction.Max(Fstat)
minFp(1) = 0
If maxFp(1) <= Fcrit1 Then
' MsgBox "No parameter entered (F <= Fcrit)"
Exit Function
Else
reg(1) = WorksheetFunction.Match(maxFp(1), Fstat, 0)
SSER(1) = SSE(reg(1))
For i = 1 To nrow
dummyX(i, reg(1)) = rangeX.Cells(i, reg(1))
Next i
End If
'number of variabels selected
enter = 1
leave = 0
varnum = enter - leave
k = 1
Do While k < iter And varnum < nrow
k = k + 1
ReDim Preserve reg(1 To k)
ReDim Preserve mincor(2 To k)
ReDim Preserve out(1 To k)
ReDim Preserve maxFp(1 To k)
ReDim Preserve minFp(1 To k)
ReDim Preserve SSER(1 To k)
'variable entered
For j = 1 To ncol
If dummyX(1, j) = "" Then
For i = 1 To nrow
dummyX(i, j) = rangeX.Cells(i, j)
Next i
SSE(j) = REGOUTPUT(dummyY, dummyX, 8, 1)
MSE(j) = SSE(j) / (nrow - varnum - 2)
'F-to-enter
Fenter(j) = (SSER(k - 1) - SSE(j)) / MSE(j)
For i = 1 To nrow
dummyX(i, j) = ""
Next i
Else
End If
Next j
maxFp(k) = WorksheetFunction.Large(Fenter, enter)
reg(k) = WorksheetFunction.Match(maxFp(k), Fenter, 0)
SSER(k) = SSE(reg(k))
maxmse(k) = MSE(reg(k))
If maxFp(k) >= Fcrit1 Then
enter = enter + 1
For i = 1 To nrow
dummyX(i, reg(k)) = rangeX.Cells(i, reg(k))
Next i
Else
Exit Do
End If
'correlation matrix (.01 tolerance)
For j = 1 To ncol
If dummyX(1, j) <> "" Then
For i = 1 To nrow
dummyXY(i, 1) = rangeX.Cells(i, j)
Next i
For counter = 1 To ncol
For i = 1 To nrow
If dummyX(1, counter) <> "" And j <> counter Then
dummyXX(i, counter) = rangeX.Cells(i, counter)
Else
dummyXX(i, counter) = ""
End If
Next i
Next counter
cormat(j) = 1 - REGOUTPUT(dummyXY, dummyXX, 2, 1)
Else
cormat(j) = 1
End If
Next j
mincor(k) = WorksheetFunction.Min(cormat)
If mincor(k) <= tolerance Then
leave = leave + 1
SSER(k) = SSER(k - 1)
For i = 1 To nrow
dummyX(i, reg(k)) = ""
Next i
GoTo Endloop
Else
End If
'variable left
For j = 1 To ncol
If j <> reg(k) And dummyX(1, j) <> "" Then
'F-to-leave
For i = 1 To nrow
dummyX(i, j) = ""
Next i
SSE(j) = REGOUTPUT(dummyY, dummyX, 8, 1)
Fleave(j) = (SSE(j) - SSER(k)) / maxmse(k)
For i = 1 To nrow
dummyX(i, j) = rangeX.Cells(i, j)
Next i
Else
End If
Next j
minFp(k) = WorksheetFunction.Small(Fleave, leave + 1)
out(k) = WorksheetFunction.Match(minFp(k), Fleave, 0)
If minFp(k) <= Fcrit2 Then
leave = leave + 1
SSER(k) = SSER(k - 1)
For i = 1 To nrow
dummyX(i, out(k)) = ""
Next i
Else
End If
varnum = enter - leave
Endloop:
Loop
'regression statistics
result = MULTIREG(dummyY, dummyX)
STEPREG = result
End Function
Function STEPREGOUT(rangeY As Range, rangeX As Range, row As Integer, col As Integer, Optional Fcrit1, _
Optional Fcrit2, Optional tolerance)
Dim result As Variant
result = STEPREG(rangeY, rangeX) ', Fcrit1, Fcrit2, tolerance)
STEPREGOUT = result(row, col)
End Function
Note that subroutines are handed over to two functions called REGOUTPUT and MULTIREG. They calculate the
required regression statistics. THe code is as follows:
Option Explicit
Function MULTIREG(rangeY As Variant, rangeX As Variant)
'Input: at most one dependent variables, at least one range of explanatory variables
'Output: regression statistics
'Comment: data has to be arranged in cols
'Comment: no missing values allowed (within)
'Comment: series can be of different lenghts
'Comment: input range has to be the same number of rows for x and y
'Florian Boehlandt, December 2008
Dim df As Integer
Dim i As Integer, j As Integer, countcol As Integer, countrow As Integer
Dim ny As Integer, nx As Integer, col As Integer, ncol As Integer
Dim alpha As Long
Dim frow As Integer, lRow As Integer, Obs As Integer
Dim ymean As Single, yvarp As Single, yvar As Single
Dim xt As Variant, xtx As Variant, yt As Variant, xty As Variant
Dim xtxinv As Variant, coeff As Variant
Dim yhat As Variant
Dim SSE As Single
Dim loglike As Single, akaike As Single, schwarz As Single, hannan As Single
Dim skewness As Single, kurtosis As Single, jarber As Single, jarberp As Single
Dim chisqd As Variant
Dim durwat As Single
Dim durlim As Variant
Dim MSE As Single, regsq As Single, adjregsq As Single, mreg As Single, SEE As Single
Dim totvar As Single, SSR As Single, MSR As Single, Fstat As Single, pvalueF As Single
Dim rngout As Integer
'Errorhandler for array data / range data
If TypeOf rangeX Is Excel.Range And TypeOf rangeY Is Excel.Range Then
ny = rangeY.Rows.Count
nx = rangeX.Rows.Count
col = rangeY.Columns.Count
ncol = rangeX.Columns.Count
ElseIf IsArray(rangeX) And IsArray(rangeY) Then
ny = UBound(rangeY, 1)
nx = UBound(rangeX, 1)
col = UBound(rangeY, 2)
ncol = UBound(rangeX, 2)
End If
'Error message: y-Range
If Not col = 1 Then
MsgBox "Y Range does not match specifications!"
End If
'Error message: identical row numbers
If Not (ny = nx) Then
MsgBox "Data ranges don't match!"
Exit Function
End If
alpha = 0.05
frow = WorksheetFunction.Max(FIRSTINCOLUMN(rangeY), FIRSTINCOLUMN(rangeX)) + 1
lRow = WorksheetFunction.Min(LASTINCOLUMN(rangeY), LASTINCOLUMN(rangeX))
Obs = lRow + 1 - frow
ReDim yfactor(1 To Obs)
ReDim yfactor2(1 To Obs, 1)
ReDim ydiff(1 To Obs)
ReDim ydiffsq(1 To Obs)
ReDim erry(1 To Obs)
ReDim errsq(1 To Obs)
ReDim xfactor(1 To Obs, 1 To 1)
ReDim contr(1 To ncol)
ReDim coeffnam(1 To 2)
countcol = 1
For j = 1 To ncol
contr(j) = rangeX(frow, j)
If contr(j) = "" Then GoTo Nextj
countcol = countcol + 1
ReDim Preserve xfactor(1 To Obs, 1 To countcol)
ReDim Preserve coeffnam(1 To countcol)
coeffnam(countcol) = rangeX(1, j)
countrow = 0
For i = frow To lRow
countrow = countrow + 1
yfactor(countrow) = rangeY(i, 1)
xfactor(countrow, countcol) = rangeX(i, j)
Next i
Nextj:
Next j
'Error message: too many regressors
If Not (nx - countcol) >= 3 Then
MsgBox "There are too many predictor variables for the number of datapoints for each series!"
Exit Function
End If
ymean = WorksheetFunction.Average(yfactor)
yvarp = WorksheetFunction.VarP(yfactor)
yvar = WorksheetFunction.Var(yfactor)
coeffnam(1) = "Intercept"
For i = 1 To Obs
xfactor(i, 1) = 1
ydiff(i) = (yfactor(i) - ymean)
ydiffsq(i) = ydiff(i) ^ 2
Next i
'degrees of freedom (with intercept)
df = Obs - countcol
'matrix calculations
xt = WorksheetFunction.Transpose(xfactor)
xtx = WorksheetFunction.MMult(xt, xfactor)
xtxinv = WorksheetFunction.MInverse(xtx)
yt = WorksheetFunction.Transpose(yfactor)
xty = WorksheetFunction.MMult(xt, yt)
'coefficient estimates
coeff = WorksheetFunction.MMult(xtxinv, xty)
yhat = WorksheetFunction.MMult(xfactor, coeff)
'errors squared
For i = 1 To Obs
yfactor2(i, 1) = yfactor(i)
erry(i) = (yfactor2(i, 1) - yhat(i, 1))
errsq(i) = erry(i) ^ 2
Next i
SSE = WorksheetFunction.Sum(errsq)
loglike = Obs / 2 * (1 + WorksheetFunction.Ln(2 * WorksheetFunction.Pi()) + WorksheetFunction.Ln(SSE / Obs))
akaike = 2 * loglike / Obs + 2 * countcol / Obs
schwarz = 2 * loglike / Obs + countcol * WorksheetFunction.Ln(Obs) / Obs
hannan = 2 * loglike / Obs + 2 * countcol * WorksheetFunction.Ln(WorksheetFunction.Ln(Obs)) / Obs
'normality
skewness = WorksheetFunction.Skew(erry)
kurtosis = WorksheetFunction.Kurt(erry)
jarber = (Obs / 6) * (skewness ^ 2 + kurtosis ^ 2 / 4)
jarberp = WorksheetFunction.ChiDist(jarber, 2)
chisqd = CHINORMAL(erry)
'durbin-watson statistic
ReDim corr(2 To Obs)
For i = 2 To Obs
corr(i) = (erry(i) - erry(i - 1)) ^ 2
Next i
durwat = WorksheetFunction.Sum(corr) / WorksheetFunction.Sum(errsq)
durlim = DURBIN(Obs, countcol)
'heteroscedasticity
ReDim xfactors(1 To Obs)
ReDim szroeterp(2 To countcol)
For i = 2 To countcol
For j = 1 To Obs
xfactors(j) = xfactor(j, i)
Next j
szroeterp(i) = SZROETER(yfactor, xfactors)
Next i
'goodness of fit
MSE = SSE / df
regsq = 1 - SSE / Obs / yvarp
adjregsq = 1 - MSE / yvar
mreg = (regsq) ^ (1 / 2)
SEE = MSE ^ (1 / 2)
totvar = yvar * (Obs - 1)
SSR = totvar - SSE
MSR = SSR / (countcol - 1)
Fstat = MSR / MSE
pvalueF = WorksheetFunction.FDist(Fstat, countcol - 1, df)
'error of coefficient estimates and confidence level estimator
ReDim covmat(1 To countcol, 1 To countcol)
ReDim coefferr(1 To countcol)
ReDim tstat(1 To countcol)
ReDim pvaluet(1 To countcol)
ReDim lowconf(1 To countcol)
ReDim uppconf(1 To countcol)
For i = 1 To countcol
For j = 1 To countcol
covmat(i, j) = xtxinv(i, j) * MSE
Next j
coefferr(i) = covmat(i, i) ^ (1 / 2)
tstat(i) = coeff(i, 1) / coefferr(i)
pvaluet(i) = WorksheetFunction.TDist(Abs(tstat(i)), df, 2)
lowconf(i) = coeff(i, 1) - WorksheetFunction.TInv(alpha, df) * coefferr(i)
uppconf(i) = coeff(i, 1) + WorksheetFunction.TInv(alpha, df) * coefferr(i)
Next i
'correlation matrix of coefficients
ReDim cormat(1 To countcol, 1 To countcol)
For i = 1 To countcol
For j = 1 To countcol
cormat(i, j) = covmat(i, j) / (coefferr(i) * coefferr(j))
Next j
Next i
rngout = WorksheetFunction.Max(11, countcol)
ReDim result(1 To rngout, 1 To 8)
result(1, 1) = mreg
result(2, 1) = regsq
result(3, 1) = adjregsq
result(4, 1) = SEE
result(5, 1) = Obs
result(6, 1) = Fstat
result(7, 1) = pvalueF
result(8, 1) = SSE
result(9, 1) = SSR
result(10, 1) = MSE
result(11, 1) = MSR
For i = 1 To countcol
result(i, 2) = coeffnam(i)
result(i, 3) = coeff(i, 1)
result(i, 4) = pvaluet(i)
Next i
For i = 2 To countcol
result(i, 5) = szroeterp(i)
Next i
result(1, 6) = durwat
For i = 2 To 5
result(i, 6) = durlim(i - 1)
Next i
result(1, 7) = jarber
result(2, 7) = jarberp
For i = 3 To 4
result(i, 7) = chisqd(i - 2)
Next i
result(1, 8) = akaike
result(2, 8) = schwarz
result(3, 8) = hannan
MULTIREG = result
End Function
Function REGOUTPUT(rangeY As Variant, rangeX As Variant, row As Integer, col As Integer)
Dim result As Variant
result = MULTIREG(rangeY, rangeX)
REGOUTPUT = result(row, col)
End Function
I have omitted some functions that were referred to above (like the oen for the DUrbin-Watson stat). They should
not have much impact on the time of the code execution.
I'm currently looking for a forward stepwise regression algorithm that I can use in Excel 2007. Ideally, I would like the
code to be replicable for large data ranges. I need to write a macro that automatically conducts multiple stepwise
regression analysis for several hundert data ranges. All online add-ins such as statistiXL allow no macro-recordings.
Thus, i have come up with my own algorithm to calculate specific regression output variables. Unfortunately, I'm no
VBA expert so the code is certainnly not optimal. I takes a couple of seconds to calculate the results. Can anyone
please help me streamline the code below? Or alternatively, does anybody know where I can find an appropriate
script that calculates regression statistics for me? Thanks in advance
Option Explicit
Function STEPREG(rangeY As Range, rangeX As Range, Optional Fcrit1, Optional Fcrit2, Optional tolerance)
'This regression model selects predictor variables to enter (forward regression)
'The relevant statistics are F for X(1) and partial F for X(n)
'Variabels are tested for correlation
'Default F-to-enter = 3.84; F-to-leave = 2.71; tolerance = 0.01
'Note that fixed F-to-enter/F-to-leave limits have no probabilistic meaning
'They correspond roughly to a level of significance of .05
'Input: a one-dimensional range of y, at least one range of predictor variables
'Output: regression statistics
'Comment: all ranges must include a label in the first row
'Comment: ata must be arranged in columns
'Comment: the predictor variables have to be adjacent
'Florian Boehlandt, December 2008
Dim i As Integer, j As Integer, counter As Integer, k As Integer
Dim nrow As Integer, ncol As Integer, iter As Integer
Dim SumSqReg As Single, SumSqErr As Single, MeanSqErr As Single, MeanSqReg As Single
Dim dfR As Integer, dfE As Integer, enter As Integer, leave As Integer, varnum As Integer
Dim result As Variant
nrow = rangeX.Rows.Count
ncol = rangeX.Columns.Count
ReDim dummyX(1 To nrow, 1 To ncol)
ReDim dummyY(1 To nrow, 1 To 1)
ReDim dummyXX(1 To nrow, 1 To ncol)
ReDim dummyXY(1 To nrow, 1 To 1)
ReDim cormat(1 To ncol)
ReDim Fstat(1 To ncol)
ReDim SSE(1 To ncol)
ReDim MSR(1 To ncol)
ReDim MSE(1 To ncol)
ReDim maxmse(1 To ncol)
ReDim Fenter(1 To ncol)
ReDim Fleave(1 To ncol)
ReDim reg(1 To 1)
ReDim mincor(2 To 2)
ReDim out(1 To 1)
ReDim maxFp(1 To 1)
ReDim minFp(1 To 1)
ReDim SSER(1 To 1)
'default critical values
If IsMissing(Fcrit1) Then
Fcrit1 = 3.84
Else
End If
If IsMissing(Fcrit2) Then
Fcrit2 = 2.71
Else
End If
If IsMissing(tolerance) Then
tolerance = 0.01
Else
End If
iter = 1000
'first variable to enter
For j = 1 To ncol
For i = 1 To nrow
dummyX(i, j) = rangeX(j).Rows(i).Value
dummyY(i, 1) = rangeY.Rows(i).Value
Next i
Fstat(j) = REGOUTPUT(dummyY, dummyX, 6, 1)
MSR(j) = REGOUTPUT(dummyY, dummyX, 11, 1)
SSE(j) = REGOUTPUT(dummyY, dummyX, 8, 1)
For i = 1 To nrow
dummyX(i, j) = ""
Next i
Next j
maxFp(1) = WorksheetFunction.Max(Fstat)
minFp(1) = 0
If maxFp(1) <= Fcrit1 Then
' MsgBox "No parameter entered (F <= Fcrit)"
Exit Function
Else
reg(1) = WorksheetFunction.Match(maxFp(1), Fstat, 0)
SSER(1) = SSE(reg(1))
For i = 1 To nrow
dummyX(i, reg(1)) = rangeX.Cells(i, reg(1))
Next i
End If
'number of variabels selected
enter = 1
leave = 0
varnum = enter - leave
k = 1
Do While k < iter And varnum < nrow
k = k + 1
ReDim Preserve reg(1 To k)
ReDim Preserve mincor(2 To k)
ReDim Preserve out(1 To k)
ReDim Preserve maxFp(1 To k)
ReDim Preserve minFp(1 To k)
ReDim Preserve SSER(1 To k)
'variable entered
For j = 1 To ncol
If dummyX(1, j) = "" Then
For i = 1 To nrow
dummyX(i, j) = rangeX.Cells(i, j)
Next i
SSE(j) = REGOUTPUT(dummyY, dummyX, 8, 1)
MSE(j) = SSE(j) / (nrow - varnum - 2)
'F-to-enter
Fenter(j) = (SSER(k - 1) - SSE(j)) / MSE(j)
For i = 1 To nrow
dummyX(i, j) = ""
Next i
Else
End If
Next j
maxFp(k) = WorksheetFunction.Large(Fenter, enter)
reg(k) = WorksheetFunction.Match(maxFp(k), Fenter, 0)
SSER(k) = SSE(reg(k))
maxmse(k) = MSE(reg(k))
If maxFp(k) >= Fcrit1 Then
enter = enter + 1
For i = 1 To nrow
dummyX(i, reg(k)) = rangeX.Cells(i, reg(k))
Next i
Else
Exit Do
End If
'correlation matrix (.01 tolerance)
For j = 1 To ncol
If dummyX(1, j) <> "" Then
For i = 1 To nrow
dummyXY(i, 1) = rangeX.Cells(i, j)
Next i
For counter = 1 To ncol
For i = 1 To nrow
If dummyX(1, counter) <> "" And j <> counter Then
dummyXX(i, counter) = rangeX.Cells(i, counter)
Else
dummyXX(i, counter) = ""
End If
Next i
Next counter
cormat(j) = 1 - REGOUTPUT(dummyXY, dummyXX, 2, 1)
Else
cormat(j) = 1
End If
Next j
mincor(k) = WorksheetFunction.Min(cormat)
If mincor(k) <= tolerance Then
leave = leave + 1
SSER(k) = SSER(k - 1)
For i = 1 To nrow
dummyX(i, reg(k)) = ""
Next i
GoTo Endloop
Else
End If
'variable left
For j = 1 To ncol
If j <> reg(k) And dummyX(1, j) <> "" Then
'F-to-leave
For i = 1 To nrow
dummyX(i, j) = ""
Next i
SSE(j) = REGOUTPUT(dummyY, dummyX, 8, 1)
Fleave(j) = (SSE(j) - SSER(k)) / maxmse(k)
For i = 1 To nrow
dummyX(i, j) = rangeX.Cells(i, j)
Next i
Else
End If
Next j
minFp(k) = WorksheetFunction.Small(Fleave, leave + 1)
out(k) = WorksheetFunction.Match(minFp(k), Fleave, 0)
If minFp(k) <= Fcrit2 Then
leave = leave + 1
SSER(k) = SSER(k - 1)
For i = 1 To nrow
dummyX(i, out(k)) = ""
Next i
Else
End If
varnum = enter - leave
Endloop:
Loop
'regression statistics
result = MULTIREG(dummyY, dummyX)
STEPREG = result
End Function
Function STEPREGOUT(rangeY As Range, rangeX As Range, row As Integer, col As Integer, Optional Fcrit1, _
Optional Fcrit2, Optional tolerance)
Dim result As Variant
result = STEPREG(rangeY, rangeX) ', Fcrit1, Fcrit2, tolerance)
STEPREGOUT = result(row, col)
End Function
Note that subroutines are handed over to two functions called REGOUTPUT and MULTIREG. They calculate the
required regression statistics. THe code is as follows:
Option Explicit
Function MULTIREG(rangeY As Variant, rangeX As Variant)
'Input: at most one dependent variables, at least one range of explanatory variables
'Output: regression statistics
'Comment: data has to be arranged in cols
'Comment: no missing values allowed (within)
'Comment: series can be of different lenghts
'Comment: input range has to be the same number of rows for x and y
'Florian Boehlandt, December 2008
Dim df As Integer
Dim i As Integer, j As Integer, countcol As Integer, countrow As Integer
Dim ny As Integer, nx As Integer, col As Integer, ncol As Integer
Dim alpha As Long
Dim frow As Integer, lRow As Integer, Obs As Integer
Dim ymean As Single, yvarp As Single, yvar As Single
Dim xt As Variant, xtx As Variant, yt As Variant, xty As Variant
Dim xtxinv As Variant, coeff As Variant
Dim yhat As Variant
Dim SSE As Single
Dim loglike As Single, akaike As Single, schwarz As Single, hannan As Single
Dim skewness As Single, kurtosis As Single, jarber As Single, jarberp As Single
Dim chisqd As Variant
Dim durwat As Single
Dim durlim As Variant
Dim MSE As Single, regsq As Single, adjregsq As Single, mreg As Single, SEE As Single
Dim totvar As Single, SSR As Single, MSR As Single, Fstat As Single, pvalueF As Single
Dim rngout As Integer
'Errorhandler for array data / range data
If TypeOf rangeX Is Excel.Range And TypeOf rangeY Is Excel.Range Then
ny = rangeY.Rows.Count
nx = rangeX.Rows.Count
col = rangeY.Columns.Count
ncol = rangeX.Columns.Count
ElseIf IsArray(rangeX) And IsArray(rangeY) Then
ny = UBound(rangeY, 1)
nx = UBound(rangeX, 1)
col = UBound(rangeY, 2)
ncol = UBound(rangeX, 2)
End If
'Error message: y-Range
If Not col = 1 Then
MsgBox "Y Range does not match specifications!"
End If
'Error message: identical row numbers
If Not (ny = nx) Then
MsgBox "Data ranges don't match!"
Exit Function
End If
alpha = 0.05
frow = WorksheetFunction.Max(FIRSTINCOLUMN(rangeY), FIRSTINCOLUMN(rangeX)) + 1
lRow = WorksheetFunction.Min(LASTINCOLUMN(rangeY), LASTINCOLUMN(rangeX))
Obs = lRow + 1 - frow
ReDim yfactor(1 To Obs)
ReDim yfactor2(1 To Obs, 1)
ReDim ydiff(1 To Obs)
ReDim ydiffsq(1 To Obs)
ReDim erry(1 To Obs)
ReDim errsq(1 To Obs)
ReDim xfactor(1 To Obs, 1 To 1)
ReDim contr(1 To ncol)
ReDim coeffnam(1 To 2)
countcol = 1
For j = 1 To ncol
contr(j) = rangeX(frow, j)
If contr(j) = "" Then GoTo Nextj
countcol = countcol + 1
ReDim Preserve xfactor(1 To Obs, 1 To countcol)
ReDim Preserve coeffnam(1 To countcol)
coeffnam(countcol) = rangeX(1, j)
countrow = 0
For i = frow To lRow
countrow = countrow + 1
yfactor(countrow) = rangeY(i, 1)
xfactor(countrow, countcol) = rangeX(i, j)
Next i
Nextj:
Next j
'Error message: too many regressors
If Not (nx - countcol) >= 3 Then
MsgBox "There are too many predictor variables for the number of datapoints for each series!"
Exit Function
End If
ymean = WorksheetFunction.Average(yfactor)
yvarp = WorksheetFunction.VarP(yfactor)
yvar = WorksheetFunction.Var(yfactor)
coeffnam(1) = "Intercept"
For i = 1 To Obs
xfactor(i, 1) = 1
ydiff(i) = (yfactor(i) - ymean)
ydiffsq(i) = ydiff(i) ^ 2
Next i
'degrees of freedom (with intercept)
df = Obs - countcol
'matrix calculations
xt = WorksheetFunction.Transpose(xfactor)
xtx = WorksheetFunction.MMult(xt, xfactor)
xtxinv = WorksheetFunction.MInverse(xtx)
yt = WorksheetFunction.Transpose(yfactor)
xty = WorksheetFunction.MMult(xt, yt)
'coefficient estimates
coeff = WorksheetFunction.MMult(xtxinv, xty)
yhat = WorksheetFunction.MMult(xfactor, coeff)
'errors squared
For i = 1 To Obs
yfactor2(i, 1) = yfactor(i)
erry(i) = (yfactor2(i, 1) - yhat(i, 1))
errsq(i) = erry(i) ^ 2
Next i
SSE = WorksheetFunction.Sum(errsq)
loglike = Obs / 2 * (1 + WorksheetFunction.Ln(2 * WorksheetFunction.Pi()) + WorksheetFunction.Ln(SSE / Obs))
akaike = 2 * loglike / Obs + 2 * countcol / Obs
schwarz = 2 * loglike / Obs + countcol * WorksheetFunction.Ln(Obs) / Obs
hannan = 2 * loglike / Obs + 2 * countcol * WorksheetFunction.Ln(WorksheetFunction.Ln(Obs)) / Obs
'normality
skewness = WorksheetFunction.Skew(erry)
kurtosis = WorksheetFunction.Kurt(erry)
jarber = (Obs / 6) * (skewness ^ 2 + kurtosis ^ 2 / 4)
jarberp = WorksheetFunction.ChiDist(jarber, 2)
chisqd = CHINORMAL(erry)
'durbin-watson statistic
ReDim corr(2 To Obs)
For i = 2 To Obs
corr(i) = (erry(i) - erry(i - 1)) ^ 2
Next i
durwat = WorksheetFunction.Sum(corr) / WorksheetFunction.Sum(errsq)
durlim = DURBIN(Obs, countcol)
'heteroscedasticity
ReDim xfactors(1 To Obs)
ReDim szroeterp(2 To countcol)
For i = 2 To countcol
For j = 1 To Obs
xfactors(j) = xfactor(j, i)
Next j
szroeterp(i) = SZROETER(yfactor, xfactors)
Next i
'goodness of fit
MSE = SSE / df
regsq = 1 - SSE / Obs / yvarp
adjregsq = 1 - MSE / yvar
mreg = (regsq) ^ (1 / 2)
SEE = MSE ^ (1 / 2)
totvar = yvar * (Obs - 1)
SSR = totvar - SSE
MSR = SSR / (countcol - 1)
Fstat = MSR / MSE
pvalueF = WorksheetFunction.FDist(Fstat, countcol - 1, df)
'error of coefficient estimates and confidence level estimator
ReDim covmat(1 To countcol, 1 To countcol)
ReDim coefferr(1 To countcol)
ReDim tstat(1 To countcol)
ReDim pvaluet(1 To countcol)
ReDim lowconf(1 To countcol)
ReDim uppconf(1 To countcol)
For i = 1 To countcol
For j = 1 To countcol
covmat(i, j) = xtxinv(i, j) * MSE
Next j
coefferr(i) = covmat(i, i) ^ (1 / 2)
tstat(i) = coeff(i, 1) / coefferr(i)
pvaluet(i) = WorksheetFunction.TDist(Abs(tstat(i)), df, 2)
lowconf(i) = coeff(i, 1) - WorksheetFunction.TInv(alpha, df) * coefferr(i)
uppconf(i) = coeff(i, 1) + WorksheetFunction.TInv(alpha, df) * coefferr(i)
Next i
'correlation matrix of coefficients
ReDim cormat(1 To countcol, 1 To countcol)
For i = 1 To countcol
For j = 1 To countcol
cormat(i, j) = covmat(i, j) / (coefferr(i) * coefferr(j))
Next j
Next i
rngout = WorksheetFunction.Max(11, countcol)
ReDim result(1 To rngout, 1 To 8)
result(1, 1) = mreg
result(2, 1) = regsq
result(3, 1) = adjregsq
result(4, 1) = SEE
result(5, 1) = Obs
result(6, 1) = Fstat
result(7, 1) = pvalueF
result(8, 1) = SSE
result(9, 1) = SSR
result(10, 1) = MSE
result(11, 1) = MSR
For i = 1 To countcol
result(i, 2) = coeffnam(i)
result(i, 3) = coeff(i, 1)
result(i, 4) = pvaluet(i)
Next i
For i = 2 To countcol
result(i, 5) = szroeterp(i)
Next i
result(1, 6) = durwat
For i = 2 To 5
result(i, 6) = durlim(i - 1)
Next i
result(1, 7) = jarber
result(2, 7) = jarberp
For i = 3 To 4
result(i, 7) = chisqd(i - 2)
Next i
result(1, 8) = akaike
result(2, 8) = schwarz
result(3, 8) = hannan
MULTIREG = result
End Function
Function REGOUTPUT(rangeY As Variant, rangeX As Variant, row As Integer, col As Integer)
Dim result As Variant
result = MULTIREG(rangeY, rangeX)
REGOUTPUT = result(row, col)
End Function
I have omitted some functions that were referred to above (like the oen for the DUrbin-Watson stat). They should
not have much impact on the time of the code execution.