Skip to content

Commit 9e5d020

Browse files
remove cholesky (#252)
* remove cholesky * speed up
1 parent eda1b08 commit 9e5d020

File tree

4 files changed

+62
-65
lines changed

4 files changed

+62
-65
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "FixedEffectModels"
22
uuid = "9d5cd8c9-2029-5cab-9928-427838db53e3"
3-
version = "1.9.5"
3+
version = "1.10.0"
44

55
[deps]
66
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
@@ -28,5 +28,5 @@ StatsBase = "0.33, 0.34"
2828
StatsFuns = "0.9, 1"
2929
StatsModels = "0.7"
3030
Tables = "1"
31-
Vcov = "0.7"
31+
Vcov = "0.8"
3232
julia = "1.6"

src/fit.jl

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ function StatsAPI.fit(::Type{FixedEffectModel},
330330
notcollinear_fe_z = collinear_fe[find_cols_z(n_exo, n_endo, n_z)] .== false
331331
notcollinear_fe_endo_small = notcollinear_fe_endo[basis_endo]
332332

333-
basis_all = basis(eachcol(Xexo)..., eachcol(Z)..., eachcol(Xendo)...; has_intercept = has_intercept)
333+
basis_all = basis(Xexo, Z, eachcol(Xendo)...; has_intercept = has_intercept)
334334
basis_Xexo = basis_all[1:size(Xexo, 2)] .* notcollinear_fe_exo
335335
basis_Z = basis_all[(size(Xexo, 2) +1):(size(Xexo, 2) + size(Z, 2))] .* notcollinear_fe_z
336336
basis_endo_small = basis_all[(size(Xexo, 2) + size(Z, 2) + 1):end] .* notcollinear_fe_endo_small
@@ -354,7 +354,7 @@ function StatsAPI.fit(::Type{FixedEffectModel},
354354
@info "Endogenous vars collinear with ivs. Recategorized as exogenous: $(out)"
355355

356356
# third pass
357-
basis_all = basis(eachcol(Xexo)..., eachcol(Z)..., eachcol(Xendo)...; has_intercept = has_intercept)
357+
basis_all = basis(Xexo, Z, Xendo; has_intercept = has_intercept)
358358
basis_Xexo = basis_all[1:size(Xexo, 2)]
359359
basis_Z = basis_all[(size(Xexo, 2) +1):(size(Xexo, 2) + size(Z, 2))]
360360
end
@@ -366,22 +366,22 @@ function StatsAPI.fit(::Type{FixedEffectModel},
366366

367367
# Build
368368
newZ = hcat(Xexo, Z)
369-
Pi = ldiv!(cholesky!(Symmetric(newZ'newZ)), newZ'Xendo)
369+
Pi = ls_solve(newZ, Xendo)
370370
Xhat = hcat(Xexo, newZ * Pi)
371371
X = hcat(Xexo, Xendo)
372372

373373
# prepare residuals used for first stage F statistic
374374
## partial out Xendo in place wrt (Xexo, Z)
375375
Xendo_res = BLAS.gemm!('N', 'N', -1.0, newZ, Pi, 1.0, Xendo)
376376
## partial out Z in place wrt Xexo
377-
Pi2 = ldiv!(cholesky!(Symmetric(Xexo'Xexo)), Xexo'Z)
377+
Pi2 = ls_solve(Xexo, Z)
378378
Z_res = BLAS.gemm!('N', 'N', -1.0, Xexo, Pi2, 1.0, Z)
379379
else
380380
# get linearly independent columns
381381
n_exo = size(Xexo, 2)
382382
perm = 1:n_exo
383383
notcollinear_fe_exo = collinear_fe[find_cols_exo(n_exo)] .== false
384-
basis_Xexo = basis(eachcol(Xexo)...; has_intercept = has_intercept) .* notcollinear_fe_exo
384+
basis_Xexo = basis(Xexo; has_intercept = has_intercept) .* notcollinear_fe_exo
385385
Xexo = getcols(Xexo, basis_Xexo)
386386
Xhat = Xexo
387387
X = Xexo
@@ -393,9 +393,11 @@ function StatsAPI.fit(::Type{FixedEffectModel},
393393
## Do the regression
394394
##
395395
##############################################################################
396-
397-
crossx = cholesky!(Symmetric(Xhat'Xhat))
398-
coef = ldiv!(crossx, Xhat'y)
396+
crossx = Xhat' * Xhat
397+
Xy = Symmetric(hvcat(2, crossx, Xhat'y, zeros(size(Xhat, 2))', [0.0]))
398+
invsym!(Xy; diagonal = 1:size(Xhat, 2))
399+
invcrossx = Symmetric(.- Xy[1:(end-1),1:(end-1)])
400+
coef = Xy[1:(end-1),end]
399401

400402
##############################################################################
401403
##
@@ -453,9 +455,8 @@ function StatsAPI.fit(::Type{FixedEffectModel},
453455
end
454456

455457
# Compute standard error
456-
vcov_data = Vcov.VcovData(Xhat, crossx, residuals, nobs - size(X, 2) - dof_fes)
458+
vcov_data = Vcov.VcovData(Xhat, crossx, invcrossx, residuals, nobs - size(X, 2) - dof_fes)
457459
matrix_vcov = StatsAPI.vcov(vcov_data, vcov_method)
458-
459460
# Compute Fstat
460461
F = Fstat(coef, matrix_vcov, has_intercept)
461462
# dof_ is the number of estimated coefficients beyond the intercept.
@@ -465,15 +466,15 @@ function StatsAPI.fit(::Type{FixedEffectModel},
465466
# Compute Fstat of First Stage
466467
if has_iv && first_stage
467468
Pip = Pi[(size(Pi, 1) - size(Z_res, 2) + 1):end, :]
468-
try
469+
#try
469470
r_kp = Vcov.ranktest!(Xendo_res, Z_res, Pip,
470471
vcov_method, size(X, 2), dof_fes)
471472
p_kp = chisqccdf(size(Z_res, 2) - size(Xendo_res, 2) + 1, r_kp)
472473
F_kp = r_kp / size(Z_res, 2)
473-
catch
474-
@info "ranktest failed ; first-stage statistics not estimated"
475-
p_kp, F_kp = NaN, NaN
476-
end
474+
# catch
475+
# @info "ranktest failed ; first-stage statistics not estimated"
476+
# p_kp, F_kp = NaN, NaN
477+
# end
477478
end
478479

479480
# Compute rss, tss

src/utils/basecol.jl

Lines changed: 42 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,65 @@
1-
##############################################################################
2-
##
3-
## Returns base of [A B C ...]
4-
## Important: it must be the case that it returns the in order, that is [A B A] returns [true true false] not [false true true]
5-
##
6-
##
7-
##############################################################################
8-
function basis(@nospecialize(xs::AbstractVector...); has_intercept = false)
9-
invXX = invsym!(crossprod(collect(xs)); has_intercept = has_intercept)
10-
return diag(invXX) .> 0
11-
end
12-
13-
function crossprod(xs::Vector{<:AbstractVector})
14-
XX = zeros(length(xs), length(xs))
15-
for i in 1:length(xs)
16-
for j in 1:i
17-
XX[i, j] = xs[i]' * xs[j]
18-
end
19-
end
1+
# create the matrix X'X
2+
function crossprod(xs...)
3+
idx = vcat(0, cumsum(size(x, 2) for x in xs))
4+
XX = zeros(idx[end], idx[end])
205
for i in 1:length(xs)
21-
for j in (i+1):length(xs)
22-
XX[i, j] = XX[j, i]
6+
for j in i:length(xs)
7+
XX[(idx[i]+1):idx[i+1], (idx[j]+1):idx[j+1]] .= xs[i]' * xs[j]
238
end
249
end
25-
return XX
10+
return Symmetric(XX, :U)
2611
end
2712

2813
# generalized 2inverse
29-
function invsym!(X::AbstractMatrix; has_intercept = false)
30-
# Options from SAS
31-
# The C value adjusts the check to the relative scale of the variable.
32-
# The C value is equal to the corrected sum of squares for the variable, unless the corrected sum of squares is 0, in which case C is 1.
33-
# If you specify the NOINT option but not the ABSORB statement, PROC GLM uses the uncorrected sum of squares instead.
34-
# The default value of the SINGULAR= option, 107, might be too small, but this value is necessary in order to handle the high-degree polynomials used in the literature to compare regression routines
14+
#actually return minus the symmetric
15+
function invsym!(X::Symmetric; has_intercept = false, setzeros = false, diagonal = 1:size(X, 2))
3516
tols = max.(diag(X), 1)
36-
for j in 1:size(X, 1)
17+
buffer = zeros(size(X, 1))
18+
for j in diagonal
3719
d = X[j,j]
38-
if abs(d) < tols[j] * sqrt(eps())
39-
X[j,:] .= 0
40-
X[:,j] .= 0
20+
if setzeros && abs(d) < tols[j] * sqrt(eps())
21+
X.data[1:j,j] .= 0
22+
X.data[j,(j+1):end] .= 0
4123
else
42-
X[j,:] = X[j,:] ./ d
43-
for i in 1:size(X, 1)
44-
if i != j
45-
X[i,:] .= X[i,:] .- X[i,j] .* X[j,:]
46-
X[i,j] = -X[i,j] / d
47-
end
48-
end
49-
X[j,j] = 1 / d
24+
# used to mimic SAS; now similar to SweepOperators
25+
copy!(buffer, view(X, :, j))
26+
Symmetric(BLAS.syrk!('U', 'N', -1/d, buffer, one(eltype(X)), X.data))
27+
rmul!(buffer, 1 / d)
28+
@views copy!(X.data[1:j-1,j], buffer[1:j-1])
29+
@views copy!(X.data[j, j+1:end], buffer[j+1:end])
30+
X[j,j] = - 1 / d
5031
end
51-
if has_intercept && j == 1
32+
if setzeros && has_intercept && j == 1
5233
tols = max.(diag(X), 1)
5334
end
5435
end
5536
return X
5637
end
5738

39+
## Returns base of [A B C ...]
40+
## Important: it must be the case that it returns the in order, that is [A B A] returns [true true false] not [false true true]
41+
function basis(xs...; has_intercept = false)
42+
invXX = invsym!(crossprod(xs...); has_intercept = has_intercept, setzeros = true)
43+
return diag(invXX) .< 0
44+
end
45+
5846
function getcols(X::AbstractMatrix, basecolX::AbstractVector)
5947
sum(basecolX) == size(X, 2) ? X : X[:, basecolX]
6048
end
6149

50+
51+
function ls_solve(X, y::AbstractVector)
52+
Xy = crossprod(X, y)
53+
invsym!(Xy, diagonal = 1:size(X, 2))
54+
return Xy[1:size(X, 2),end]
55+
end
56+
57+
function ls_solve(X, Y::AbstractMatrix)
58+
XY = crossprod(X, Y)
59+
invsym!(XY, diagonal = 1:size(X, 2))
60+
return XY[1:size(X, 2),(end-size(Y, 2)+1):end]
61+
end
62+
6263
##############################################################################
6364
# Auxiliary functions to find columns of exogeneous, endogenous and IV variables
6465
##############################################################################

test/fit.jl

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
using CUDA, Metal, FixedEffectModels, CategoricalArrays, CSV, DataFrames, Test, LinearAlgebra
1+
using FixedEffectModels, CategoricalArrays, CSV, DataFrames, Test, LinearAlgebra
22
using FixedEffectModels: nullloglikelihood_within
3-
3+
using CUDA, Metal
44

55
##############################################################################
66
##
@@ -723,7 +723,6 @@ end
723723

724724
@testset "missings" begin
725725

726-
# Missing
727726
df1 = DataFrame(a=[1.0, 2.0, 3.0, 4.0], b=[5.0, 7.0, 11.0, 13.0])
728727
df2 = DataFrame(a=[1.0, missing, 3.0, 4.0], b=[5.0, 7.0, 11.0, 13.0])
729728
x = reg(df1, @formula(a ~ b))
@@ -742,7 +741,3 @@ end
742741
x = reg(df1, @formula(a ~ b + fe(c)))
743742
@test coef(x) [0.5] atol = 1e-4
744743
end
745-
746-
747-
748-

0 commit comments

Comments
 (0)