Skip to content

Commit eda1b08

Browse files
rmv combination (#250)
* rmv combination * Update README.md * Update README.md
1 parent 09eb646 commit eda1b08

File tree

7 files changed

+53
-70
lines changed

7 files changed

+53
-70
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "FixedEffectModels"
22
uuid = "9d5cd8c9-2029-5cab-9928-427838db53e3"
3-
version = "1.9.4"
3+
version = "1.9.5"
44

55
[deps]
66
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
@@ -19,7 +19,7 @@ Vcov = "ec2bfdc2-55df-4fc9-b9ae-4958c2cf2486"
1919

2020
[compat]
2121
DataFrames = "0.21, 0.22, 1"
22-
FixedEffects = "2"
22+
FixedEffects = "2.3"
2323
PrecompileTools = "1"
2424
Reexport = "0.1, 0.2, 1"
2525
Statistics = "1"

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), Vcov.cluster(:State), weig
3232
```
3333

3434

35-
- A typical formula is composed of one dependent variable, exogeneous variables, endogeneous variables, instrumental variables, and a set of high-dimensional fixed effects.
35+
- A typical formula is composed of one dependent variable, exogenous variables, endogenous variables, instrumental variables, and a set of high-dimensional fixed effects.
3636

3737
```julia
3838
dependent variable ~ exogenous variables + (endogenous variables ~ instrumental variables) + fe(fixedeffect variable)
@@ -49,7 +49,7 @@ reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), Vcov.cluster(:State), weig
4949
reg(df, @formula(Sales ~ (Price ~ Pimin)))
5050
```
5151

52-
To construct formula programatically, use
52+
To construct formula programmatically, use
5353
```julia
5454
reg(df, term(:Sales) ~ term(:NDI) + fe(:State) + fe(:Year))
5555
```
@@ -111,7 +111,7 @@ df = dataset("plm", "Cigar")
111111
reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), method = :CUDA)
112112
```
113113

114-
The package also supports Apple GPUs with `Metal.jl`, although it does not really improve perfomances
114+
The package also supports Apple GPUs with `Metal.jl`, although I could not find a way to get better performance
115115
```julia
116116
using Metal, FixedEffectModels
117117
@assert Metal.functional()

src/fit.jl

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -259,21 +259,20 @@ function StatsAPI.fit(::Type{FixedEffectModel},
259259
# initialize iterations and converged
260260
iterations = Int[]
261261
convergeds = Bool[]
262+
cols = vcat(eachcol(y), eachcol(Xexo))
262263
if has_iv
263-
Xall = Combination(y, Xexo, Xendo, Z)
264-
else
265-
Xall = Combination(y, Xexo)
264+
append!(cols, eachcol(Xendo), eachcol(Z))
266265
end
267266

268267
# compute 2-norm (sum of squares) for each variable
269268
# (to see if they are collinear with the fixed effects)
270-
sumsquares_pre = [sum(abs2, x) for x in eachcol(Xall)]
269+
sumsquares_pre = [sum(abs2, x) for x in cols]
271270

272271
# partial out fixed effects
273-
_, iterations, convergeds = solve_residuals!(Xall, feM; maxiter = maxiter, tol = tol, progress_bar = progress_bar)
272+
_, iterations, convergeds = solve_residuals!(cols, feM; maxiter = maxiter, tol = tol, progress_bar = progress_bar)
274273

275274
# re-compute 2-norm (sum of squares) for each variable
276-
sumsquares_post = [sum(abs2, x) for x in eachcol(Xall)]
275+
sumsquares_post = [sum(abs2, x) for x in cols]
277276

278277
# mark variables that are likely to be collinear with the fixed effects
279278
collinear_tol = min(1e-6, tol / 10)

src/partial_out.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ function partial_out(
112112
formula_x_schema = apply_schema(formula_x, schema(formula_x, subdf, contrasts), StatisticalModel)
113113
X = convert(Matrix{Float64}, modelmatrix(formula_x_schema, subdf))
114114
if has_fes
115-
X, b, c = solve_residuals!(X, feM; maxiter = maxiter, tol = tol, progress_bar = false)
115+
_, b, c = solve_residuals!(eachcol(X), feM; maxiter = maxiter, tol = tol, progress_bar = false)
116116
append!(iterations, b)
117117
append!(convergeds, c)
118118
end
@@ -122,7 +122,7 @@ function partial_out(
122122
end
123123
# Compute residuals
124124
if size(X, 2) > 0
125-
mul!(Y, X, X\Y, -1.0, 1.0)
125+
mul!(Y, X, X \ Y, -1.0, 1.0)
126126
end
127127
residuals = Y
128128

src/utils/basecol.jl

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,3 @@
1-
##############################################################################
2-
##
3-
## Combination behaves like [A B C ...] without forming it
4-
##
5-
##############################################################################
6-
7-
struct Combination{T} <: AbstractMatrix{T}
8-
A::Tuple
9-
cumlength::Vector{Int}
10-
end
11-
12-
function Combination(A::Union{AbstractVector{T}, AbstractMatrix{T}}...) where {T}
13-
Combination{T}(A, cumsum([size(x, 2) for x in A]))
14-
end
15-
16-
Combination() = error("`Combination` requires at least one argument")
17-
18-
Base.size(c::Combination) = (size(c.A[1], 1), c.cumlength[end])
19-
Base.size(c::Combination, i::Integer) = size(c)[i]
20-
21-
function Base.view(c::Combination, ::Colon, j)
22-
index = searchsortedfirst(c.cumlength, j)
23-
newj = index == 1 ? j : j - c.cumlength[index-1]
24-
view(c.A[index], :, newj)
25-
end
26-
27-
281
##############################################################################
292
##
303
## Returns base of [A B C ...]
@@ -54,7 +27,11 @@ end
5427

5528
# generalized 2inverse
5629
function invsym!(X::AbstractMatrix; has_intercept = false)
57-
# The C value adjusts the check to the relative scale of the variable. The C value is equal to the corrected sum of squares for the variable, unless the corrected sum of squares is 0, in which case C is 1. If you specify the NOINT option but not the ABSORB statement, PROC GLM uses the uncorrected sum of squares instead. The default value of the SINGULAR= option, 107, might be too small, but this value is necessary in order to handle the high-degree polynomials used in the literature to compare regression routin
30+
# Options from SAS
31+
# The C value adjusts the check to the relative scale of the variable.
32+
# The C value is equal to the corrected sum of squares for the variable, unless the corrected sum of squares is 0, in which case C is 1.
33+
# If you specify the NOINT option but not the ABSORB statement, PROC GLM uses the uncorrected sum of squares instead.
34+
# The default value of the SINGULAR= option, 1e-7, might be too small, but this value is necessary in order to handle the high-degree polynomials used in the literature to compare regression routines
5835
tols = max.(diag(X), 1)
5936
for j in 1:size(X, 1)
6037
d = X[j,j]
@@ -82,7 +59,6 @@ function getcols(X::AbstractMatrix, basecolX::AbstractVector)
8259
sum(basecolX) == size(X, 2) ? X : X[:, basecolX]
8360
end
8461

85-
8662
##############################################################################
8763
# Auxiliary functions to find columns of exogenous, endogenous and IV variables
8864
##############################################################################

test/fit.jl

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -661,13 +661,39 @@ end
661661
@test coef(x) ≈ [- 0.11752306001586807] atol = 1e-4
662662
@test x.iterations <= 50
663663

664+
665+
666+
# add tests with missing fixed effects
667+
df.Firm_missing = ifelse.(df.Firm .<= 30, missing, df.Firm)
668+
669+
## test with missing fixed effects
670+
m = @formula Wage ~ Emp + fe(Firm_missing)
671+
x = reg(df, m)
672+
@test coef(x) ≈ [-.1093657] atol = 1e-4
673+
@test stderror(x) ≈ [.032949 ] atol = 1e-4
674+
@test r2(x) ≈ 0.8703 atol = 1e-2
675+
@test adjr2(x) ≈ 0.8502 atol = 1e-2
676+
@test x.nobs == 821
677+
678+
## test with missing interaction
679+
df.Year2 = df.Year .>= 1980
680+
m = @formula Wage ~ Emp + fe(Firm_missing) & fe(Year2)
681+
x = reg(df, m)
682+
@test coef(x) ≈ [-0.100863] atol = 1e-4
683+
@test stderror(x) ≈ [0.04149] atol = 1e-4
684+
@test x.nobs == 821
685+
end
686+
687+
688+
@testset "gpu" begin
664689
methods_vec = [:cpu]
665690
if CUDA.functional()
666691
push!(methods_vec, :CUDA)
667692
end
668-
if Metal.functional()
669-
push!(methods_vec, :Metal)
670-
end
693+
#if Metal.functional()
694+
# push!(methods_vec, :Metal)
695+
#end
696+
df = DataFrame(CSV.File(joinpath(dirname(pathof(FixedEffectModels)), "../dataset/EmplUK.csv")))
671697
for method in methods_vec
672698
# same thing with float32 precision
673699
local m = @formula Wage ~ Emp + fe(Firm)
@@ -692,28 +718,6 @@ end
692718
local x = reg(df, m, weights = :Output, method = method, double_precision = false)
693719
@test coef(x) ≈ [- 0.043475472188120416] atol = 1e-3
694720
end
695-
696-
697-
# add tests with missing fixed effects
698-
df.Firm_missing = ifelse.(df.Firm .<= 30, missing, df.Firm)
699-
700-
701-
## test with missing fixed effects
702-
m = @formula Wage ~ Emp + fe(Firm_missing)
703-
x = reg(df, m)
704-
@test coef(x) ≈ [-.1093657] atol = 1e-4
705-
@test stderror(x) ≈ [.032949 ] atol = 1e-4
706-
@test r2(x) ≈ 0.8703 atol = 1e-2
707-
@test adjr2(x) ≈ 0.8502 atol = 1e-2
708-
@test x.nobs == 821
709-
710-
## test with missing interaction
711-
df.Year2 = df.Year .>= 1980
712-
m = @formula Wage ~ Emp + fe(Firm_missing) & fe(Year2)
713-
x = reg(df, m)
714-
@test coef(x) ≈ [-0.100863] atol = 1e-4
715-
@test stderror(x) ≈ [0.04149] atol = 1e-4
716-
@test x.nobs == 821
717721
end
718722

719723

test/predict.jl

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -383,14 +383,18 @@ end
383383
model = @formula Sales ~ (Price ~ Pimin) + fe(State) + fe(Year)
384384
result = reg(df, model, subset = df.State .<= 30, weights = :Pop, save = true)
385385
@test fe(result)[1, :fe_Year] + fe(result)[1, :fe_State] ≈ 158.91798 atol = 1e-4
386+
end
387+
386388

389+
@testset "gpu" begin
390+
df = DataFrame(CSV.File(joinpath(dirname(pathof(FixedEffectModels)), "../dataset/Cigar.csv")))
387391
methods_vec = [:cpu]
388392
if CUDA.functional()
389393
push!(methods_vec, :CUDA)
390394
end
391-
if Metal.functional()
392-
push!(methods_vec, :Metal)
393-
end
395+
#if Metal.functional()
396+
# push!(methods_vec, :Metal)
397+
#end
394398
for method in methods_vec
395399
local model = @formula Sales ~ Price + fe(Year)
396400
local result = reg(df, model, save = true, method = method, double_precision = false)

0 commit comments

Comments
 (0)