
Commit 6d23721

fix collinearity + update CUDA (#241)
* fix collinearity + update CUDA
* update help
* Update README.md
* Update README.md
* Update ci.yml
* Update Project.toml
* Update Project.toml
* correct CUDA spelling
1 parent 851eca9 commit 6d23721

File tree

9 files changed: +63 −24 lines


.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -15,7 +15,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
+          - '1.9' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
           - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
         os:
           - ubuntu-latest
```

Project.toml

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 name = "FixedEffectModels"
 uuid = "9d5cd8c9-2029-5cab-9928-427838db53e3"
-version = "1.9.3"
+version = "1.9.4"
 
 [deps]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
```

README.md

Lines changed: 15 additions & 5 deletions
````diff
@@ -78,7 +78,7 @@ reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), Vcov.cluster(:State), weig
 - The option `save` can be set to one of the following: `:none` (default) to save nothing, `:residuals` to save residuals, `:fe` to save fixed effects, and `:all` to save both. Once saved, they can then be accessed using `residuals(m)` or `fe(m)` where `m` is the estimated model (the object returned by the function `reg`). Both residuals and fixed effects are aligned with the original dataframe used to estimate the model.
 
-- The option `method` can be set to one of the following: `:cpu`, `:gpu` (see Performances below).
+- The option `method` can be set to one of the following: `:cpu`, `:CUDA`, or `:Metal` (see Performances below).
 
 ## Output
@@ -99,17 +99,27 @@ You may use [RegressionTables.jl](https://github.com/jmboehm/RegressionTables.jl
 ### MultiThreads
 `FixedEffectModels` is multi-threaded. Use the option `nthreads` to select the number of threads to use in the estimation (defaults to `Threads.nthreads()`).
 
-### Nvidia GPU
-The package has support for Nvidia GPUs (thanks to Paul Schrimpf). This can make the package an order of magnitude faster for complicated problems.
+### GPUs
+The package has experimental support for GPUs. This can make the package an order of magnitude faster for complicated problems.
 
-If you have a Nvidia GPU, run `using CUDA` before `using FixedEffectModels`. Then, estimate a model with `method = :gpu`. For maximum speed, set the floating point precision to `Float32` with `double_precision = false`.
+If you have an Nvidia GPU, run `using CUDA` before `using FixedEffectModels`. Then, estimate a model with `method = :CUDA`.
 
 ```julia
 using CUDA, FixedEffectModels
+@assert CUDA.functional()
 df = dataset("plm", "Cigar")
-reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), method = :gpu, double_precision = false)
+reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), method = :CUDA)
 ```
 
+The package also supports Apple GPUs with `Metal.jl`, although this does not meaningfully improve performance:
+```julia
+using Metal, FixedEffectModels
+@assert Metal.functional()
+df = dataset("plm", "Cigar")
+reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), method = :Metal)
+```
+
 
 ## Solution Method
 Denote the model `y = X β + D θ + e` where X is a matrix with few columns and D is the design matrix from categorical variables. Estimates for `β`, along with their standard errors, are obtained in two steps:
````

src/fit.jl

Lines changed: 13 additions & 8 deletions
```diff
@@ -10,9 +10,9 @@ Estimate a linear model with high dimensional categorical variables / instrument
 * `contrasts::Dict = Dict()` An optional Dict of contrast codings for each categorical variable in the `formula`. Any unspecified variables will have `DummyCoding`.
 * `weights::Union{Nothing, Symbol}` A symbol to refer to a column of weights
 * `save::Symbol`: Should residuals and eventual estimated fixed effects be saved in a dataframe? Defaults to `:none`. Use `save = :residuals` to only save residuals, `save = :fe` to only save fixed effects, `save = :all` for both. Once saved, they can then be accessed using `residuals(m)` or `fe(m)` where `m` is the object returned by the estimation. The returned DataFrame is automatically aligned with the original DataFrame.
-* `method::Symbol`: A symbol for the method. Default is :cpu. Alternatively, :gpu requires `CuArrays`. In this case, use the option `double_precision = false` to use `Float32`.
-* `nthreads::Integer` Number of threads to use in the estimation. If `method = :cpu`, defaults to `Threads.nthreads()`. If `method = :gpu`, defaults to 256.
-* `double_precision::Bool`: Should the demeaning operation use Float64 rather than Float32? Defaults to true.
+* `method::Symbol`: A symbol for the method. Default is :cpu. Alternatively, use :CUDA or :Metal (in this case, you need to import the respective package before importing FixedEffectModels).
+* `nthreads::Integer` Number of threads to use in the estimation. If `method = :cpu`, defaults to `Threads.nthreads()`. Otherwise, defaults to 256.
+* `double_precision::Bool`: Should the demeaning operation use Float64 rather than Float32? Defaults to true if `method = :cpu` and false if `method = :CUDA` or `method = :Metal`.
 * `tol::Real` Tolerance. Default to 1e-6.
 * `maxiter::Integer = 10000`: Maximum number of iterations
 * `drop_singletons::Bool = true`: Should singletons be dropped?
@@ -54,7 +54,7 @@ function reg(df,
     save::Union{Bool, Symbol} = :none,
     method::Symbol = :cpu,
     nthreads::Integer = method == :cpu ? Threads.nthreads() : 256,
-    double_precision::Bool = true,
+    double_precision::Bool = method == :cpu,
     tol::Real = 1e-6,
     maxiter::Integer = 10000,
     drop_singletons::Bool = true,
@@ -86,6 +86,11 @@ function StatsAPI.fit(::Type{FixedEffectModel},
     df = DataFrame(df; copycols = false)
     N = size(df, 1)
 
+    if method == :gpu
+        @info "method = :gpu is deprecated. Use method = :CUDA or method = :Metal"
+        method = :CUDA
+    end
+
     ##############################################################################
     ##
     ## Parse formula
@@ -318,15 +323,15 @@ function StatsAPI.fit(::Type{FixedEffectModel},
 
     # first pass: remove collinear variables in Xendo
     notcollinear_fe_endo = collinear_fe[find_cols_endo(n_exo, n_endo)] .== false
-    basis_endo = basis(eachcol(Xendo)...) .* notcollinear_fe_endo
+    basis_endo = basis(eachcol(Xendo)...; has_intercept = false) .* notcollinear_fe_endo
     Xendo = getcols(Xendo, basis_endo)
 
     # second pass: remove collinear variables in Xexo, Z, and Xendo
     notcollinear_fe_exo = collinear_fe[find_cols_exo(n_exo)] .== false
     notcollinear_fe_z = collinear_fe[find_cols_z(n_exo, n_endo, n_z)] .== false
     notcollinear_fe_endo_small = notcollinear_fe_endo[basis_endo]
 
-    basis_all = basis(eachcol(Xexo)..., eachcol(Z)..., eachcol(Xendo)...)
+    basis_all = basis(eachcol(Xexo)..., eachcol(Z)..., eachcol(Xendo)...; has_intercept = has_intercept)
     basis_Xexo = basis_all[1:size(Xexo, 2)] .* notcollinear_fe_exo
     basis_Z = basis_all[(size(Xexo, 2) + 1):(size(Xexo, 2) + size(Z, 2))] .* notcollinear_fe_z
     basis_endo_small = basis_all[(size(Xexo, 2) + size(Z, 2) + 1):end] .* notcollinear_fe_endo_small
@@ -350,7 +355,7 @@ function StatsAPI.fit(::Type{FixedEffectModel},
     @info "Endogenous vars collinear with ivs. Recategorized as exogenous: $(out)"
 
     # third pass
-    basis_all = basis(eachcol(Xexo)..., eachcol(Z)..., eachcol(Xendo)...)
+    basis_all = basis(eachcol(Xexo)..., eachcol(Z)..., eachcol(Xendo)...; has_intercept = has_intercept)
     basis_Xexo = basis_all[1:size(Xexo, 2)]
     basis_Z = basis_all[(size(Xexo, 2) + 1):(size(Xexo, 2) + size(Z, 2))]
 end
@@ -377,7 +382,7 @@ function StatsAPI.fit(::Type{FixedEffectModel},
     n_exo = size(Xexo, 2)
     perm = 1:n_exo
     notcollinear_fe_exo = collinear_fe[find_cols_exo(n_exo)] .== false
-    basis_Xexo = basis(eachcol(Xexo)...) .* notcollinear_fe_exo
+    basis_Xexo = basis(eachcol(Xexo)...; has_intercept = has_intercept) .* notcollinear_fe_exo
     Xexo = getcols(Xexo, basis_Xexo)
     Xhat = Xexo
     X = Xexo
```
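The `method = :gpu` shim above keeps old call sites working while steering users to the new symbols: accept the deprecated value, log a note, and rewrite it to the closest new equivalent. A minimal standalone sketch of that pattern (`normalize_method` is a hypothetical name for illustration, not part of the package):

```julia
# Hypothetical sketch of the deprecation shim added in fit(); not FixedEffectModels API.
function normalize_method(method::Symbol)
    if method == :gpu
        @info "method = :gpu is deprecated. Use method = :CUDA or method = :Metal"
        # :gpu historically meant an Nvidia GPU, so map it to :CUDA
        return :CUDA
    end
    return method
end

normalize_method(:gpu)   # logs the deprecation note, returns :CUDA
normalize_method(:cpu)   # returns :cpu unchanged
```

Rewriting the symbol up front means the rest of `fit` only ever sees the new values, so no other branch needs to know `:gpu` ever existed.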

src/utils/basecol.jl

Lines changed: 6 additions & 3 deletions
```diff
@@ -30,8 +30,8 @@
 ##
 ##
 ##############################################################################
-function basis(@nospecialize(xs::AbstractVector...))
-    invXX = invsym!(crossprod(collect(xs)))
+function basis(@nospecialize(xs::AbstractVector...); has_intercept = false)
+    invXX = invsym!(crossprod(collect(xs)); has_intercept = has_intercept)
     return diag(invXX) .> 0
 end
 
@@ -51,7 +51,7 @@ function crossprod(xs::Vector{<:AbstractVector})
 end
 
 # generalized 2-inverse
-function invsym!(X::AbstractMatrix)
+function invsym!(X::AbstractMatrix; has_intercept = false)
     # The C value adjusts the check to the relative scale of the variable. The C value is equal to the corrected sum of squares for the variable, unless the corrected sum of squares is 0, in which case C is 1. If you specify the NOINT option but not the ABSORB statement, PROC GLM uses the uncorrected sum of squares instead. The default value of the SINGULAR= option, 1e-7, might be too small, but this value is necessary in order to handle the high-degree polynomials used in the literature to compare regression routines.
     tols = max.(diag(X), 1)
     for j in 1:size(X, 1)
@@ -69,6 +69,9 @@ function invsym!(X::AbstractMatrix)
         end
             X[j,j] = 1 / d
         end
+        if has_intercept && j == 1
+            tols = max.(diag(X), 1)
+        end
     end
     return X
 end
```
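The change to `invsym!` above is the heart of the collinearity fix: the sweep-style elimination of `X'X` declares a column collinear when its pivot is tiny relative to a tolerance taken from the original diagonal, but once the intercept has been swept, the remaining diagonal entries hold *centered* sums of squares, so the tolerances must be recomputed. A standalone illustrative sketch of this idea (not the package's exact code; `basis_sketch` is a hypothetical name):

```julia
# Illustration of pivot-based collinearity detection on X'X, including the
# tolerance rescaling after sweeping the intercept (the fix in this commit).
using LinearAlgebra

function basis_sketch(X::AbstractMatrix; has_intercept::Bool = false, tol::Real = 1e-8)
    XX = Matrix(X' * X)
    keep = trues(size(XX, 1))
    tols = max.(diag(XX), 1)          # per-column scale for the singularity check
    for j in axes(XX, 1)
        d = XX[j, j]
        if abs(d) < tol * tols[j]
            keep[j] = false           # pivot vanished: column j is collinear
            XX[j, :] .= 0
            XX[:, j] .= 0
        else
            rowj = XX[j, :] ./ d      # Gauss-Jordan elimination of column j
            XX .-= XX[:, j] * rowj'
            XX[j, :] .= rowj
            XX[j, j] = 1 / d
        end
        if has_intercept && j == 1
            # After sweeping the intercept the diagonal holds centered sums of
            # squares, so rescale the tolerances before checking later pivots.
            tols = max.(diag(XX), 1)
        end
    end
    return keep
end

X = [ones(6) collect(1.0:6) 2 .* collect(1.0:6)]  # third column = 2 × second
basis_sketch(X; has_intercept = true)             # keeps columns 1 and 2, flags column 3

x2 = [44.5105, 44.5044, 44.5072, 44.5098, 44.5103, 44.5109]  # from the new test
basis_sketch([ones(6) x2]; has_intercept = false)  # old behavior: x2 wrongly dropped
basis_sketch([ones(6) x2]; has_intercept = true)   # fixed behavior: x2 kept
```

Without the rescaling, a variable with a large mean but tiny variance fails the check: its centered pivot is compared against a tolerance keyed to its much larger uncentered sum of squares.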

test/Project.toml

Lines changed: 3 additions & 1 deletion
```diff
@@ -5,6 +5,7 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 FixedEffects = "c8885935-8500-56a7-9867-7708b20db0eb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
@@ -13,5 +14,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 CategoricalArrays = "0.10"
 CSV = "0.8, 0.9, 0.10"
 CUDA = "1, 2, 3, 4"
+Metal = "0.5"
 DataFrames = "0.21, 0.22, 1"
-FixedEffects = "2"
+FixedEffects = "2.2"
```

test/collinearity.jl

Lines changed: 13 additions & 0 deletions
```diff
@@ -30,4 +30,17 @@ using DataFrames, CSV, FixedEffectModels, Random, Statistics, Test
 @test rr.coef[1] ≈ 0.0
 @test isnan(stderror(rr)[1])
 
+
+	df = DataFrame(
+		[36.9302 44.5105;
+		 39.4935 44.5044;
+		 38.946 44.5072;
+		 37.8005 44.5098;
+		 37.2613 44.5103;
+		 35.3885 44.5109;],
+		:auto
+	)
+	rr = reg(df, @formula(x1 ~ x2))
+	@test all(!isnan, stderror(rr))
 end
```
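The data in the new test is exactly the shape that triggered the bug: `x2` hovers around 44.51 and varies only in the third decimal, so its uncentered sum of squares dwarfs its centered one. A quick back-of-the-envelope check (values are approximate):

```julia
# x2 from the test above: nearly constant, large mean.
x2 = [44.5105, 44.5044, 44.5072, 44.5098, 44.5103, 44.5109]

uncentered = sum(abs2, x2)                         # ≈ 1.19e4
centered = sum(abs2, x2 .- sum(x2) / length(x2))   # ≈ 3.2e-5
ratio = uncentered / centered                      # ≈ 3.7e8
```

With tolerances keyed to the uncentered scale, a relative threshold on the order of 1e-8 yields roughly 1.2e-4, which exceeds the centered pivot of about 3.2e-5, so `x2` was wrongly declared collinear with the intercept and its standard error came back `NaN`. The rescaling in `invsym!` removes that eight-orders-of-magnitude mismatch.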

test/fit.jl

Lines changed: 6 additions & 3 deletions
```diff
@@ -1,4 +1,4 @@
-using CUDA, FixedEffectModels, CategoricalArrays, CSV, DataFrames, Test, LinearAlgebra
+using CUDA, Metal, FixedEffectModels, CategoricalArrays, CSV, DataFrames, Test, LinearAlgebra
 using FixedEffectModels: nullloglikelihood_within
 
 
@@ -662,8 +662,11 @@ end
 @test x.iterations <= 50
 
 methods_vec = [:cpu]
-if FixedEffectModels.FixedEffects.has_CUDA()
-	push!(methods_vec, :gpu)
+if CUDA.functional()
+	push!(methods_vec, :CUDA)
+end
+if Metal.functional()
+	push!(methods_vec, :Metal)
 end
 for method in methods_vec
 	# same thing with float32 precision
```

test/predict.jl

Lines changed: 5 additions & 2 deletions
```diff
@@ -302,8 +302,11 @@ end
 @test fe(result)[1, :fe_Year] + fe(result)[1, :fe_State] ≈ 158.91798 atol = 1e-4
 
 methods_vec = [:cpu]
-if FixedEffectModels.FixedEffects.has_CUDA()
-	push!(methods_vec, :gpu)
+if CUDA.functional()
+	push!(methods_vec, :CUDA)
+end
+if Metal.functional()
+	push!(methods_vec, :Metal)
 end
 for method in methods_vec
 	local model = @formula Sales ~ Price + fe(Year)
```
