@@ -215,7 +215,7 @@ function StatsAPI.fit(::Type{FixedEffectModel},
215215 end
216216
217217 # collect all variable names (outcome, exo [, endo, iv])
218- var_names_all = [ response_name; coef_names]
218+ var_names_all = vcat ( response_name, coef_names)
219219
220220 if has_iv
221221 subdf = Tables. columntable ((; (x => disallowmissing (view (df[! , x], esample)) for x in endo_vars). .. ))
@@ -281,7 +281,9 @@ function StatsAPI.fit(::Type{FixedEffectModel},
281281 if i == 1
282282 @info " Dependent variable $(var_names_all[1 ]) is probably perfectly explained by fixed effects (tol = $collinear_tol )."
283283 else
284- @info " RHS-variable $(var_names_all[i]) is probably collinear with the fixed effects (tol = $collinear_tol )."
284+ @info " RHS-variable $(var_names_all[i]) is collinear with the fixed effects (tol = $collinear_tol )."
285+ # set the column to zero so that it is removed when taking the basis
286+ cols[i] .= 0.0
285287 end
286288 end
287289
@@ -305,86 +307,114 @@ function StatsAPI.fit(::Type{FixedEffectModel},
305307 end
306308 end
307309
308-
309-
310-
311310 # #############################################################################
312311 # #
313312 # # Get Linearly Independent Components of Matrix
314313 # #
315314 # #############################################################################
316315 # Compute linearly independent columns + create the Xhat matrix
317316 if has_iv
318- n_exo = size (Xexo, 2 )
319- n_endo = size (Xendo, 2 )
320- n_z = size (Z, 2 )
321- perm = 1 : (n_exo + n_endo)
322-
323- # first pass: remove colinear variables in Xendo
324- notcollinear_fe_endo = collinear_fe[(n_exo+ 2 ): (n_exo+ n_endo+ 1 )] .== false
325- basis_endo = basis (Xendo; has_intercept = false ) .* notcollinear_fe_endo
326- Xendo = getcols (Xendo, basis_endo)
327-
328- # second pass: remove colinear variable in Xexo, Z, and Xendo
329- notcollinear_fe_exo = collinear_fe[2 : (n_exo+ 1 )] .== false
330- notcollinear_fe_z = collinear_fe[(n_exo+ n_endo+ 2 ): (n_exo+ n_endo+ n_z+ 1 )] .== false
331- notcollinear_fe_endo_small = notcollinear_fe_endo[basis_endo]
332-
333-
334- basis_all = basis (Xexo, Z, Xendo; has_intercept = has_intercept)
335- basis_Xexo = basis_all[1 : size (Xexo, 2 )] .* notcollinear_fe_exo
336- basis_Z = basis_all[(size (Xexo, 2 ) + 1 ): (size (Xexo, 2 ) + size (Z, 2 ))] .* notcollinear_fe_z
337- basis_endo_small = basis_all[(size (Xexo, 2 ) + size (Z, 2 ) + 1 ): end ] .* notcollinear_fe_endo_small
317+ perm = 1 : (size (Xexo, 2 ) + size (Xendo, 2 ))
318+
319+ # first pass: remove collinear variables in Xendo
320+ XendoXendo = Xendo' * Xendo
321+ basis_endo = basis! (Symmetric (deepcopy (XendoXendo)); has_intercept = false )
322+ if ! all (basis_endo)
323+ Xendo = Xendo[:, basis_endo]
324+ XendoXendo = XendoXendo[basis_endo, basis_endo]
325+ end
338326
327+ # second pass: remove collinear variables in Xexo, Z, and Xendo
328+ XexoXexo = Xexo' Xexo
329+ XexoZ = Xexo' Z
330+ XexoXendo = Xexo' Xendo
331+ ZZ = Z' Z
332+ ZXendo = Z' Xendo
333+ XexoZXendo = Symmetric (hvcat (3 , XexoXexo, XexoZ, XexoXendo,
334+ zeros (size (Z, 2 ), size (Xexo, 2 )), ZZ, ZXendo,
335+ zeros (size (Xendo, 2 ), size (Xexo, 2 )), zeros (size (Xendo, 2 ), size (Z, 2 )), XendoXendo))
336+ basis_all = basis! (XexoZXendo; has_intercept = has_intercept)
337+ basis_Xexo, basis_Z, basis_endo_small = basis_all[1 : size (Xexo, 2 )], basis_all[(size (Xexo, 2 ) + 1 ): (size (Xexo, 2 ) + size (Z, 2 ))], basis_all[(size (Xexo, 2 ) + size (Z, 2 ) + 1 ): end ]
338+ # basis_endo_small has the same length as the number of true entries in basis_endo
339339 if ! all (basis_endo_small)
340340 # if adding Xexo and Z makes Xendo collinear, treat these variables as exogenous
341- Xexo = hcat (Xexo, getcols (Xendo, .! basis_endo_small))
342- Xendo = getcols (Xendo, basis_endo_small)
341+ Xexo = hcat (Xexo, Xendo[:, .! basis_endo_small])
342+ Xendo = Xendo[:, basis_endo_small]
343+ XexoXexo = Xexo' Xexo
344+ XexoZ = Xexo' Z
345+ XexoXendo = Xexo' Xendo
346+ ZXendo = Z' Xendo
347+ XendoXendo = Xendo' Xendo
343348
344349 # basis_endo2 is false for endogenous variables that are collinear with the instruments
345350 basis_endo2 = trues (length (basis_endo))
346351 basis_endo2[basis_endo] = basis_endo_small
347-
348- # Change coef_names and oldX
349352 # TODO : I should probably also change formula in this case so that predict still works
350353 ans = 1 : length (basis_endo)
351354 ans = vcat (ans[.! basis_endo2], ans[basis_endo2])
352355 perm = vcat (1 : length (basis_Xexo), length (basis_Xexo) .+ ans)
353-
356+ # there are basis_endo - basis_endo_small in endo
354357 out = join (coefendo_names[.! basis_endo2], " " )
355358 @info " Endogenous vars collinear with ivs. Recategorized as exogenous: $(out) "
356-
359+
357360 # third pass
358- basis_all = basis (Xexo, Z, Xendo; has_intercept = has_intercept)
359- basis_Xexo = basis_all[1 : size (Xexo, 2 )]
360- basis_Z = basis_all[(size (Xexo, 2 ) + 1 ): (size (Xexo, 2 ) + size (Z, 2 ))]
361+ XexoZXendo = Symmetric (hvcat (3 , XexoXexo, XexoZ, XexoXendo,
362+ zeros (size (Z, 2 ), size (Xexo, 2 )), ZZ, ZXendo,
363+ zeros (size (Xendo, 2 ), size (Xexo, 2 )), zeros (size (Xendo, 2 ), size (Z, 2 )), XendoXendo))
364+ basis_all = basis! (XexoZXendo; has_intercept = has_intercept)
365+ basis_Xexo, basis_Z, basis_endo_small2 = basis_all[1 : size (Xexo, 2 )], basis_all[(size (Xexo, 2 ) + 1 ): (size (Xexo, 2 ) + size (Z, 2 ))], basis_all[(size (Xexo, 2 ) + size (Z, 2 ) + 1 ): end ]
361366 end
362-
363- Xexo = getcols (Xexo, basis_Xexo)
364- Z = getcols (Z, basis_Z)
365- size (Z, 2 ) >= size (Xendo, 2 ) || throw (" Model not identified. There must be at least as many ivs as endogeneous variables" )
366- basis_coef = vcat (basis_Xexo, basis_endo[basis_endo_small])
367+ if ! all (basis_Xexo)
368+ Xexo = Xexo[:, basis_Xexo]
369+ XexoXexo = XexoXexo[basis_Xexo, basis_Xexo]
370+ XexoXendo = XexoXendo[basis_Xexo, :]
371+ end
372+ if ! all (basis_Z)
373+ Z = Z[:, basis_Z]
374+ ZZ = ZZ[basis_Z, basis_Z]
375+ ZXendo = ZXendo[basis_Z, :]
376+ end
377+ XexoZ = XexoZ[basis_Xexo, basis_Z]
378+ size (ZXendo, 1 ) >= size (ZXendo, 2 ) || throw (" Model not identified. There must be at least as many ivs as endogeneous variables" )
379+ # basis_endo is true for the endogenous variables that are not collinear among themselves
380+ # basis_endo2 has the same length as basis_endo but is false for the variables that were reclassified as exogenous (those where basis_endo_small is false); indexing basis_endo with it drops those entries
381+ basis_endo2 = trues (length (basis_endo))
382+ basis_endo2[basis_endo] = basis_endo_small
383+ basis_coef = vcat (basis_Xexo, basis_endo[basis_endo2])
367384
368385 # Build
369386 newZ = hcat (Xexo, Z)
370- Pi = ls_solve (newZ, Xendo)
371- Xhat = hcat (Xexo, newZ * Pi)
387+ # now create Pi = newZ \ Xendo
388+ newZnewZ = hvcat (2 , XexoXexo, XexoZ,
389+ XexoZ' , ZZ)
390+ newZXendo = vcat (XexoXendo, ZXendo)
391+ Pi = ls_solve! (Symmetric (hvcat (2 , newZnewZ, newZXendo,
392+ zeros (size (newZXendo' )), zeros (size (Xendo, 2 ), size (Xendo, 2 )))),
393+ size (newZnewZ, 2 ))
394+ newnewZ = newZ * Pi
395+ Xhat = hcat (Xexo, newnewZ)
396+ XhatXhat = Symmetric (hvcat (2 , XexoXexo, Xexo' newnewZ,
397+ zeros (size (newnewZ, 2 ), size (Xexo, 2 )), newnewZ' newnewZ))
372398 X = hcat (Xexo, Xendo)
373-
374399 # prepare residuals used for first stage F statistic
375400 # # partial out Xendo in place wrt (Xexo, Z)
376401 Xendo_res = BLAS. gemm! (' N' , ' N' , - 1.0 , newZ, Pi, 1.0 , Xendo)
377402 # # partial out Z in place wrt Xexo
378- Pi2 = ls_solve (Xexo, Z)
403+ # Now create Pi2 = Xexo \ Z
404+ Pi2 = ls_solve! (Symmetric (hvcat (2 , XexoXexo, XexoZ,
405+ zeros (size (Z, 2 ), size (Xexo, 2 )), ZZ)), size (Xexo, 2 ))
379406 Z_res = BLAS. gemm! (' N' , ' N' , - 1.0 , Xexo, Pi2, 1.0 , Z)
380407 else
381408 # get linearly independent columns
382- n_exo = size (Xexo, 2 )
383- perm = 1 : n_exo
384- notcollinear_fe_exo = collinear_fe[2 : (n_exo+ 1 )] .== false
385- basis_Xexo = basis (Xexo; has_intercept = has_intercept) .* notcollinear_fe_exo
386- Xexo = getcols (Xexo, basis_Xexo)
409+ perm = 1 : size (Xexo, 2 )
410+ XexoXexo = Xexo' Xexo
411+ basis_Xexo = basis! (Symmetric (deepcopy (XexoXexo)); has_intercept = has_intercept)
412+ if ! all (basis_Xexo)
413+ Xexo = Xexo[:, basis_Xexo]
414+ XexoXexo = XexoXexo[basis_Xexo, basis_Xexo]
415+ end
387416 Xhat = Xexo
417+ XhatXhat = Symmetric (XexoXexo)
388418 X = Xexo
389419 basis_coef = basis_Xexo
390420 end
@@ -394,11 +424,10 @@ function StatsAPI.fit(::Type{FixedEffectModel},
394424 # # Do the regression
395425 # #
396426 # #############################################################################
397-
398- crossx = Xhat' * Xhat
399- Xy = Symmetric (hvcat (2 , crossx, Xhat' y, zeros (size (Xhat, 2 ))' , [0.0 ]))
427+ Xy = Symmetric (hvcat (2 , XhatXhat, Xhat' y,
428+ zeros (size (Xhat, 2 ))' , [0.0 ]))
400429 invsym! (Xy; diagonal = 1 : size (Xhat, 2 ))
401- invcrossx = Symmetric (.- Xy[1 : (end - 1 ),1 : (end - 1 )])
430+ invXhatXhat = Symmetric (.- Xy[1 : (end - 1 ),1 : (end - 1 )])
402431 coef = Xy[1 : (end - 1 ),end ]
403432
404433 # #############################################################################
@@ -419,7 +448,10 @@ function StatsAPI.fit(::Type{FixedEffectModel},
419448
420449 augmentdf = DataFrame ()
421450 if save_fe
422- oldX = getcols (oldX[:, perm], basis_coef)
451+ oldX = oldX[:, perm]
452+ if ! all (basis_coef)
453+ oldX = oldX[:, basis_coef]
454+ end
423455 newfes, b, c = solve_coefficients! (oldy - oldX * coef, feM; tol = tol, maxiter = maxiter)
424456 for fekey in fekeys
425457 augmentdf[! , fekey] = df[:, fekey]
@@ -457,7 +489,7 @@ function StatsAPI.fit(::Type{FixedEffectModel},
457489 end
458490
459491 # Compute standard error
460- vcov_data = Vcov. VcovData (Xhat, crossx, invcrossx , residuals, nobs - size (X, 2 ) - dof_fes)
492+ vcov_data = Vcov. VcovData (Xhat, XhatXhat, invXhatXhat , residuals, nobs - size (X, 2 ) - dof_fes)
461493 matrix_vcov = StatsAPI. vcov (vcov_data, vcov_method)
462494 # Compute Fstat
463495 F = Fstat (coef, matrix_vcov, has_intercept)
0 commit comments