因子分析(迭代因子法)代码
#此代码是我手动搭建的
#除KMO和bar检验外未使用因子分析的专用工具包(factor_analyzer)
#此代码仅能解决需要输出两个潜变量的因子分析问题
#如需将此代码放在自己个人社交平台,请给个引用"B站 耿大哥讲算法" 作者:耿大哥讲算法
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.preprocessing import normalize
import matplotlib;matplotlib.rc("font", family='Microsoft YaHei')
from factor_analyzer import calculate_kmo,calculate_bartlett_sphericity
#读取数据&校准化
# 指标1 指标2 指标3 指标4指标5 指标6 指标7 指标8
data=np.array([[40.4,24.7,7.20,6.10,8.30,8.70,2.442,20.0], #样本1
[25.0,12.7,11.2,11.0,12.9,20.2,3.542,9.10], #样本2
[13.2,3.30,3.90,4.30,4.40,5.50,0.578,3.60], #样本3
[22.3,6.70,5.60,3.70,6.00,7.40,0.176,7.30], #样本4
[34.3,11.8,7.10,7.10,8.00,8.90,1.726,27.5], #样本5
[35.6,12.5,16.4,16.7,22.8,29.3,3.017,26.6], #样本6
[22.0,7.80,9.90,10.2,12.6,17.6,0.847,10.6], #样本7
[48.4,13.4,10.9,9.90,10.9,13.9,1.772,17.8], #样本8
[40.6,19.1,19.8,19.0,29.7,39.6,2.449,35.8], #样本9
[24.8,8.00,9.80,8.90,11.9,16.2,0.789,13.7], #样本10
[12.5,9.70,4.20,4.20,4.60,6.50,0.874,3.90], #样本11
[1.80,0.60,0.70,0.70,0.80,1.10,0.056,1.00], #样本13
[32.3,13.9,9.40,8.30,9.80,13.3,2.126,17.1], #样本13
[38.5,9.10,11.3,9.50,12.2,16.4,1.327,11.6]])#样本14
# 对样本Z-score标准化
X=preprocessing.scale(data,axis=0)
#KMO和bartlett检验
bartlett,KMO=calculate_bartlett_sphericity(data),calculate_kmo(data)[1]
print('-'*20,'KMO和bartlett检验','-'*19)
print('KMO='+str('%.3f'%calculate_kmo(data)[1]),end='\t')
if KMO<=1 and KMO>=0.9:print('非常好')
elif KMO<0.9 and KMO>=0.8:print('好')
elif KMO<0.8 and KMO>=0.7:print('一般')
elif KMO<0.7 and KMO>=0.6:print('差')
elif KMO<0.6 and KMO>=0.5:print('很差')
else:print('不能接受')
print('bartlett 统计量='+str('%.3f'%bartlett[0]))
print('bartlett 自由度='+str(int(X.shape[1]*(X.shape[1]-1)/2)))
print('bartlett P值='+str('%.3f'%bartlett[1]))
X=preprocessing.scale(X,axis=0)
#求样本协方差矩阵
C=np.dot(X.T,X)/(X.shape[0])
#主因子迭代25次(SPSS默认25次)
m,D,S0=2,np.eye(C.shape[0]),0
list1,list2,F,A=[[1+i/1000] for i in range(X.shape[1])],[1],0,0
for k in range(25):
λ,Q0=np.linalg.eig(C-D);Q0=np.mat(Q0)
Q0=normalize(np.array(Q0),axis=0)
# 特征根和特征向量排序
tezhenggen1=[i for i in λ];tezhenggen1.sort(reverse=True)
Q=np.zeros(shape=(Q0.shape[1],m))
for i in range(Q0.shape[1]):
for j in range(m):Q[i,j]=Q0[i,list(λ).index(tezhenggen1[j])]
#计算载荷矩阵和D
diag=np.diag([tezhenggen1[i]**0.5 for i in range(m)])
A=Q*np.mat(diag)
D=np.diag([(C-A*A.T)[i,i] for i in range(Q0.shape[1])])
list2.append(np.linalg.det(D))
for i in range(X.shape[1]):list1[i].append(D[i,i])
#print('第'+str(k+1)+'次迭代完成!')
S0=((A.T*np.mat(D).I*A).I*A.T*np.mat(D).I).T
#绘制迭代过程中D的行列式和D的主对角线元素的变换曲线
plt.subplot(1,2,1)
for i in range(X.shape[1]):plt.plot(range(len(list1[i])),list1[i])
plt.xlabel('迭代次数');plt.ylabel('各分量的方差')
plt.subplot(1,2,2)
plt.plot(range(len(list2)),list2,'r-')
plt.xlabel('迭代次数');plt.ylabel('D的行列式')
plt.show()
# 对载荷矩阵A施以二维正交旋转(后面的代码与主成分法后面的代码相同)
# 定义载荷间的总方差函数ff(θ)
def ff(a):
B=np.mat(A)*np.mat([[np.cos(a),-np.sin(a)],[np.sin(a),np.cos(a)]])
zongfangcha=0
for k in range(B.shape[1]):
junzhi=sum(B[i,k]**2/sum(A[i,j]**2 for j in range(A.shape[1]))
for i in range(B.shape[0]))/B.shape[0]
zongfangcha+=sum((B[i,k]**2/sum(A[i,j]**2
for j in range(A.shape[1]))-junzhi)**2
for i in range(B.shape[0]))
return zongfangcha
#计算并修正载荷矩阵的旋转解和得分系数矩阵
x=np.arange(0,np.pi/2,0.01);y=[ff(i) for i in x];θ=x[y.index(max(y))]
for i in range(4):
θ+=i*np.pi/2
Q=np.mat([[np.cos(θ),-np.sin(θ)],[np.sin(θ),np.cos(θ)]])
B=A*Q
list0=[abs(B[j,0]) for j in range(B.shape[0])]
list1=[abs(B[j,1]) for j in range(B.shape[0])]
a,b=B[list0.index(max(list0)),0],B[list1.index(max(list1)),1]
if a>0 and b>0:break
else:pass
Q=np.mat([[np.cos(θ),-np.sin(θ)],[np.sin(θ),np.cos(θ)]])
B=A*Q;S=S0*Q
list2=[(B.T*B)[j,j] for j in range(B.shape[1])]
if list2.index(max(list2))==0:pass
else:B=B*np.mat([[0,1],[1,0]]);S=S*np.mat([[0,1],[1,0]])
list2.sort(reverse=True)
# 输出旋转角度θ和载荷间的总方差最大值max(ff)
print('-'*54)
print('旋转角度θ='+str('%.2f'%(θ*180/np.pi))+'°',end='\t')
print('载荷间的总方差='+str('%.3f'%ff(θ)))
# 输出使载荷间的总方差函数达到最大的旋转矩阵Q
print('旋转矩阵Q')
for i in range(Q.shape[0]):
for j in range(Q.shape[1]):print('%.3f'%Q[i,j],end='\t')
print()
print('旋转方法:Kaiser 标准化最大方差法')
# 输出载荷矩阵的旋转解
print('-'*54);print('载荷矩阵的旋转解')
print('指标|组件',end='\t')
for i in range(A.shape[1]):print('潜变量f'+str(i+1),end='\t')
print('共同度')
for i in range(B.shape[0]):
print('指标'+str(i+1),end='\t')
for j in range(B.shape[1]):print('%.3f'%B[i,j],end='\t')
print('%.3f'%sum(B[i,k]**2 for k in range(B.shape[1])),end='\t')
print()
print('总贡献',end='\t')
for i in list2:print('%.3f'%i,end='\t')
print('%.3f'%sum(list2));print('-'*53)
# 输出旋转后的成分得分系数矩阵
print('成分得分系数矩阵(旋转后)')
print('指标|组件',end='\t')
for i in range(A.shape[1]):print('潜变量f'+str(i+1),end='\t')
print()
for i in range(S.shape[0]):
print('指标'+str(i+1),end='\t')
for j in range(S.shape[1]):print('%.3f'%S[i,j],end='\t')
print()
#计算各样本的综合得分并排序
print('-'*54)
print('各样本的得分及排序');print('样本|fi',end='\t')
for i in range(A.shape[1]):print('潜变量f'+str(i+1),end='\t')
print('综合变量f',end='\t');F=X*S
print('按f1排序',end='\t');print('按f2排序',end='\t');print('按f排序')
def xvhao(x):
list3=[i for i in x]
list3.sort(reverse=True)
return [list3.index(i)+1 for i in x]
list4=[i/sum(list2) for i in list2]
list5=[sum(list4[j]*F[i,j] for j in range(F.shape[1])) for i in range(F.shape[0])]
for i in range(F.shape[0]):
print('样本'+str(i+1),end='\t')
for j in range(F.shape[1]):print('%.3f'%F[i,j],end='\t')
print('%.3f'%list5[i],' ',end='\t');print(xvhao(F[:,0])[i],' ',end='\t')
print(xvhao(F[:,1])[i],' ',end='\t');print(xvhao(list5)[i],' ',end='\t')
print()
print('-'*54)
#绘制载荷图(旋转后的空间组件图)
plt.plot(B[:,0],B[:,1],'o');plt.xlabel('潜变量f1');plt.ylabel('潜变量f2')
plt.title('载荷图(旋转后的空间组件图)')
for i in range(B.shape[0]):
plt.text(B[i,0]+0.01,B[i,1]+0.01,'指标'+str(i+1))
plt.show()