Predicted versus measured outputs for the validation dataset. Top: PCR using \textit {four} principal components. Bottom: PLSR using \textit {two} latent variables. Left: first output. Right: second output.
importnumpyasnpimportmatplotlib.pyplotaspltfromscipy.linalgimportsvdn=100p=2q=5#np.random.seed(1)np.random.seed(0)# True parametersB=np.vstack((np.zeros((3,2)),[-1,1],[0,0]))# Generate dataX=np.random.rand(n,q-1)xq=X@np.ones((q-1,1))/np.sqrt(q-1)+0.01*np.random.rand(n,1)X=np.hstack((X,xq))Y=X@B+0.1*np.random.rand(n,p)# Cross-validation dataX2=np.random.rand(n,q)Y2=X2@B+0.1*np.random.rand(n,p)Xdat=np.vstack((X,X2))Ydat=np.vstack((Y,Y2))# Save the data in low precisionwithopen('pca_pls_data.dat','w')asmyfile:myfile.write('# x1 x2 x3 x4 x5 y1 y2\n')foriinrange(2*n):line=' '.join(f'{val:8.3f}'forvalinnp.hstack((Xdat[i],Ydat[i])))myfile.write(f'{line}\n')# Reload data to ensure consistencyData=np.loadtxt('pca_pls_data.dat',skiprows=1)X,Y=Data[:n,:q],Data[:n,q:q+p]X2,Y2=Data[n:,:q],Data[n:,q:q+p]# Full least squaresBls=np.linalg.inv(X.T@X)@X.T@Y# PCR or SVDU,S,Vt=svd(X,full_matrices=False)V=Vt.TBsvd=[np.zeros((q,p))for_inrange(q)]Ysvd=[np.zeros((n,p))for_inrange(q)]errsvd=np.zeros(q)forlinrange(q):Ul,Sl,Vl=U[:,:l+1],np.diag(S[:l+1]),V[:,:l+1]Bsvd[l]=Vl@np.linalg.inv(Sl)@Ul.T@YYsvd[l]=X@Bsvd[l]errsvd[l]=np.linalg.norm(Y-Ysvd[l])# PLS algorithmU,T,P,Q=[],[],[],[]E,F=X.copy(),Y.copy()Bpls=[np.zeros((q,p))for_inrange(q)]Ypls=[np.zeros((n,p))for_inrange(q)]errpls=np.zeros(q)foriinrange(q):UU,SS,VVt=svd(E.T@F,full_matrices=False)u,v=UU[:,0:1],VVt.T[:,0:1]t=E@ut=t/np.linalg.norm(t)pp=E.T@tqq=F.T@tE=E-t@pp.TF=F-t@qq.TU.append(u)T.append(t)P.append(pp)Q.append(qq)R=np.hstack(U)@np.linalg.inv(np.hstack(P).T@np.hstack(U))Bpls[i]=R@np.hstack(Q).TYpls[i]=X@Bpls[i]errpls[i]=np.linalg.norm(Y-Ypls[i])plt.figure()plt.plot(range(q),errsvd,'-x')plt.plot(range(q),errpls,'-o')# Save results in correct format for gnuplottab1=np.column_stack((np.arange(1,q+1),errsvd))tab2=np.column_stack((np.arange(1,q+1),errpls))tab3=np.hstack((Y,np.hstack(Ysvd)))tab4=np.hstack((Y,np.hstack(Ypls)))tab5=np.column_stack((np.arange(1,q+1),[np.linalg.norm(Y2-X2@Bsvd[i])foriinrange(q)]))tab6=np.column_stack((np.arange(1,q+1),[np.linalg.norm(Y2-X2@Bpls[i])foriinrange(q)]))tab7=np.hstack((Y2,np.hstack([X2@Bsvd[i]foriinrange(q)])))tab8=np.hstack((Y2,np.hstack([X2@Bpls[i]foriinrange(q)])))# Concatenate tables into a single filewithopen('pca_pls.dat','w')asf:f.write("# tab1 (errors for SVD)\n")np.savetxt(f,tab1,fmt='%f')f.write("\n\n")f.write("# tab2 (errors for PLS)\n")np.savetxt(f,tab2,fmt='%f')f.write("\n\n")f.write("# tab3 (Y and Ysvd)\n")np.savetxt(f,tab3,fmt='%f')f.write("\n\n")f.write("# tab4 (Y and Ypls)\n")np.savetxt(f,tab4,fmt='%f')f.write("\n\n")f.write("# tab5 (cross-validation errors for SVD)\n")np.savetxt(f,tab5,fmt='%f')f.write("\n\n")f.write("# tab6 (cross-validation errors for PLS)\n")np.savetxt(f,tab6,fmt='%f')f.write("\n\n")f.write("# tab7 (cross-validation Y2 and Y2svd)\n")np.savetxt(f,tab7,fmt='%f')f.write("\n\n")f.write("# tab8 (cross-validation Y2 and Y2pls)\n")np.savetxt(f,tab8,fmt='%f')