module draw_factors
	use basicmodule
	implicit none

	contains
	
	subroutine set_bandxxi(l,band,sm)
		real	:: band(nphi+nld,-nphi+1:capT),sm(-nphi+1:capT)
		integer	:: l
		real	:: V(nphi+nld,nphi+nld),h(nphi+nld),m(nphi+nld),tmp
		integer	:: i,j,t,i1,t1
		band=0; sm=0
!$omp parallel do private(t,h,i,tmp,i1,V,m,t1) reduction(+:sm,band)		
		do j=1,n
			do t=1,capT
				us(t,j)=us(t,j)+sum(loads(:,t,l,j)*us(t:t-nld+1:-1,n+l))
				h(1:nld)=loads(:,t,l,j)
				h(nld+1:)=0
				do i=1,nphi
					h(i+1:i+nld)=h(i+1:i+nld)-phis(i,t,j)*loads(:,t-i,l,j)
				enddo
				do i=1,nphi+nld
					tmp=h(i)/s2s(t,j)
					do i1=i,nphi+nld
						V(i1,i)=h(i1)*tmp
					enddo
				enddo
				m=h*(us(t,j)-sum(phis(:,t,j)*us(t-1:t-nphi-1:-1,j)))/s2s(t,j)
				do i1=0,nphi+nld-1
					t1=t-i1
					if(t1<-nphi+1) cycle
					sm(t1)=sm(t1)+m(i1+1)
					band(i1+1:nphi+nld,t1)=band(i1+1:nphi+nld,t1)+V(nphi+nld:i1+1:-1,i1+1)
				enddo
			enddo
		enddo
	end subroutine
	
	subroutine set_bandSigi(l,bsigi,s2s)
		integer	:: l
		real	:: bsigi(nphi+1,-nphi+1:capT),s2s(0:capT)
		real	:: tmp(1+nphi,-nphi+1:capT),tmp2(nphi,nphi)
		integer	:: t,i,i2
		do t=-nphi+1,0
			tmp(1:-t+1,t)=acholini(t+nphi:,t+nphi,n+l)
			do i=-t+1,nphi
				tmp(i+1,t)=-phis(i,t+i,n+l)
			enddo
		enddo
		
		do t=1,capT-nphi
			tmp(1,t)=1
			do i=1,nphi
				if(t+i>capT) cycle
				tmp(i+1,t)=-phis(i,t+i,n+l)
			enddo
		enddo

		do t=capT-nphi+1,capT
			tmp(1,t)=1
			do i=1,capT-t
				tmp(i+1,t)=-phis(i,t+i,n+l)
			enddo
		enddo
		do t=-nphi+1,0
			do i=2-t,nphi+1
				bsigi(i,t)=0
				do i2=1,i
					bsigi(i,t)=bsigi(i,t)+tmp(i2,t)*tmp(nphi+2-i+i2-1,t-nphi-1+i)/s2s(max(t+i2-1,0))
				enddo
			enddo
		enddo
		do t=1,capT-nphi
			do i=1,nphi+1
				bsigi(i,t)=sum(tmp(1:i,t)*tmp(nphi+2-i:,t-nphi-1+i)/s2s(t:t+i-1))
			enddo
		enddo
		do t=capT-nphi+1,capT
			do i=1,nphi+1
				i2=min(capT-t+1,i)
				bsigi(i,t)=sum(tmp(1:i2,t)*tmp(nphi+2-i:nphi+2-i+i2-1,t-nphi-1+i)/s2s(t:t+i2-1))
			enddo
		enddo
	end subroutine
	
	subroutine set_fs_fast(coef,s2s,bxxi,sm,ll,m)
		use lapack95, only: pbtrf,gbtrs,pbtrs
		real	:: coef(0:ntcoefs),s2s(0:capT),bxxi(nphi+nld,-nphi+1:capT),sm(-nphi+1:capT),ll,m(-nphi+1:capT)
		real	:: bi(nphi+nld,-nphi+1:capT),bsigi(nphi+1,-nphi+1:capT),mu(-nphi+1:capT)

		call set_bandSigi(1,bsigi,s2s)
		bi=bxxi
		bi(nld:nphi+nld,:)=bi(nld:nphi+nld,:)+bsigi
		call pbtrf(bi)
		mu=sm
		call pbtrs(bi,mu)
		call pbtrf(bsigi)
		ll=+.5*sum(sm*mu)+sum(log(bsigi(nphi+1,:)))-sum(log(bi(nphi+nld,:)))
		ll=ll-.5*(coef(0)-s2fac_prior(1)-tgs2)**2/s2fac_prior(2)
		ll=ll-.5*sum((coef(1:)-s2coefs(oup:,0))**2)/s2coefs_var(oup)

		call rnnoa(m)
		call gbtrs(bi,m,ipivot(1:capT+nphi),kl=0)
		m=m+mu
	end subroutine
	
	subroutine draw_fs_fast
		integer	::  l
		real	:: bxxi(nphi+nld,-nphi+1:capT)
		real	:: sm(-nphi+1:capT),ms(-nphi+1:capT,2)
		real	:: new_coef(0:ntcoefs,2), new_s2s(0:capT,2)
		real	:: ll(2),u(1),z(0:ntcoefs)
		integer	:: j,t,i
		do l=1,nfac
			call set_bandxxi(l,bxxi,sm)
			if(imh==0)  then
				call rnnoa(z)
				z=z*mhscales(0,ofs_fast)
			else
				z=0
				if(imh==1) then
					call rnnoa(z(0:0))
				else
					call rnnoa(z(1:))
				endif
			endif

			new_coef(:,1)=s2coefs(ou0:,n+l)
			new_coef(0,1)=new_coef(0,1)+mhscales(1,ofs_fast)*sqrt(s2fac_prior(2)/capT)*z(0)
			new_coef(1:,1)=new_coef(1:,1)+mhscales(2,ofs_fast)*sqrt(s2coefs_var(oup)/capT)*z(1:)
			new_coef(:,2)=s2coefs(ou0:,n+l)
			new_s2s(1:,1)=s2bs(:,n+l)*exp(matmul(transpose(cost),new_coef(:,1)))
			new_s2s(0,1)=new_s2s(1,1)/s2bs(1,n+l)
			new_s2s(:,2)=s2s(:,n+l)
!$omp parallel do 			
			do i=1,2
				call set_fs_fast(new_coef(:,i),new_s2s(:,i),bxxi,sm,ll(i),ms(:,i))
			enddo
			
			call rnun(u)
			i=merge(1,2,u(1)<exp(ll(1)-ll(2)))
			us(:,n+l)=ms(:,i)
			if(i==1) then
				s2s(:,n+l)=new_s2s(:,1)
				s2coefs(ou0:,n+l)=new_coef(:,1)
				acc(imh,ofs_fast)=acc(imh,ofs_fast)+1
			endif
			call sub_fac(l)
		enddo

	end subroutine
	
	subroutine sub_fac(l)
		integer	:: l,j,t
		do j=1,n
			do t=1,capT
				us(t,j)=us(t,j)-sum(loads(:,t,l,j)*us(t:t-nld+1:-1,n+l))
			enddo
		enddo
	end subroutine 
	
end module
