module kmeans
	implicit none
	 
	contains
	
!***********************************************************************
!* KMPP - K-Means++ - Traditional data clustering with a special initialization
!* Public Domain - This program may be used by any person for any purpose.
!*
!* Origin:
!*    Hugo Steinhaus, 1956
!*
!* Refer to:
!*    "kmeans++: the advantages of careful seeding"
!*    David Arthur and Sergei Vassilvitskii
!*    Proceedings of the eighteenth annual ACM-SIAM symposium 
!*      on Discrete algorithms, 2007
!*
!*____Variable_______I/O_______Description___________________Type_______
!*    X(P,N)         In        Data points                   Real
!*    P              In        Dimension of the data         Integer
!*    N              In        Number of points              Integer
!*    K              In        # clusters                    Integer
!*    C(P,K)         Out       Center points of clusters     Real
!*    Z(N)           Out       What cluster a point is in    Integer
!*    WORK(N)        Neither                                 Real
!*    IFAULT         Out       Error code                    Integer
!************************************************************************
      SUBROUTINE KMPP (X, C, Z, IFAULT)
 
       IMPLICIT NONE
       INTEGER P, N, K, Z(:), IFAULT
	   real		:: x(:,:),c(:,:),work(size(x,2))
 
!               constants
       INTEGER, parameter	:: ITER=1000                 ! maximum iterations
       REAL, parameter		::  BIG=1E100                   ! arbitrary large number
!                local variables
       INTEGER h,i,i1,j,l,l0,l1
 
       REAL best, d2,tot,w
 
       LOGICAL CHANGE             ! whether any points have been reassigned
 
	   p=size(x,1)
	   n=size(x,2)
	   k=size(c,2)
	   
       IFAULT = 0
       IF (K < 1 .OR. K > N) THEN       ! K out of bounds
         IFAULT = 3
         RETURN
       END IF
       DO I = 1, N                       ! clear Z
         Z(I) = 0
       END DO
 
       DO I = 1, N
         WORK(I) = BIG
       END DO
 
       CALL RANDOM_NUMBER (W)
       I1 = MIN(INT(W * FLOAT(N)) + 1, N)  ! choose first center at random
       DO J = 1, P
         C(J,1) = X(J,I1)
       END DO
 
       DO L = 2, K                    ! initialize other centers
         TOT = 0.
         DO I = 1, N                     ! measure from each point
           BEST = WORK(I)
           D2 = 0.                         ! to prior center
           DO J = 1, P
             D2 = D2 + (X(J,I) - C(J,L-1)) **2  ! Squared Euclidean distance
             IF (D2 .GE. BEST) GO TO 10               ! needless to add to D2
           END DO                          ! next J
           IF (D2 < BEST) BEST = D2          ! shortest squared distance 
           WORK(I) = BEST 
  10       TOT = TOT + BEST             ! cumulative squared distance
         END DO                      ! next data point
 
         CALL RANDOM_NUMBER (W)
         W = W * TOT    ! uniform at random over cumulative distance
         TOT = 0.
         DO I = 1, N
           I1 = I
           TOT = TOT + WORK(I)
           IF (TOT > W) GO TO 20
         END DO                ! next I
  20     CONTINUE
         DO J = 1, P         ! assign center
           C(J,L) = X(J,I1)
         END DO
       END DO               ! next center to initialize
 
       DO H = 1, ITER
         CHANGE = .FALSE.
 
         DO I = 1, N
           L0 = Z(I)
           L1 = 0
           BEST = BIG
           DO L = 1, K
             D2 = 0.
             DO J = 1, P
               D2 = D2 + (X(J,I) - C(J,L)) **2
               IF (D2 .GE. BEST) GO TO 30
             END DO
  30         CONTINUE
             IF (D2 < BEST) THEN           ! new nearest center
               BEST = D2
               L1 = L
             END IF             
           END DO        ! next L
 
           IF (L0 .NE. L1) THEN
             Z(I) = L1                   !  reassign point 
             CHANGE = .TRUE.
           END IF
         END DO         ! next I
         IF (.NOT. CHANGE) RETURN      ! success
 
         DO L = 1, K              ! zero population
           WORK(L) = 0.
         END DO
         DO L = 1, K               ! zero centers
           DO J = 1, P
             C(J,L) = 0.
           END DO
         END DO
 
         DO I = 1, N
           L = Z(I)
           WORK(L) = WORK(L) + 1.             ! count
           DO J = 1, P
             C(J,L) = C(J,L) + X(J,I)         ! add
           END DO
         END DO
 
         DO L = 1, K
           IF (WORK(L) < 0.5) THEN          ! empty cluster check
             IFAULT = 1                     ! fatal error
             RETURN
           END IF
           W = 1. / WORK(L)
           DO J = 1, P
             C(J,L) = C(J,L) * W     ! multiplication is faster than division
           END DO
         END DO
 
       END DO                   ! next H
       IFAULT = 2                ! too many iterations
       RETURN
 
      END  ! of KMPP
 
 end module