; Classify particles assigned to each reference view
 ;
 ; SOURCE:   spider/docs/techs/recon1/Procs/verify-class-byview.spi
 ;
 ; PURPOSE:  Classify particles assigned to each reference view
 ;
 ; USAGE:    clean ; ./spider spi/dat @verify-class-byview
 ;
 ; REQUIRES:     spider/docs/techs/recon1/Procs/verify-settings.spi
 ;
 ; INPUTS:  (Where *** denotes group,  ### denotes view,   ??? denotes class)
 ;  [params]           ../params                             Parameter document            (one)
 ;  [ref_view_list]    [rec_dir]/sel_proj                    List of projection views      (one)
 ;  [view_list]        [view_dir]/prj###/sel_part_byv        Particle selection by view doc files  (one/view) 
 ;  [filtered_stack]   [view_dir]/prj###/filtavg             Filtered particle image stacks        (one/view 
 ;
 ; OUTPUTS:  
 ;  [class_doc]       [view_dir]/prj###/sel_part_byclass_??? Particle selection by class (one/class/view) 
 ;  [class_avg]       [view_dir]/prj###/classavg???          Class-average files         (one/class/view)
 ;  [class_var]       [view_dir]/prj###/classvar???          OPTIONAL variance file for each class (one/class/view)
 ;  [eigen_img]       [view_dir]/prj###/eigenimg???          Eigenimage template            (one/view)
 ;  [cas_prefix]      [view_dir]/prj###/verify               CA S output file prefix      (several files)
 ;  [eigenvalue_doc]  [view_dir]/prj###/listeigenvalues      List of eigenvalues            (one/view)
 ;  [class_stats_doc] [view_dir]/prj###/listclasses          CCC for each class-average     (one/view)
 ;  [summary_doc]     summary_classify                       Summary doc file               (one)
 ;
 ; ------------------- Parameters -------------------

 [first-view]      = 1    ; First reference-view
 [last-view]       = -1   ; Last reference-view (<1 == all)
 [ca-pca]          = 1    ; Correspondence analysis == 1, PCA == 2 , IPCA == 3
 [parts-per-class] = 75   ; Particle-to-class ratio
 [min-2classes]    = 40   ; Minimum number of particles for 2 classes
 [num-factors]     = 9    ; Number of eigenvalues to use
 [save-eigenimg]   = 0    ; Flag to save eigenimages (1 == save) (reconstituted images commented out) 
 [reduction]       = 2    ; Reduction factor applied to input particles
 [label-dim]       = 115  ; Window size (temporary) for labeling
                          ; (Class number and size unlikely to fit in label after size-reduction.)
                          ; Suggestions: 115 for 3-digit class# + 3-digit class-size, 128 for 3+4 or 4+3
 [user-constant]   = 0    ; Additive constant for correspondence analysis (0 == automatic)
                          ; (CorAn only works on non-negative pixel values.)
                          ; If unsure, check the range of a few particles or the noise-reference for positivity
 [save-variance]   = 0    ; Flag to save variance (1 == save)

 ; --------------- END BATCH HEADER ---------------

 MD
   TR OFF                               ; Decrease results file output
 MD
   VB OFF                               ; Decrease results file output

 ; Set common filenames & parameters
 @verify-settings
 [iter] = 0

 ; Temporary filenames
 [temp_class_map]   = 'tmpclassmap'     ; Temp list of particles by assigned class

 [temp_mask]        = '_6'              ; Mask
 [temp_ref_proj]    = '_5'              ; Reduced reference-projection
 [temp_rec_pos]     = '_91'             ; Positive reconstituted image
 [temp_rec_neg]     = '_93'             ; Negative reconstituted image
 [temp_class_avg]   = '_4'              ; Class average
 [temp_class_var]   = '_7'              ; Class variance
 [temp_exp_avg]     = '_2'              ; Expanded average (to dimension LABEL-DIM)
 [temp_labeled_avg] = '_3'              ; Expanded, labeled average

 SYS
   echo "  Using projections from: [ref_projs] for CCC" ; echo

 ; Get original image dimension
 UD 17, [sp_winsiz]
   [params]
 UD E                                 ; Close doc

 [winsiz] = [sp_winsiz]/[reduction]   ; Reduced image dimension

 ; Calculate mask radius
 [mask-radius] = int([winsiz]/2)

 ; Make circular mask
 MO  
   [temp_mask]
   [winsiz],[winsiz]
   C                    ; _C_ircle
   [mask-radius]

 IF ( [last-view] < 1 ) THEN
   ; Get number of views
   UD N [last-view]
     [ref_view_list]
 ENDIF

 ; Initialize particle-counter
 [total-particles] = 0

 ; Clean up pre-existing file
 DE
   [summary_doc]
   
 ; Label summary file
 SD /       VIEWNUM      NUM_PARTS   MAX_CLASSSIZE  NUM_CLASSES  DAVIESBOULDIN
   [summary_doc]

 SYS
   echo -n "  Beginning classification    "; date ;    echo

 IF ( [save-eigenimg]==1 ) THEN
   SYS
     echo "  Will generate eigenimages" ;    echo
 ENDIF

 ; Loop through reference views
 DO [view-key] = [first-view],[last-view] ; Loop through ref views ---------------

    ; Get reference-view #
    UD IC [view-key], [view]
      [ref_view_list]

    DE
      [class_stats_doc][view]_unsort

    IQ FI [exists]
      [view_list][view]
    IF ( [exists] == 0 ) THEN
      [num-classes]    = 0
      [max-class-size] = 0
      GOTO LB19
    ENDIF

    ; Get number of particles in current reference view
    UD N [view-parts]
      [view_list][view]

    ; Initialize registers
    [max-class-size] = 0         ; Maximum class size
    [min-img]        = 0         ; Image with minimum pixel value

    ; Skip unpopulated views
    IF ( [view-parts] == 0 ) THEN
      [num-classes]    = 0
      [max-class-size] = 0
      GOTO LB19
    ENDIF

    ; Clean up
    [class] = 1
    DE A                           ; Delete file series
      [class_doc][view][class]     ; First deleted file
    DE A
      [class_avg][view][class]
    DE A
      [class_var][view][class]
    DE
      [eigenvalue_doc][view]

    ; Shrink reference-projection
    DC S
      [ref_projs]@{****[view]}        ; Normalized reference-projection file   (input)
      [temp_ref_proj]                ; Reduced reference-projection file     (output)
      [reduction],[reduction]

    ; Determine number of classes to use
    [num-classes] = int(([view-parts]/[parts-per-class]) + 0.5)

    ; Trap for tiny classes
    IF ( [num-classes] < 2 ) THEN
      IF ( [view-parts] >= [min-2classes]) THEN 
        ; Force two classes if greater than specified threshold
        [num-classes] = 2
      ELSE
        [num-classes] = 1  ; Minimum number of classes will be 1
        [class]   = 1                     ; Dummy variable: class #

        ; Sort particles by cross-correlation coefficient
        DOC REN  
          [view_list][view]                ; Selection doc file (unsorted)    (input)
          [class_doc][view][class]_noccc   ; Class doc file, sorted by CCC   (output)
;;          [class_doc][view][class]_unsort  ; Class doc file, sorted by CCC   (output)

        SYS
          echo "  View: {%I4%[view]}  Has: {%I3%[num-classes]} class  "

        [class-parts] = [view-parts]

        GOTO LB3            ; Skip past correspondence analysis + classification
      ENDIF
    ENDIF

    ; If CA selected:
    IF ( [ca-pca] == 1 ) THEN
        ; Check for small pixel intensities

        ; Initialize minimum
        [overall-min] = 999999

        ; Loop through particles
        DO [part-key5] = 1,[view-parts]  ; Loop through particles --------
          ; Get particle number
          UD IC [part-key5],[view-slice]
            [view_list][view]

          ; Get image min
          FS
            [filtered_stack][view]@******[view-slice]
          FI H [img-min]
            [filtered_stack][view]@******[view-slice]
            FMIN                            ; Header location for min

          ; Update if necessary
          IF ( [img-min] < [overall-min] ) THEN
            [overall-min] = [img-min]
            [min-img]     = [view-slice]
          ENDIF
        ENDDO                               ; End particle-loop --------------------------

        UD ICE                              ; Close document
          [view_list][view]

        ; If additive constant set to automatic, set it
        IF ( [overall-min] < 0.05 ) THEN
          IF ( [user-constant] == 0 ) [add-constant] = 0.05 - [overall-min]
        ELSE
          [add-constant] = [user-constant]
        ENDIF

        SYS
          echo "  View: {%i4%[view]}, Minimum: {%f10.3%[overall-min]} at image: {******[min-img]}, Add constant: {%f11.4%[add-constant]}"
.
    ENDIF

    ; RUN MULTIVARIATE STATISICAL ANALYSIS

    IF ( [ca-pca] == 1 ) THEN
      ; Run correspondence analysis
      CA S
        [filtered_stack][view]@******
        [view_list][view] 
        [temp_mask]          ; Mask
        [num-factors]        ; Number of eigenvalues
        C                    ; Correspondence analysis
        [add-constant]
        [cas_prefix][view]   ; File prefix    (output)
    ENDIF

    IF ( [ca-pca] == 2) THEN
      ; Run iterative principle component analysis
      CA S
        [filtered_stack][view]@******
        [view_list][view] 
        [temp_mask]          ; Mask
        [num-factors]        ; Number of eigenvalues
        P                    ; Principle component analysis
        [cas_prefix][view]   ; File prefix    (output)
    ENDIF

    IF ( [ca-pca] == 3 ) THEN
      ; Run principle component analysis
      CA S
        [filtered_stack][view]@******
        [view_list][view]             ; Files                     (input)
        [temp_mask]                   ; Mask
        [num-factors]                 ; Number of eigenvalues
        I                             ; Iterative PCA
        [cas_prefix][view]            ; File prefix              (output)
    ENDIF


    ; Run K-means classification
    CL KM [km-bet],[km-win],[km-coleman],[km-harabasz],[km-db]
      [cas_prefix][view]_IMC         ; File                      (input)
      [num-classes]                  ; Number of classes
      1-[num-factors]                ; Factors to use
      0                              ; No factor weighting
      0                              ; No random seed
      [class_doc][view]_noccc        ; Temp class-list doc file template (output)
      [temp_class_map]               ; Unused class membership file      (output)

    IF ( [save-eigenimg] >= 1 ) THEN
      ; If eigenimage-flag is 1, then save

      ; GENERATE EIGENIMAGES (OPTIONALLY)

 ;     ; Calculate dimensions for reconstituted images
 ;     [double-idim] = [winsiz]*2  ; Double image-dimension
 ;     [idim-plus1]  = [winsiz]+1  ; Image-dimension + 1

      ; Loop through eigenvalues
      DO [factor-num] = 1,[num-factors]  ; Loop through eigenvalues ----------

        ; If (I)PCA, extra question asked when generating eigenimages
        IF ( [ca-pca] .NE. 1 ) THEN
          CA SRE
            [cas_prefix][view]      ; Files         (input)
            N                       ; Subtract average?
            [factor-num]            ; Factor
            [eigen_img][view][factor-num]  ; File   (output)

        ELSE  ; Correspondence analysis

          CA SRE
            [cas_prefix][view]      ; Files         (input)
            [factor-num]
            [eigen_img][view][factor-num] ; File    (output)
        ENDIF

 ;      ; Generate positive reconstituted image
 ;      CA SRA
 ;        [cas_prefix][view]        ; Files         (input)
 ;        [factor-num]
 ;        0.2                       ; Eigenvalue
 ;        [temp_rec_pos]            ; File          (output)
 ;
 ;      ; pad image to twice the height
 ;      PD
 ;        [temp_rec_pos]           ; Positive reconstituted image (input)
 ;        [recon_img][factor-num]
 ;        [winsiz],[double-idim]
 ;        B                        ; Set background to Border
 ;        1,1                      ; Top-left coordinates
 ;
 ;      ; generate negative reconstituted image
 ;      CA SRA
 ;        [cas_prefix][view]       ; Files         (input)
 ;        [factor-num]
 ;        -0.2                     ; Eigenvalue
 ;        [temp_rec_neg]           ; File          (output)
 ;
 ;      ; Insert negative reconstituted image into larger image
 ;      IN
 ;        [temp_rec_neg]           ; Negative reconstituted image   (input)
 ;        [recon_img][factor-num]  ; File    (overwritten)   (input)
 ;        1,[idim-plus1]           ; X-,Y-coords

        SD [factor-num], [factor-num],[factor-num]
          [eigenvalue_doc][view]   ; File          (output)
        ENDDO                      ; End eigenvalue-loop -----------------------

        SD E
          [eigenvalue_doc][view]
    ENDIF

    LB3                        ; Skip to here if only one class

    ; GENERATE CLASS AVERAGES

    ; Loop through classes
    DO [class] = 1,[num-classes]
      ; ADD PARTICLE#, CCC, ETC. TO CLASS DOC

      ; Add info from selection doc
      DOC AND
        [view_list][view]
        [class_doc][view][class]_noccc
        [class_doc][view][class]_unsort
        1                        ; Column # to intersect: view-slice #

;;      LB3                        ; Skip to here if only one class

      ; Sort individual particles by CCC
      DOC SORT
        [class_doc][view][class]_unsort  ; Temp vile  : w/CCC, unsorted     (input)
        [class_doc][view][class]_noccc   ; Class doc file , sorted by CCROT  (output)
        4                                ; Column # to sort: CCROT
        Y                                ; Renumber

      ; Get # of particles in class
      UD N [class-parts]
        [class_doc][view][class]_noccc

      ; If size greater than maximum, then update
      IF ( [class-parts] > [max-class-size]) [max-class-size] = [class-parts]

      SD /       VIEW_WIN    GLOBAL_NUM     GRP_WIN         CCROT        MIRROR       GRP_NUM        VIEW
        [class_doc][view][class]       ; File             (output)
      SD E
        [class_doc][view][class]       ; File             (closed)

      [ccc-sum] = 0                   ; Initialize cumulative CCC-sum

      ; Loop through particles
      DO [part-key6] = 1,[class-parts]

        ; Read view-slice#, other parameters, from selection file
        UD IC [part-key6], [view-slice],[global-part],[grp-slice],[ccrot],[mirror],[group-num],[view]
          [class_doc][view][class]_noccc             ; File             (input)

        ; Calculate CCC
        CC C [ccc]
          [temp_ref_proj]                            ; Reduced reference-projection     (input)
          [filtered_stack][view]@******[view-slice]  ; Particle         (input)
          [temp_mask]                                ; Reduced mask     (input)

        [ccc-sum] = [ccc-sum] + [ccc]

        ; Write to doc
        SD [part-key6], [view-slice],[global-part],[ccc],[ccrot],[mirror],[group-num],[grp-slice]
          [class_doc][view][class]      ; File             (output)
      ENDDO

      ; Clean up
      UD ICE
        [class_doc][view][class]_noccc  ; File             (closed)
      SD E
        [class_doc][view][class]        ; File             (closed)

      ; Calculate unlabeled average
      AS R
        [filtered_stack][view]@******
        [class_doc][view][class]     ; Class-list doc file      (input)
        A                            ; All images
        [temp_class_avg]             ; Class average file       (output)
        [temp_class_var]             ; Cclass variance file     (output)

      ; LABEL AVERAGES

      ; Expand to fit text label, if necessary
      IP
        [temp_class_avg]            ; Class-average file            (input)
        [temp_exp_avg]              ; Expanded class-average file   (output)
        [label-dim],[label-dim]

      ; Get class size
      UD N [class-size]
        [class_doc][view][class]

      [total-particles] = [total-particles] + [class-size]

      ; Label with class number & size
      LA B
        [temp_exp_avg]             ; Expanded class-average file           (input)
        [temp_labeled_avg]         ; Expanded, labeled class-average file  (output)
        {***[class]},n={***[class-size]}

      [label-ydim] = ([label-dim]+36)*[winsiz]/[label-dim]     ; Label is 36 pixels tall

      ; Shrink back down
      IP
        [temp_labeled_avg]         ; Expanded, labeled class-average file  (input)
        [class_avg][view][class]   ; File              (output)
        [winsiz],[label-ydim]

      ; Delete variance if not needed
      IF ( [save-variance] == 1 ) THEN
        CP
          [temp_class_var]            ; File              (input)
          [class_var][view][class]    ; File              (output)
      ENDIF

      ; Calculate avg CCC
      [ccc-avg] = [ccc-sum]/[class-parts]

      ; Get standard deviation of the variance
      FS M [var-max],[var-min],[var-avg],[var-sd]
        [temp_class_var]                   ; File              (input)
        [temp_mask]                        ; File reduced mask (input)

      ; Trap for variance of a single image
      IF ( [class-size] == 1 ) [var-sd]=999
      ; Otherwise, the variance of a single image would be NaN

      ; Write CCC and variance-SD to doc file
      SD [class], [class],[ccc-avg],[var-sd]
        [class_stats_doc][view]_unsort      ; File    (output)

      ; Clean up intermediate doc files
      DE
        [class_doc][view][class]_noccc      ; File    (removed)
      DE
        [class_doc][view][class]_unsort     ; File    (removed)
    ENDDO                  ; End class-loop -------------------------------


    SYS
      echo "  View: {%I4%[view]}  Classes: {%I3%[num-classes]}  Max class size: {%I3%[max-class-size]}"     

    ; Close document
    SD E
      [class_stats_doc][view]_unsort  ; File    (output)

    DE
      [class_stats_doc][view]         ; File    (removed)
    SD /      CLASSNUM       CCC        VARIANCE_SD
      [class_stats_doc][view]         ; File    (closed)
    SD E
      [class_stats_doc][view]         ; File    (closed)

     ; Sort by CCC
    DOC SORT A
      [class_stats_doc][view]_unsort  ; File    (input)
      [class_stats_doc][view]         ; File    (output)
      2                               ; Column for CCC
      Y                               ; Renumber 

    DE
      [class_stats_doc][view]_unsort  ; File    (removed)

    ; Remove class-map doc containing list of particles by assigned class
    DE 
      [temp_class_map]                ; File    (removed)

    SYS M
      rm -f [cas_prefix][view]_SEQ.$DATEXT ;
      rm -f [cas_prefix][view]_SET.$DATEXT ;
      rm -f [cas_prefix][view]_PIX.$DATEXT ;
      rm -f [cas_prefix][view]_MAS.$DATEXT ;
.
    LB19  ; Skip to here if view unpopulated

    SD [view-key], [view],[view-parts],[max-class-size],[num-classes],[km-db]
      [summary_doc]
 ENDDO                         ; End reference-view loop --------------------

 SD / TOTAL_PARTS
   [summary_doc]               ; File   (output)
 [dummy] = -[last-view]
 SD [dummy], [total-particles]
   [summary_doc]               ; File   (output)

 ; Close docs
 UD ICE
   [ref_view_list]             ; File   (closed)
 SD E
   [summary_doc]               ; File   (closed)

 SYS
   echo ; echo -n "  Done -- classified: {%I0%[total-particles]} particles    "; date;    echo 

 EN

 ; Modified 2016-03-23
 ;   TO DO: write unsorted class doc in core
 ;   2016-03-23 (agl)       -- Filenames altered, echo formatting altered
 ;   2013-11-27 (trs)       -- Bug fix with new syntax when view has single class
 ;   2013-10-23 (agl)       -- New file names, modernized syntax & cosmetic
 ;   2012-03-01 (trs)       -- Added #classes and Davies-Bouldin# to summary doc
 ;   2012-02-29 (trs)       -- K-means uses same factors as CA/PCA (had been hard-wired to 9)
 ;   2009-07-13 (trs)       -- Added summary-file output
 ;   2009-07-03 (trs)       -- Prints maximum class size to screen
 ;   2009-06-22 (trs)       -- Extra question added by CA SRE when using (I)PCA
 ;   2009-06-09 (trs)       -- Can operate on non-consecutive views from list
 ;   2009-05-27 (trs)       -- Keeping _EIG file from CA S
 ;   2009-05-26 (trs)       -- Option to save eigenimages and reconstituted images, adapted from ca-pca.spi
 ;   2009-05-26 (trs)       -- Number of eigenvalues now user-defined
 ;   2009-05-22 (trs)       -- Uses selection doc in CA S instead of first N particles
 ;   2008-11-12 (trs)       -- Now a parameter to specify CA, PCA, or IPCA
 ;   2007-11-27 (trs)       -- Calculates averaged CCC instead of CCC of class-average
 ;   2007-11-27 (trs)       -- Can force poorly-populated views into two classes
 ;   2007-10-11 (trs)       -- Last reference-view now an input parameter
 ;   2007-10-01 (trs)       -- Defocus-group list now optional
 ;   2007-09-06 (trs)       -- Input particles are now in stacks
 ;   2007-09-06 (trs)       -- Uses iterative PCA instead of CorAn
 ;   2006-08-29 (trs)       -- Gets last defocus-group number automatically
 ;   2006-07-27 (trs)       -- Bug fix in reference-projection file-pattern
 ;   2006-04-05 (trs)       -- Uses last defocus group projections for CCC
 ;   2006-02-06 (trs & pp)  -- Updated for changes in projection-matching
 ;   2005-03-27 (trs)       -- No longer needs how_many file
 ;   2005-01-27 (trs & gra) -- Deals with variances of single-image classes
 ;   2005-01-24 (trs & js)  -- Bug fix in loop that checks for low intensities
 ;   2004-12-22 (trs)       -- Checks for images with low intensities
 ;   2004-12-22 (trs & jl)  -- Prints standard deviation of the class-variance to stats document
 ;   2004-12-22 (trs)       -- First reference-view is a parameter, in case you need to resume
 ;   2004-05-11 (trs)       -- Handles poorly-populated classes differently
 ;   2004-05-04 (trs)       -- Adds option to save/delete variance
 ;   2004-04-22 (trs)       -- Sorts individual images by CCC (worst to best)
 ;   2004-04-16 (trs)       -- Uses CorAn instead of PCA
 ;   2004-04-14 (trs)       -- Deletes unused 'CA S' output (all except _IMC)

 ;