jdiedrichsen · maedbhk · Feb 14, 2018 · Mar 10, 2018 · Mar 28, 2018 · Apr 10, 2018
diff --git a/graph/scatterplot.m b/graph/scatterplot.m
@@ -45,6 +45,7 @@
 %       'yaxisIncl'      : Points (min,max) on yaxis that need to be shown: YLim will
 %                          be larger of the same 
 % Joern Diedrichsen (joern.diedrichsen@googlemail.com)
+% MK: added label options: labelcolor, labelsize, labelfont
 
 [Nx n] = size(y);
 if (n>1)
@@ -65,6 +66,8 @@
 polyorder=1;
 label=[];
 labelformat='%d';
+labelsize=10; 
+labelfont='arial'; 
 wfun='bisquare';
 leg=[];
 r2=[];t=[];
@@ -80,7 +83,7 @@
 xaxisIncl=[];
 
 variables={'markertype','markercolor','markerfill','markersize','CAT',...
-    'subset','split','leg','leglocation','color','label',...
+    'subset','split','leg','leglocation','color','label','labelsize','labelfont'...
     'regression','polyorder','intercept',...
     'colormap',...
     'bubble','bubble_minsize','bubble_maxsize',...
@@ -120,6 +123,7 @@
 F.markertype=markertype;
 F.markercolor=markercolor;
 F.markerfill=markerfill;
+F.labelcolor=labelcolor; % MK added
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % re-code cell array
@@ -251,14 +255,16 @@
         xoffset=(xlims(2)-xlims(1))/40;
         yoffset=(ylims(2)-ylims(1))/40;
         for i=1:length(x)
-            text(x(i)+xoffset,y(i)+yoffset,label{i});
+            th=text(x(i)+xoffset,y(i)+yoffset,label{i});
+            set(th,'Color',labelcolor{i},'FontSize',labelsize,'FontName',labelfont) % MK added
         end;
     else
         label=label(find(subset));
         xoffset=(xlims(2)-xlims(1))/40;
         yoffset=(ylims(2)-ylims(1))/40;
         for i=1:length(x)
-            text(x(i)+xoffset,y(i)+yoffset,sprintf(labelformat,label(i)));
+            th=text(x(i)+xoffset,y(i)+yoffset,sprintf(labelformat,label(i)));
+            set(th,'Color',labelcolor{i},'FontSize',labelsize,'FontName',labelfont) % MK added
         end;
     end;
 end;

diff --git a/stats/RandIndex.m b/stats/RandIndex.m
@@ -0,0 +1,62 @@
+function [AR,RI,MI,HI]=RandIndex(c1,c2)
+%RANDINDEX - calculates Rand Indices to compare two partitions
+% ARI=RANDINDEX(c1,c2), where c1,c2 are vectors listing the 
+% class membership, returns the "Hubert & Arabie adjusted Rand index".
+% [AR,RI,MI,HI]=RANDINDEX(c1,c2) returns the adjusted Rand index, 
+% the unadjusted Rand index, "Mirkin's" index and "Hubert's" index.
+%
+% See L. Hubert and P. Arabie (1985) "Comparing Partitions" Journal of 
+% Classification 2:193-218
+
+%(C) David Corney (2000)   		D.Corney@cs.ucl.ac.uk
+% Maedbh King edited so that zeros and NaNs are ignored
+
+if nargin < 2 | min(size(c1)) > 1 | min(size(c2)) > 1
+   error('RandIndex: Requires two vector arguments')
+   return
+end
+
+C=Contingency(c1,c2);	%form contingency matrix
+
+n=sum(sum(C));
+nis=sum(sum(C,2).^2);		%sum of squares of sums of rows
+njs=sum(sum(C,1).^2);		%sum of squares of sums of columns
+
+t1=nchoosek(n,2);		%total number of pairs of entities
+t2=sum(sum(C.^2));	%sum over rows & columnns of nij^2
+t3=.5*(nis+njs);
+
+%Expected index (for adjustment)
+nc=(n*(n^2+1)-(n+1)*nis-(n+1)*njs+2*(nis*njs)/n)/(2*(n-1));
+
+A=t1+t2-t3;		%no. agreements
+D=  -t2+t3;		%no. disagreements
+
+if t1==nc
+   AR=0;			%avoid division by zero; if k=1, define Rand = 0
+else
+   AR=(A-nc)/(t1-nc);		%adjusted Rand - Hubert & Arabie 1985
+end
+
+RI=A/t1;			%Rand 1971		%Probability of agreement
+MI=D/t1;			%Mirkin 1970	%p(disagreement)
+HI=(A-D)/t1;	    %Hubert 1977	%p(agree)-p(disagree)
+
+function Cont=Contingency(Mem1,Mem2)
+
+if nargin < 2 | min(size(Mem1)) > 1 | min(size(Mem2)) > 1
+   error('Contingency: Requires two vector arguments')
+   return
+end
+
+Cont=zeros(max(Mem1),max(Mem2));
+
+Mem1(isnan(Mem1))=0; % MK add
+Mem2(isnan(Mem2))=0; % MK add
+for i = 1:length(Mem1);
+    if Mem1(i)==0 || Mem2(i)==0, % MK add
+        i=i+1;
+    else
+   Cont(Mem1(i),Mem2(i))=Cont(Mem1(i),Mem2(i))+1;
+    end
+end
diff --git a/stats/semiNonNegMatFac.m b/stats/semiNonNegMatFac.m
@@ -1,17 +1,19 @@
-function [F,G,Err2]=semiNonNegMatFac(X,k,varargin); 
+function [F,G,Info,winner]=semiNonNegMatFac(X,k,varargin); 
 % Semi-nonnegative matrix factorization 
 % as in Ding & Jordan
 % X: NxP Matrix of P observations to be clustered over N variables 
 % X = F *G' 
 % k: number of clusters 
 % Joern Diedrichsen 
+% Maedbh King: added winner-take-all and R2 and R as output
+
 G0= [];                 % Startibg value (otherwise uses Kmeans + 0.2)
 threshold = 0.01;       % Threshold on reconstruction error 
 maxIter = 5000;         % Maximal number of iterations 
 normaliseF = 1;         % Normalise F to unit vectors afterwards?
 vararginoptions(varargin,{'G0','threshold','maxIter','normaliseF'}); 
 if (isempty(G0))
-    g=kmeans(X',k); 
+    g=kmeans(X',k);
     G0 = indicatorMatrix('identity',g); 
 end; 
 df = inf; 
@@ -48,4 +50,23 @@
     f=sqrt(sum(F.^2)); % Factor for normalisation 
     F=bsxfun(@times,F,1./f); 
     G=bsxfun(@times,G,f);  % Normalise the group factor the other way
-end; 
+end; 
+
+% Provide fitting info
+% R2
+SSR=nansum(R.*R); 
+SST=nansum(X.*X); 
+
+% R
+P=F*G'; 
+SXP=nansum(nansum(X.*P,1)); 
+SPP=nansum(nansum(P.*P)); 
+
+%Info.numiter = n-1; 
+Info.error   = Err2(end);
+Info.R2_vox  = 1-nansum(R.*R)./nansum(X.*X);  % MK: R2=1-SSR/SST
+Info.R2      = 1-nansum(SSR)./nansum(SST); 
+Info.R       = SXP./sqrt(nansum(SST).*SPP);   % MK: R=covar(X,P)/var(X)*var(P)
+
+% Do winner-take-all
+[~,winner]=max(G,[],2); % MK