EMD

由于给定内容中缺少博客具体信息,无法提炼出关键信息形成摘要。
/*
    emd.c

    Last update: 3/14/98

    An implementation of the Earth Movers Distance.
    Based of the solution for the Transportation problem as described in
    "Introduction to Mathematical Programming" by F. S. Hillier and 
    G. J. Lieberman, McGraw-Hill, 1990.

    Copyright (C) 1998 Yossi Rubner
    Computer Science Department, Stanford University
    E-Mail: rubner@cs.stanford.edu   URL: http://vision.stanford.edu/~rubner
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "emd.h"

#define DEBUG_LEVEL 0
/*
 DEBUG_LEVEL:
   0 = NO MESSAGES
   1 = PRINT THE NUMBER OF ITERATIONS AND THE FINAL RESULT
   2 = PRINT THE RESULT AFTER EVERY ITERATION
   3 = PRINT ALSO THE FLOW AFTER EVERY ITERATION
   4 = PRINT A LOT OF INFORMATION (PROBABLY USEFUL ONLY FOR THE AUTHOR)
*/


#define MAX_SIG_SIZE1  (MAX_SIG_SIZE+1)  /* FOR THE POSIBLE DUMMY FEATURE */

/* NEW TYPES DEFINITION */

/* node1_t IS USED FOR SINGLE-LINKED LISTS */
typedef struct node1_t {
  int i;
  double val;
  struct node1_t *Next;
} node1_t;

/* node1_t IS USED FOR DOUBLE-LINKED LISTS */
typedef struct node2_t {
  int i, j;
  double val;
  struct node2_t *NextC;               /* NEXT COLUMN */
  struct node2_t *NextR;               /* NEXT ROW */
} node2_t;



/* GLOBAL VARIABLE DECLARATION */
static int _n1, _n2;                          /* SIGNATURES SIZES */
static float _C[MAX_SIG_SIZE1][MAX_SIG_SIZE1];/* THE COST MATRIX */
static node2_t _X[MAX_SIG_SIZE1*2];            /* THE BASIC VARIABLES VECTOR */
/* VARIABLES TO HANDLE _X EFFICIENTLY */
static node2_t *_EndX, *_EnterX;
static char _IsX[MAX_SIG_SIZE1][MAX_SIG_SIZE1];
static node2_t *_RowsX[MAX_SIG_SIZE1], *_ColsX[MAX_SIG_SIZE1];
static double _maxW;
static float _maxC;

/* DECLARATION OF FUNCTIONS */
static float init(signature_t *Signature1, signature_t *Signature2,
		  float (*Dist)(feature_t *, feature_t *));
static void findBasicVariables(node1_t *U, node1_t *V);
static int isOptimal(node1_t *U, node1_t *V);
static int findLoop(node2_t **Loop);
static void newSol();
static void russel(double *S, double *D);
static void addBasicVariable(int minI, int minJ, double *S, double *D, 
			     node1_t *PrevUMinI, node1_t *PrevVMinJ,
			     node1_t *UHead);
#if DEBUG_LEVEL > 0
static void printSolution();
#endif


/******************************************************************************
float emd(signature_t *Signature1, signature_t *Signature2,
	  float (*Dist)(feature_t *, feature_t *), flow_t *Flow, int *FlowSize)
  
where

   Signature1, Signature2  Pointers to signatures that their distance we want
              to compute.
   Dist       Pointer to the ground distance. i.e. the function that computes
              the distance between two features.
   Flow       (Optional) Pointer to a vector of flow_t (defined in emd.h) 
              where the resulting flow will be stored. Flow must have n1+n2-1
              elements, where n1 and n2 are the sizes of the two signatures
              respectively.
              If NULL, the flow is not returned.
   FlowSize   (Optional) Pointer to an integer where the number of elements in
              Flow will be stored
              
******************************************************************************/

float emd(signature_t *Signature1, signature_t *Signature2,
	  float (*Dist)(feature_t *, feature_t *),
	  flow_t *Flow, int *FlowSize)
{
  int itr;
  double totalCost;
  float w;
  node2_t *XP;
  flow_t *FlowP;
  node1_t U[MAX_SIG_SIZE1], V[MAX_SIG_SIZE1];

  w = init(Signature1, Signature2, Dist);

#if DEBUG_LEVEL > 1
  printf("/nINITIAL SOLUTION:/n");
  printSolution();
#endif
 
  if (_n1 > 1 && _n2 > 1)  /* IF _n1 = 1 OR _n2 = 1 THEN WE ARE DONE */
    {
      for (itr = 1; itr < MAX_ITERATIONS; itr++)
	{
	  /* FIND BASIC VARIABLES */
	  findBasicVariables(U, V);
	  
	  /* CHECK FOR OPTIMALITY */
	  if (isOptimal(U, V))
	    break;
	  
	  /* IMPROVE SOLUTION */
	  newSol();
	  
#if DEBUG_LEVEL > 1
	  printf("/nITERATION # %d /n", itr);
	  printSolution();
#endif
	}

      if (itr == MAX_ITERATIONS)
	fprintf(stderr, "emd: Maximum number of iterations has been reached (%d)/n",
		MAX_ITERATIONS);
    }

  /* COMPUTE THE TOTAL FLOW */
  totalCost = 0;
  if (Flow != NULL)
    FlowP = Flow;
  for(XP=_X; XP < _EndX; XP++)
    {
      if (XP == _EnterX)  /* _EnterX IS THE EMPTY SLOT */
	continue;
      if (XP->i == Signature1->n || XP->j == Signature2->n)  /* DUMMY FEATURE */
	continue;
      
      if (XP->val == 0)  /* ZERO FLOW */
	continue;

      totalCost += (double)XP->val * _C[XP->i][XP->j];
      if (Flow != NULL)
	{
	  FlowP->from = XP->i;
	  FlowP->to = XP->j;
	  FlowP->amount = XP->val;
	  FlowP++;
	}
    }
  if (Flow != NULL)
    *FlowSize = FlowP-Flow;

#if DEBUG_LEVEL > 0
  printf("/n*** OPTIMAL SOLUTION (%d ITERATIONS): %f ***/n", itr, totalCost);
#endif

  /* RETURN THE NORMALIZED COST == EMD */
  return (float)(totalCost / w);
}





/**********************
   init
**********************/
static float init(signature_t *Signature1, signature_t *Signature2, 
		  float (*Dist)(feature_t *, feature_t *))
{
  int i, j;
  double sSum, dSum, diff;
  feature_t *P1, *P2;
  double S[MAX_SIG_SIZE1], D[MAX_SIG_SIZE1];
 
  _n1 = Signature1->n;
  _n2 = Signature2->n;

  if (_n1 > MAX_SIG_SIZE || _n2 > MAX_SIG_SIZE)
    {
      fprintf(stderr, "emd: Signature size is limited to %d/n", MAX_SIG_SIZE);
      exit(1);
    }
  
  /* COMPUTE THE DISTANCE MATRIX */
  _maxC = 0;
  for(i=0, P1=Signature1->Features; i < _n1; i++, P1++)
    for(j=0, P2=Signature2->Features; j < _n2; j++, P2++) 
      {
	_C[i][j] = Dist(P1, P2);
	if (_C[i][j] > _maxC)
	  _maxC = _C[i][j];
      }
	
  /* SUM UP THE SUPPLY AND DEMAND */
  sSum = 0.0;
  for(i=0; i < _n1; i++)
    {
      S[i] = Signature1->Weights[i];
      sSum += Signature1->Weights[i];
      _RowsX[i] = NULL;
    }
  dSum = 0.0;
  for(j=0; j < _n2; j++)
    {
      D[j] = Signature2->Weights[j];
      dSum += Signature2->Weights[j];
      _ColsX[j] = NULL;
    }

  /* IF SUPPLY DIFFERENT THAN THE DEMAND, ADD A ZERO-COST DUMMY CLUSTER */
  diff = sSum - dSum;
  if (fabs(diff) >= EPSILON * sSum)
    {
      if (diff < 0.0)
	{
	  for (j=0; j < _n2; j++)
	    _C[_n1][j] = 0;
	  S[_n1] = -diff;
	  _RowsX[_n1] = NULL;
	  _n1++;
	}
      else
	{
	  for (i=0; i < _n1; i++)
	    _C[i][_n2] = 0;
	  D[_n2] = diff;
	  _ColsX[_n2] = NULL;
	  _n2++;
	}
    }

  /* INITIALIZE THE BASIC VARIABLE STRUCTURES */
  for (i=0; i < _n1; i++)
    for (j=0; j < _n2; j++)
	_IsX[i][j] = 0;
  _EndX = _X;
   
  _maxW = sSum > dSum ? sSum : dSum;

  /* FIND INITIAL SOLUTION */
  russel(S, D);

  _EnterX = _EndX++;  /* AN EMPTY SLOT (ONLY _n1+_n2-1 BASIC VARIABLES) */

  return sSum > dSum ? dSum : sSum;
}


/**********************
    findBasicVariables
 **********************/
static void findBasicVariables(node1_t *U, node1_t *V)
{
  int i, j, found;
  int UfoundNum, VfoundNum;
  node1_t u0Head, u1Head, *CurU, *PrevU;
  node1_t v0Head, v1Head, *CurV, *PrevV;

  /* INITIALIZE THE ROWS LIST (U) AND THE COLUMNS LIST (V) */
  u0Head.Next = CurU = U;
  for (i=0; i < _n1; i++)
    {
      CurU->i = i;
      CurU->Next = CurU+1;
      CurU++;
    }
  (--CurU)->Next = NULL;
  u1Head.Next = NULL;

  CurV = V+1;
  v0Head.Next = _n2 > 1 ? V+1 : NULL;
  for (j=1; j < _n2; j++)
    {
      CurV->i = j;
      CurV->Next = CurV+1;
      CurV++;
    }
  (--CurV)->Next = NULL;
  v1Head.Next = NULL;

  /* THERE ARE _n1+_n2 VARIABLES BUT ONLY _n1+_n2-1 INDEPENDENT EQUATIONS,
     SO SET V[0]=0 */
  V[0].i = 0;
  V[0].val = 0;
  v1Head.Next = V;
  v1Head.Next->Next = NULL;

  /* LOOP UNTIL ALL VARIABLES ARE FOUND */
  UfoundNum=VfoundNum=0;
  while (UfoundNum < _n1 || VfoundNum < _n2)
    {

#if DEBUG_LEVEL > 3
      printf("UfoundNum=%d/%d,VfoundNum=%d/%d/n",UfoundNum,_n1,VfoundNum,_n2);
      printf("U0=");
      for(CurU = u0Head.Next; CurU != NULL; CurU = CurU->Next)
	printf("[%d]",CurU-U);
      printf("/n");
      printf("U1=");
      for(CurU = u1Head.Next; CurU != NULL; CurU = CurU->Next)
	printf("[%d]",CurU-U);
      printf("/n");
      printf("V0=");
      for(CurV = v0Head.Next; CurV != NULL; CurV = CurV->Next)
	printf("[%d]",CurV-V);
      printf("/n");
      printf("V1=");
      for(CurV = v1Head.Next; CurV != NULL; CurV = CurV->Next)
	printf("[%d]",CurV-V);
      printf("/n/n");
#endif
      
      found = 0;
      if (VfoundNum < _n2)
	{
	  /* LOOP OVER ALL MARKED COLUMNS */
	  PrevV = &v1Head;
	  for (CurV=v1Head.Next; CurV != NULL; CurV=CurV->Next)
	    {
	      j = CurV->i;
	      /* FIND THE VARIABLES IN COLUMN j */
	      PrevU = &u0Head;
	      for (CurU=u0Head.Next; CurU != NULL; CurU=CurU->Next)
		{
		  i = CurU->i;
		  if (_IsX[i][j])
		    {
		      /* COMPUTE U[i] */
		      CurU->val = _C[i][j] - CurV->val;
		      /* ...AND ADD IT TO THE MARKED LIST */
		      PrevU->Next = CurU->Next;
		      CurU->Next = u1Head.Next != NULL ? u1Head.Next : NULL;
		      u1Head.Next = CurU;
		      CurU = PrevU;
		    }
		  else
		    PrevU = CurU;
		}
	      PrevV->Next = CurV->Next;
	      VfoundNum++;
	      found = 1;
	    }
	}
     if (UfoundNum < _n1)
	{
	  /* LOOP OVER ALL MARKED ROWS */
	  PrevU = &u1Head;
	  for (CurU=u1Head.Next; CurU != NULL; CurU=CurU->Next)
	    {
	      i = CurU->i;
	      /* FIND THE VARIABLES IN ROWS i */
	      PrevV = &v0Head;
	      for (CurV=v0Head.Next; CurV != NULL; CurV=CurV->Next)
		{
		  j = CurV->i;
		  if (_IsX[i][j])
		    {
		      /* COMPUTE V[j] */
		      CurV->val = _C[i][j] - CurU->val;
		      /* ...AND ADD IT TO THE MARKED LIST */
		      PrevV->Next = CurV->Next;
		      CurV->Next = v1Head.Next != NULL ? v1Head.Next: NULL;
		      v1Head.Next = CurV;
		      CurV = PrevV;
		    }
		  else
		    PrevV = CurV;
		}
	      PrevU->Next = CurU->Next;
	      UfoundNum++;
	      found = 1;
	    }
	}
     if (! found)
       {
	 fprintf(stderr, "emd: Unexpected error in findBasicVariables!/n");
	 fprintf(stderr, "This typically happens when the EPSILON defined in/n");
	 fprintf(stderr, "emd.h is not right for the scale of the problem./n");
	 exit(1);
       }
    }
}




/**********************
    isOptimal
 **********************/
static int isOptimal(node1_t *U, node1_t *V)
{    
  double delta, deltaMin;
  int i, j, minI, minJ;

  /* FIND THE MINIMAL Cij-Ui-Vj OVER ALL i,j */
  deltaMin = INFINITY;
  for(i=0; i < _n1; i++)
    for(j=0; j < _n2; j++)
      if (! _IsX[i][j])
	{
	  delta = _C[i][j] - U[i].val - V[j].val;
	  if (deltaMin > delta)
	    {
              deltaMin = delta;
	      minI = i;
	      minJ = j;
	    }
	}

#if DEBUG_LEVEL > 3
  printf("deltaMin=%f/n", deltaMin);
#endif

   if (deltaMin == INFINITY)
     {
       fprintf(stderr, "emd: Unexpected error in isOptimal./n");
       exit(0);
     }
   
   _EnterX->i = minI;
   _EnterX->j = minJ;
   
   /* IF NO NEGATIVE deltaMin, WE FOUND THE OPTIMAL SOLUTION */
   return deltaMin >= -EPSILON * _maxC;

/*
   return deltaMin >= -EPSILON;
 */
}


/**********************
    newSol
**********************/
static void newSol()
{
    int i, j, k;
    double xMin;
    int steps;
    node2_t *Loop[2*MAX_SIG_SIZE1], *CurX, *LeaveX;
 
#if DEBUG_LEVEL > 3
    printf("EnterX = (%d,%d)/n", _EnterX->i, _EnterX->j);
#endif

    /* ENTER THE NEW BASIC VARIABLE */
    i = _EnterX->i;
    j = _EnterX->j;
    _IsX[i][j] = 1;
    _EnterX->NextC = _RowsX[i];
    _EnterX->NextR = _ColsX[j];
    _EnterX->val = 0;
    _RowsX[i] = _EnterX;
    _ColsX[j] = _EnterX;

    /* FIND A CHAIN REACTION */
    steps = findLoop(Loop);

    /* FIND THE LARGEST VALUE IN THE LOOP */
    xMin = INFINITY;
    for (k=1; k < steps; k+=2)
      {
	if (Loop[k]->val < xMin)
	  {
	    LeaveX = Loop[k];
	    xMin = Loop[k]->val;
	  }
      }

    /* UPDATE THE LOOP */
    for (k=0; k < steps; k+=2)
      {
	Loop[k]->val += xMin;
	Loop[k+1]->val -= xMin;
      }

#if DEBUG_LEVEL > 3
    printf("LeaveX = (%d,%d)/n", LeaveX->i, LeaveX->j);
#endif

    /* REMOVE THE LEAVING BASIC VARIABLE */
    i = LeaveX->i;
    j = LeaveX->j;
    _IsX[i][j] = 0;
    if (_RowsX[i] == LeaveX)
      _RowsX[i] = LeaveX->NextC;
    else
      for (CurX=_RowsX[i]; CurX != NULL; CurX = CurX->NextC)
	if (CurX->NextC == LeaveX)
	  {
	    CurX->NextC = CurX->NextC->NextC;
	    break;
	  }
    if (_ColsX[j] == LeaveX)
      _ColsX[j] = LeaveX->NextR;
    else
      for (CurX=_ColsX[j]; CurX != NULL; CurX = CurX->NextR)
	if (CurX->NextR == LeaveX)
	  {
	    CurX->NextR = CurX->NextR->NextR;
	    break;
	  }

    /* SET _EnterX TO BE THE NEW EMPTY SLOT */
    _EnterX = LeaveX;
}



/**********************
    findLoop
**********************/
static int findLoop(node2_t **Loop)
{
  int i, steps;
  node2_t **CurX, *NewX;
  char IsUsed[2*MAX_SIG_SIZE1]; 
 
  for (i=0; i < _n1+_n2; i++)
    IsUsed[i] = 0;

  CurX = Loop;
  NewX = *CurX = _EnterX;
  IsUsed[_EnterX-_X] = 1;
  steps = 1;

  do
    {
      if (steps%2 == 1)
	{
	  /* FIND AN UNUSED X IN THE ROW */
	  NewX = _RowsX[NewX->i];
	  while (NewX != NULL && IsUsed[NewX-_X])
	    NewX = NewX->NextC;
	}
      else
	{
	  /* FIND AN UNUSED X IN THE COLUMN, OR THE ENTERING X */
	  NewX = _ColsX[NewX->j];
	  while (NewX != NULL && IsUsed[NewX-_X] && NewX != _EnterX)
	    NewX = NewX->NextR;
	  if (NewX == _EnterX)
	    break;
 	}

     if (NewX != NULL)  /* FOUND THE NEXT X */
       {
	 /* ADD X TO THE LOOP */
	 *++CurX = NewX;
	 IsUsed[NewX-_X] = 1;
	 steps++;
#if DEBUG_LEVEL > 3
	 printf("steps=%d, NewX=(%d,%d)/n", steps, NewX->i, NewX->j);    
#endif
       }
     else  /* DIDN'T FIND THE NEXT X */
       {
	 /* BACKTRACK */
	 do
	   {
	     NewX = *CurX;
	     do 
	       {
		 if (steps%2 == 1)
		   NewX = NewX->NextR;
		 else
		   NewX = NewX->NextC;
	       } while (NewX != NULL && IsUsed[NewX-_X]);
	     
	     if (NewX == NULL)
	       {
		 IsUsed[*CurX-_X] = 0;
		 CurX--;
		 steps--;
	       }
	 } while (NewX == NULL && CurX >= Loop);
	 
#if DEBUG_LEVEL > 3
	 printf("BACKTRACKING TO: steps=%d, NewX=(%d,%d)/n",
		steps, NewX->i, NewX->j);    
#endif
           IsUsed[*CurX-_X] = 0;
	   *CurX = NewX;
	   IsUsed[NewX-_X] = 1;
       }     
    } while(CurX >= Loop);
  
  if (CurX == Loop)
    {
      fprintf(stderr, "emd: Unexpected error in findLoop!/n");
      exit(1);
    }
#if DEBUG_LEVEL > 3
  printf("FOUND LOOP:/n");
  for (i=0; i < steps; i++)
    printf("%d: (%d,%d)/n", i, Loop[i]->i, Loop[i]->j);
#endif

  return steps;
}



/**********************
    russel
**********************/
static void russel(double *S, double *D)
{
  int i, j, found, minI, minJ;
  double deltaMin, oldVal, diff;
  double Delta[MAX_SIG_SIZE1][MAX_SIG_SIZE1];
  node1_t Ur[MAX_SIG_SIZE1], Vr[MAX_SIG_SIZE1];
  node1_t uHead, *CurU, *PrevU;
  node1_t vHead, *CurV, *PrevV;
  node1_t *PrevUMinI, *PrevVMinJ, *Remember;

  /* INITIALIZE THE ROWS LIST (Ur), AND THE COLUMNS LIST (Vr) */
  uHead.Next = CurU = Ur;
  for (i=0; i < _n1; i++)
    {
      CurU->i = i;
      CurU->val = -INFINITY;
      CurU->Next = CurU+1;
      CurU++;
    }
  (--CurU)->Next = NULL;
  
  vHead.Next = CurV = Vr;
  for (j=0; j < _n2; j++)
    {
      CurV->i = j;
      CurV->val = -INFINITY;
      CurV->Next = CurV+1;
      CurV++;
    }
  (--CurV)->Next = NULL;
  
  /* FIND THE MAXIMUM ROW AND COLUMN VALUES (Ur[i] AND Vr[j]) */
  for(i=0; i < _n1 ; i++)
    for(j=0; j < _n2 ; j++)
      {
	float v;
	v = _C[i][j];
	if (Ur[i].val <= v)
	  Ur[i].val = v;
	if (Vr[j].val <= v)
	  Vr[j].val = v;
      }
  
  /* COMPUTE THE Delta MATRIX */
  for(i=0; i < _n1 ; i++)
    for(j=0; j < _n2 ; j++)
      Delta[i][j] = _C[i][j] - Ur[i].val - Vr[j].val;

  /* FIND THE BASIC VARIABLES */
  do
    {
#if DEBUG_LEVEL > 3
      printf("Ur=");
      for(CurU = uHead.Next; CurU != NULL; CurU = CurU->Next)
	printf("[%d]",CurU-Ur);
      printf("/n");
      printf("Vr=");
      for(CurV = vHead.Next; CurV != NULL; CurV = CurV->Next)
	printf("[%d]",CurV-Vr);
      printf("/n");
      printf("/n/n");
#endif
 
      /* FIND THE SMALLEST Delta[i][j] */
      found = 0; 
      deltaMin = INFINITY;      
      PrevU = &uHead;
      for (CurU=uHead.Next; CurU != NULL; CurU=CurU->Next)
	{
	  int i;
	  i = CurU->i;
	  PrevV = &vHead;
	  for (CurV=vHead.Next; CurV != NULL; CurV=CurV->Next)
	    {
	      int j;
	      j = CurV->i;
	      if (deltaMin > Delta[i][j])
		{
		  deltaMin = Delta[i][j];
		  minI = i;
		  minJ = j;
		  PrevUMinI = PrevU;
		  PrevVMinJ = PrevV;
		  found = 1;
		}
	      PrevV = CurV;
	    }
	  PrevU = CurU;
	}
      
      if (! found)
	break;

      /* ADD X[minI][minJ] TO THE BASIS, AND ADJUST SUPPLIES AND COST */
      Remember = PrevUMinI->Next;
      addBasicVariable(minI, minJ, S, D, PrevUMinI, PrevVMinJ, &uHead);

      /* UPDATE THE NECESSARY Delta[][] */
      if (Remember == PrevUMinI->Next)  /* LINE minI WAS DELETED */
	{
	  for (CurV=vHead.Next; CurV != NULL; CurV=CurV->Next)
	    {
	      int j;
	      j = CurV->i;
	      if (CurV->val == _C[minI][j])  /* COLUMN j NEEDS UPDATING */
		{
		  /* FIND THE NEW MAXIMUM VALUE IN THE COLUMN */
		  oldVal = CurV->val;
		  CurV->val = -INFINITY;
		  for (CurU=uHead.Next; CurU != NULL; CurU=CurU->Next)
		    {
		      int i;
		      i = CurU->i;
		      if (CurV->val <= _C[i][j])
			CurV->val = _C[i][j];
		    }
		  
		  /* IF NEEDED, ADJUST THE RELEVANT Delta[*][j] */
		  diff = oldVal - CurV->val;
		  if (fabs(diff) < EPSILON * _maxC)
		    for (CurU=uHead.Next; CurU != NULL; CurU=CurU->Next)
		      Delta[CurU->i][j] += diff;
		}
	    }
	}
      else  /* COLUMN minJ WAS DELETED */
	{
	  for (CurU=uHead.Next; CurU != NULL; CurU=CurU->Next)
	    {
	      int i;
	      i = CurU->i;
	      if (CurU->val == _C[i][minJ])  /* ROW i NEEDS UPDATING */
		{
		  /* FIND THE NEW MAXIMUM VALUE IN THE ROW */
		  oldVal = CurU->val;
		  CurU->val = -INFINITY;
		  for (CurV=vHead.Next; CurV != NULL; CurV=CurV->Next)
		    {
		      int j;
		      j = CurV->i;
		      if(CurU->val <= _C[i][j])
			CurU->val = _C[i][j];
		    }
		  
		  /* If NEEDED, ADJUST THE RELEVANT Delta[i][*] */
		  diff = oldVal - CurU->val;
		  if (fabs(diff) < EPSILON * _maxC)
		    for (CurV=vHead.Next; CurV != NULL; CurV=CurV->Next)
		      Delta[i][CurV->i] += diff;
		}
	    }
	}
    } while (uHead.Next != NULL || vHead.Next != NULL);
}




/**********************
    addBasicVariable
**********************/
static void addBasicVariable(int minI, int minJ, double *S, double *D, 
			     node1_t *PrevUMinI, node1_t *PrevVMinJ,
			     node1_t *UHead)
{
  double T;
  
  if (fabs(S[minI]-D[minJ]) <= EPSILON * _maxW)  /* DEGENERATE CASE */
    {
      T = S[minI];
      S[minI] = 0;
      D[minJ] -= T; 
    }
  else if (S[minI] < D[minJ])  /* SUPPLY EXHAUSTED */
    {
      T = S[minI];
      S[minI] = 0;
      D[minJ] -= T; 
    }
  else  /* DEMAND EXHAUSTED */
    {
      T = D[minJ];
      D[minJ] = 0; 
      S[minI] -= T; 
    }

  /* X(minI,minJ) IS A BASIC VARIABLE */
  _IsX[minI][minJ] = 1; 

  _EndX->val = T;
  _EndX->i = minI;
  _EndX->j = minJ;
  _EndX->NextC = _RowsX[minI];
  _EndX->NextR = _ColsX[minJ];
  _RowsX[minI] = _EndX;
  _ColsX[minJ] = _EndX;
  _EndX++;

  /* DELETE SUPPLY ROW ONLY IF THE EMPTY, AND IF NOT LAST ROW */
  if (S[minI] == 0 && UHead->Next->Next != NULL)
    PrevUMinI->Next = PrevUMinI->Next->Next;  /* REMOVE ROW FROM LIST */
  else
    PrevVMinJ->Next = PrevVMinJ->Next->Next;  /* REMOVE COLUMN FROM LIST */
}





/**********************
    printSolution
**********************/
static void printSolution()
{
  node2_t *P;
  double totalCost;

  totalCost = 0;

#if DEBUG_LEVEL > 2
  printf("SIG1/tSIG2/tFLOW/tCOST/n");
#endif
  for(P=_X; P < _EndX; P++)
    if (P != _EnterX && _IsX[P->i][P->j])
      {
#if DEBUG_LEVEL > 2
	printf("%d/t%d/t%f/t%f/n", P->i, P->j, P->val, _C[P->i][P->j]);
#endif
	totalCost += (double)P->val * _C[P->i][P->j];
      }

  printf("COST = %f/n", totalCost);
}


#ifndef _EMD_H
#define _EMD_H
/*
    emd.h

    Last update: 3/24/98

    An implementation of the Earth Movers Distance.
    Based of the solution for the Transportation problem as described in
    "Introduction to Mathematical Programming" by F. S. Hillier and 
    G. J. Lieberman, McGraw-Hill, 1990.

    Copyright (C) 1998 Yossi Rubner
    Computer Science Department, Stanford University
    E-Mail: rubner@cs.stanford.edu   URL: http://vision.stanford.edu/~rubner
*/


/* DEFINITIONS */
#define MAX_SIG_SIZE   100
#define MAX_ITERATIONS 500
#define INFINITY       1e20
#define EPSILON        1e-6

/*****************************************************************************/
/* feature_t SHOULD BE MODIFIED BY THE USER TO REFLECT THE FEATURE TYPE      */
typedef int feature_t;
/*****************************************************************************/


typedef struct
{
  int n;                /* Number of features in the signature */
  feature_t *Features;  /* Pointer to the features vector */
  float *Weights;       /* Pointer to the weights of the features */
} signature_t;


typedef struct
{
  int from;             /* Feature number in signature 1 */
  int to;               /* Feature number in signature 2 */
  float amount;         /* Amount of flow from "from" to "to" */
} flow_t;



float emd(signature_t *Signature1, signature_t *Signature2,
	  float (*func)(feature_t *, feature_t *),
	  flow_t *Flow, int *FlowSize);

#endif

  

Code for the Earth Movers Distance (EMD)

 

 

Introduction
This is an implementation of the Earth Movers Distance, as described in [1]. The EMD computes the distance between two distributions, which are represented by signatures. The signatures are sets of weighted features that capture the distributions. The features can be of any type and in any number of dimensions, and are defined by the user.

The EMD is defined as the minimum amount of work needed to change one signature into the other. The notion of "work" is based on the user-defined ground distance which is the distance between two features. The size of the two signatures can be different. Also, the sum of weights of one signature can be different than the sum of weights of the other (partial match). Because of this, the EMD is normalized by the smaller sum.

The code is implemented in C, and is based on the solution for the Transportation problem as described in [2]

Please let me know of any bugs you find, or any questions, comments, suggestions, and criticisms you have. If you find this code useful for your work, I would like very much to hear from you. Once you do, I'll inform you of any improvements, etc. Also, an acknowledgment in any publication describing work that uses this code would be greatly appreciated.

 

Usage
To use the code, perform the following steps:
  1. download the files emd.h and emd.c (check the log of changes for latest changes).
  2. In emd.h, modify the line
    typedef int feature_t;
    

    to reflect your feature data type. Structures may be used too, for example

    typedef struct {
       int X,Y,Z;
    } feature_t;
    
  3. To compute an EMD, call:
    float emd(signature_t *Signature1, signature_t *Signature2,
    	  float (*func)(feature_t *, feature_t *),
    	  flow_t *Flow, int *FlowSize);
    

    where

    Signature1, Signature2:
    Pointers to the two signatures that their distance we want to compute.
    Dist:
    Pointer to the ground distance function. i.e. the function that computes the distance between two features.
    Flow:
    (Optional) Pointer to a vector of flow_t (defined in emd.h) where the resulting flow will be stored. Flow must have n1+n2-1 elements, where n1 and n2 are the sizes of the two signatures respectively. If NULL, the flow is not returned.
    FlowSize:
    (Optional) In case Flow is not NULL, FlowSize should point to a integer where the number of flow elements (always less or equal to n1+n2-1) will be written.
  4. Compile emd.c and link it to your code.

The signature data type signature_t is defined in emd.h as:

typedef struct
{
  int n;                /* Number of features in the distribution */
  feature_t *Features;  /* Pointer to the features vector */
  float *Weights;       /* Pointer to the weights of the features */
} signature_t;

The feature data type feature_t is defined in emd.h and should be modified by the user to reflect his feature type.

In special cases, the user might want to change some of the values in the definitions in emd.h:

#define MAX_SIG_SIZE 100
The maximum allowed number of features in a signature
#define MAX_ITERATIONS 100
Maximum number of iterations. For ordinary problems, 100 should be more than enough.
#define INFINITY 1e20
INFINITY should be much larger than any other value in the problem
#define EPSILON 1e-6
EPSILON determines the accuracy of the solution.
 
Examples
  1. example1.c
    In this example the features are in three-dimensional space, and the ground distance is the Euclidean distance.
    To try this example, you need to modify feature_t in emd.h to be

    typedef struct { int X,Y,Z; } feature_t;
  2. example2.c
    Here, instead of providing a function to compute the ground distances, they are given in a predefined matrix. This is done by defining feature_t as int (the default), and setting the features in each signature to have consecutive numbers. The ground distance function uses these numbers as indexes to the predefined cost matrix.
    The resulting flow is returned from the emd function by passing as the last parameters a vector of flow_t, and a pointer to int where the actual number of flow elements will be written.
    Also, in this example the total weights of the two signatures are different. The first signature has a total weight of 1 while the second signature has a total weight of 0.9.

 

Log of changes
DateDecription
05/10/98Changed from absolute error check to relative error check + Fixed a bug that caused the "Unexpected error in findBasicVariables" error message (thanks Mark Ruzon)
03/04/98Fixed a bug that sometime caused a crash when the second signature had only one feature (thanks Mark Ruzon)
03/31/98Fixed a bug in the case where *both* signatures have only one feature (thanks Rajesh Batra)


References
[1] Y. Rubner, C. Tomasi, and L. J. Guibas. A Metric for Distributions with Applications to Image Databases. Proceedings of the 1998 IEEE International Conference on Computer Vision, Bombay, India, January 1998, pp. 59-66.

[2] F. S. Hillier and G. J. Lieberman. Introduction to Mathematical Programming McGraw-Hill, 1990.

 

03-20
<think>好的,我现在需要处理用户关于EMD算法和EMD库使用说明的查询。首先,用户提到了EMD算法和EMD库,我需要明确这两个术语的具体含义。EMD通常有两种常见解释:经验模态分解(Empirical Mode Decomposition)和地球移动距离(Earth Mover's Distance)。根据用户提供的引用内容,里面提到了Python实现EMD算法,以及图像处理中的应用,特别是图像匹配和特征描述符提取。这更符合地球移动距离(EMD)的应用场景,因为EMD在图像处理中常用于比较两个分布之间的差异,比如在图像检索或匹配中。而经验模态分解更多用于信号处理,比如分解时间序列信号。不过,需要进一步确认用户的具体需求。 接下来,用户要求查找与EMD相关的技术内容,特别是使用说明。用户可能想了解如何在自己的项目中实现或使用EMD算法,或者如何使用现有的EMD库。根据引用中提到的Python实现,可能需要介绍Python中的相关库,比如PyEMD(用于经验模态分解)或OpenCV中的EMD方法(用于地球移动距离)。需要检查这些库的文档和使用方法,确保提供准确的步骤和示例代码。 然后,我需要确保回答符合系统级指令的要求。所有行内数学公式用$...$,独立公式用$$...$$,并且正确使用LaTeX语法。例如,提到EMD的数学定义时,需要用合适的公式表达。比如,地球移动距离可以表示为优化问题,需要运输成本的最小化。同时,要生成相关问题,并正确添加引用标识。引用标识应该放在相关段落末尾,如[^1]。 另外,用户提供的引用内容提到了EMD在图像处理中的应用,特别是图像匹配中的特征描述符提取。这可能涉及到如何计算两个图像特征之间的EMD距离,因此需要说明相关步骤,比如特征提取、距离计算等。可能需要引用OpenCV中的cv2.EMD函数,并给出代码示例。 同时,需要注意区分EMD的两种不同含义,避免混淆。可能需要询问用户具体指的是哪种EMD,或者在回答中分别介绍两种算法,并说明其不同的应用场景。不过,根据用户提供的引用内容,更可能是指地球移动距离,所以应重点放在这一部分。 最后,生成相关问题需要考虑用户可能感兴趣的方面,比如EMD算法的数学原理、优化方法、在不同领域的应用、与其他距离度量的比较等。确保问题覆盖用户可能进一步探索的方向。 总结步骤: 1. 确定EMD的具体含义(地球移动距离或经验模态分解),根据引用内容倾向于地球移动距离。 2. 介绍EMD算法的基本概念和数学公式,正确使用LaTeX格式。 3. 提供Python中使用EMD库的示例,如OpenCV的cv2.EMD或PyEMD库。 4. 分步骤说明使用方法,包括安装、导入、数据准备、计算等。 5. 引用相关文献,添加引用标识。 6. 生成相关问题,帮助用户深入理解。</think>### EMD算法与库使用说明 #### 1. EMD算法简介 **地球移动距离(Earth Mover's Distance, EMD)** 是一种衡量两个概率分布之间差异的度量方法,常用于图像处理和模式识别。其核心思想是计算将一种分布“搬运”到另一种分布所需的最小工作量,形式化为线性规划问题: $$ \text{EMD}(P, Q) = \min_{f_{ij}} \sum_{i,j} f_{ij} d_{ij} \quad \text{满足约束} \quad \sum_j f_{ij} \leq P_i, \sum_i f_{ij} \leq Q_j, \sum_{i,j} f_{ij} = 1 $$ 其中$P$和$Q$为分布,$d_{ij}$为距离矩阵,$f_{ij}$为流量矩阵。 #### 2. EMD库的使用方法 ##### 2.1 安装与导入 - **OpenCV库(用于图像处理中的EMD)**: ```bash pip install opencv-python ``` 代码示例: ```python import cv2 import numpy as np # 创建两个归一化的直方图 hist1 = np.array([[0.5], [0.5]], dtype=np.float32) hist2 = np.array([[0.3], [0.7]], dtype=np.float32) # 计算EMD距离 distance, _, _ = cv2.EMD(hist1, hist2, cv2.DIST_L2) print("EMD距离:", distance) ``` - **PyEMD库(用于经验模态分解)**: ```bash pip install EMD-signal ``` 代码示例: ```python from PyEMD import EMD import numpy as np # 生成信号 t = np.linspace(0, 1, 1000) signal = np.sin(2*np.pi*5*t) + np.sin(2*np.pi*10*t) # 执行EMD分解 emd = EMD() IMFs = emd(signal) ``` ##### 2.2 关键步骤说明 1. **数据准备**:输入需为归一化的概率分布(OpenCV)或时间序列信号(PyEMD)。 2. **参数配置**: - OpenCV中需指定距离类型(如`cv2.DIST_L2`)。 - PyEMD可调整分解的停止条件(如幅值或频率阈值)。 3. **结果解析**:EMD返回距离值或本征模态函数(IMFs),需结合应用场景分析。 #### 3. 应用场景 - **图像匹配**:通过比较图像特征(如颜色直方图)的EMD距离实现相似性度量。 - **信号处理**:经验模态分解用于去噪、特征提取(如EEG信号分析)。
评论 1
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值