这个要和svmlight配合一起使用
Python语言:
高亮代码由发芽网提供
001
# svmlight.py
002 #
003 # Author: Clint Burfoot <clint@burfoot.info>
004 #
005
006 """
007 An interface class for U{SVM light<http://svmlight.joachims.org/>}
008 """
009
010 import os
011 import tempfile
012 import math
013 from subprocess import call
014
class SVMLight:
    """
    An interface class for U{SVM light<http://svmlight.joachims.org/>}

    This class currently supports classification with default options
    only. It calls the SVMLight binaries as external programs and
    exchanges data with them through files in a private temporary
    directory.

    Future versions should add a SWIG interface and support for use of
    non-default SVMlight options.

    C{SVMLight} reads sparse feature vectors - dictionaries with
    numeric keys, representing features, and arbitrary numeric values.
    """

    learn_binary = "svm_learn"
    classify_binary = "svm_classify"

    def __init__(self, svm_path, labels=None, vectors=None, model=None,
                 cleanup=False):
        """
        Trains a new classifier, or wraps a pre-trained one.

        @type svm_path: C{str}
        @param svm_path: The filesystem path to the SVMLight binaries
        @type labels: C{tuple}
        @param labels: A tuple of boolean training set labels.
        @type vectors: C{tuple}
        @param vectors: A tuple of sparse feature vectors.
        @type model: A C{tuple} of C{str}
        @param model: The lines from an SVMlight model file. Specify this
            instead of C{labels} and C{vectors} to use a pre-trained
            classifier; no training run is started in that case.
        @type cleanup: C{bool}
        @param cleanup: If true, the temporary working directory is
            removed when this object is destroyed.
        @raise ValueError: If no C{model} is given and C{labels} and
            C{vectors} are missing or of different lengths.
        @raise AssertionError: If the training binary exits with a
            non-zero status or produces an empty model file.
        """
        self._svm_learn = os.path.join(svm_path, SVMLight.learn_binary)
        self._svm_classify = os.path.join(svm_path,
                                          SVMLight.classify_binary)
        self._cleanup = cleanup
        self._devnull = None

        self._directory = tempfile.mkdtemp()
        self._example_fname = os.path.join(self._directory, "example")
        self._model_fname = os.path.join(self._directory, "model")
        self._input_fname = os.path.join(self._directory, "input")
        self._output_fname = os.path.join(self._directory, "output")

        if model is not None:
            # Pre-trained classifier: only materialise the model file so
            # that the classify binary can read it. There is nothing to
            # train, and labels/vectors may legitimately be None here.
            self._write_model(self._model_fname, model)
            self.model = model
            return

        if labels is None or vectors is None:
            raise ValueError(
                "either model or both labels and vectors are required")
        if len(labels) != len(vectors):
            raise ValueError(
                "labels and vectors arrays are different lengths")

        self._write_vectors(self._example_fname, labels, vectors)
        self._run((self._svm_learn, self._example_fname,
                   self._model_fname))
        self.model = self._read_model()

    def _get_devnull(self):
        # Return a lazily opened, cached write handle to the platform's
        # null device.
        if self._devnull is None:
            self._devnull = open(os.devnull, "w")
        return self._devnull
    devnull = property(_get_devnull)

    def __getstate__(self):
        # Open file handles cannot be pickled; drop the cached handle so
        # that it is reopened on demand.
        state = self.__dict__.copy()
        state['_devnull'] = None
        return state

    def _run(self, command):
        # Runs an external SVMlight binary with its stdout discarded.
        # AssertionError is kept as the failure type for compatibility
        # with the previous "assert ret == 0" checks, but raising it
        # explicitly means the check is not stripped under "python -O".
        ret = call(command, stdout=self.devnull)
        if ret != 0:
            raise AssertionError(
                "%s exited with status %d" % (command[0], ret))

    def classify(self, vectors):
        """
        Classify feature vectors.

        @type vectors: C{tuple}
        @param vectors: A tuple of sparse binary feature vectors.
        @rtype: C{list}
        @return: A list of C{float} vector classifications, one per
            input vector. An empty input yields an empty list.
        @raise AssertionError: If the classify binary exits with a
            non-zero status or returns a different number of results
            than there were input vectors.
        """
        if len(vectors) == 0:
            # Nothing to classify; skip the external call entirely.
            return []
        # The example file format needs a label column. These are
        # placeholders only; every line is written with label "1".
        placeholder_labels = [True] * len(vectors)
        self._write_vectors(self._input_fname, placeholder_labels, vectors)
        self._run((self._svm_classify, self._input_fname,
                   self._model_fname, self._output_fname))
        results = self._read_classification()
        if len(results) != len(vectors):
            raise AssertionError(
                "expected %d classifications but read %d"
                % (len(vectors), len(results)))
        return results

    def _write_vectors(self, fname, labels, vectors):
        # Writes the given vectors to the given filename with the given
        # labels, one example per line: "<label> <feature>:<value> ...".
        # Truthy labels are written as "1" and falsy ones as "-1".
        if len(labels) != len(vectors):
            raise AssertionError(
                "labels and vectors arrays are different lengths")
        with open(fname, "w") as out:
            for label, vector in zip(labels, vectors):
                target = "1" if label else "-1"
                # Features are emitted in increasing order and shifted
                # by one, so a feature key of 0 is written as 1.
                pairs = ["%d:%s" % (feature + 1, str(vector[feature]))
                         for feature in sorted(vector)]
                out.write("%s %s\n" % (target, " ".join(pairs)))

    def _write_model(self, fname, model):
        # Writes the model lines to the model file, one per line.
        with open(fname, "w") as out:
            for line in model:
                out.write("%s\n" % line)

    def _read_classification(self):
        # Reads the SVMlight output file: one float per non-blank line.
        with open(self._output_fname, "r") as src:
            result = [float(line) for line in src if line.strip()]
        if not result:
            raise AssertionError("classification output is empty")
        return result

    def _read_model(self):
        # Reads the SVMlight model file as a list of lines with trailing
        # whitespace removed.
        with open(self._model_fname, "r") as src:
            result = [line.rstrip() for line in src]
        if not result:
            raise AssertionError("model file is empty")
        return result

    def __del__(self):
        # __init__ may have failed part-way, so every attribute is read
        # defensively. The method is also safe to call more than once.
        devnull = getattr(self, "_devnull", None)
        if devnull is not None:
            devnull.close()
            self._devnull = None
        directory = getattr(self, "_directory", None)
        if not getattr(self, "_cleanup", False) or directory is None:
            return
        if not os.path.isdir(directory):
            return
        for fname in os.listdir(directory):
            os.unlink(os.path.join(directory, fname))
        os.rmdir(directory)
002 #
003 # Author: Clint Burfoot <clint@burfoot.info>
004 #
005
006 """
007 An interface class for U{SVM light<http://svmlight.joachims.org/>}
008 """
009
010 import os
011 import tempfile
012 import math
013 from subprocess import call
014
class SVMLight:
    """
    An interface class for U{SVM light<http://svmlight.joachims.org/>}

    This class currently supports classification with default options
    only. It calls the SVMLight binaries as external programs and
    exchanges data with them through files in a private temporary
    directory.

    Future versions should add a SWIG interface and support for use of
    non-default SVMlight options.

    C{SVMLight} reads sparse feature vectors - dictionaries with
    numeric keys, representing features, and arbitrary numeric values.
    """

    learn_binary = "svm_learn"
    classify_binary = "svm_classify"

    def __init__(self, svm_path, labels=None, vectors=None, model=None,
                 cleanup=False):
        """
        Trains a new classifier, or wraps a pre-trained one.

        @type svm_path: C{str}
        @param svm_path: The filesystem path to the SVMLight binaries
        @type labels: C{tuple}
        @param labels: A tuple of boolean training set labels.
        @type vectors: C{tuple}
        @param vectors: A tuple of sparse feature vectors.
        @type model: A C{tuple} of C{str}
        @param model: The lines from an SVMlight model file. Specify this
            instead of C{labels} and C{vectors} to use a pre-trained
            classifier; no training run is started in that case.
        @type cleanup: C{bool}
        @param cleanup: If true, the temporary working directory is
            removed when this object is destroyed.
        @raise ValueError: If no C{model} is given and C{labels} and
            C{vectors} are missing or of different lengths.
        @raise AssertionError: If the training binary exits with a
            non-zero status or produces an empty model file.
        """
        self._svm_learn = os.path.join(svm_path, SVMLight.learn_binary)
        self._svm_classify = os.path.join(svm_path,
                                          SVMLight.classify_binary)
        self._cleanup = cleanup
        self._devnull = None

        self._directory = tempfile.mkdtemp()
        self._example_fname = os.path.join(self._directory, "example")
        self._model_fname = os.path.join(self._directory, "model")
        self._input_fname = os.path.join(self._directory, "input")
        self._output_fname = os.path.join(self._directory, "output")

        if model is not None:
            # Pre-trained classifier: only materialise the model file so
            # that the classify binary can read it. There is nothing to
            # train, and labels/vectors may legitimately be None here.
            self._write_model(self._model_fname, model)
            self.model = model
            return

        if labels is None or vectors is None:
            raise ValueError(
                "either model or both labels and vectors are required")
        if len(labels) != len(vectors):
            raise ValueError(
                "labels and vectors arrays are different lengths")

        self._write_vectors(self._example_fname, labels, vectors)
        self._run((self._svm_learn, self._example_fname,
                   self._model_fname))
        self.model = self._read_model()

    def _get_devnull(self):
        # Return a lazily opened, cached write handle to the platform's
        # null device.
        if self._devnull is None:
            self._devnull = open(os.devnull, "w")
        return self._devnull
    devnull = property(_get_devnull)

    def __getstate__(self):
        # Open file handles cannot be pickled; drop the cached handle so
        # that it is reopened on demand.
        state = self.__dict__.copy()
        state['_devnull'] = None
        return state

    def _run(self, command):
        # Runs an external SVMlight binary with its stdout discarded.
        # AssertionError is kept as the failure type for compatibility
        # with the previous "assert ret == 0" checks, but raising it
        # explicitly means the check is not stripped under "python -O".
        ret = call(command, stdout=self.devnull)
        if ret != 0:
            raise AssertionError(
                "%s exited with status %d" % (command[0], ret))

    def classify(self, vectors):
        """
        Classify feature vectors.

        @type vectors: C{tuple}
        @param vectors: A tuple of sparse binary feature vectors.
        @rtype: C{list}
        @return: A list of C{float} vector classifications, one per
            input vector. An empty input yields an empty list.
        @raise AssertionError: If the classify binary exits with a
            non-zero status or returns a different number of results
            than there were input vectors.
        """
        if len(vectors) == 0:
            # Nothing to classify; skip the external call entirely.
            return []
        # The example file format needs a label column. These are
        # placeholders only; every line is written with label "1".
        placeholder_labels = [True] * len(vectors)
        self._write_vectors(self._input_fname, placeholder_labels, vectors)
        self._run((self._svm_classify, self._input_fname,
                   self._model_fname, self._output_fname))
        results = self._read_classification()
        if len(results) != len(vectors):
            raise AssertionError(
                "expected %d classifications but read %d"
                % (len(vectors), len(results)))
        return results

    def _write_vectors(self, fname, labels, vectors):
        # Writes the given vectors to the given filename with the given
        # labels, one example per line: "<label> <feature>:<value> ...".
        # Truthy labels are written as "1" and falsy ones as "-1".
        if len(labels) != len(vectors):
            raise AssertionError(
                "labels and vectors arrays are different lengths")
        with open(fname, "w") as out:
            for label, vector in zip(labels, vectors):
                target = "1" if label else "-1"
                # Features are emitted in increasing order and shifted
                # by one, so a feature key of 0 is written as 1.
                pairs = ["%d:%s" % (feature + 1, str(vector[feature]))
                         for feature in sorted(vector)]
                out.write("%s %s\n" % (target, " ".join(pairs)))

    def _write_model(self, fname, model):
        # Writes the model lines to the model file, one per line.
        with open(fname, "w") as out:
            for line in model:
                out.write("%s\n" % line)

    def _read_classification(self):
        # Reads the SVMlight output file: one float per non-blank line.
        with open(self._output_fname, "r") as src:
            result = [float(line) for line in src if line.strip()]
        if not result:
            raise AssertionError("classification output is empty")
        return result

    def _read_model(self):
        # Reads the SVMlight model file as a list of lines with trailing
        # whitespace removed.
        with open(self._model_fname, "r") as src:
            result = [line.rstrip() for line in src]
        if not result:
            raise AssertionError("model file is empty")
        return result

    def __del__(self):
        # __init__ may have failed part-way, so every attribute is read
        # defensively. The method is also safe to call more than once.
        devnull = getattr(self, "_devnull", None)
        if devnull is not None:
            devnull.close()
            self._devnull = None
        directory = getattr(self, "_directory", None)
        if not getattr(self, "_cleanup", False) or directory is None:
            return
        if not os.path.isdir(directory):
            return
        for fname in os.listdir(directory):
            os.unlink(os.path.join(directory, fname))
        os.rmdir(directory)