文件的种类繁多,文件的识别就会很麻烦,并且还要考虑到可扩展性,所以这个文件的识别是以插件的形式来存在的,这里我们简要的说一下,这个插件的框架。
由于文件的格式很多,而且标准也很多,很难肯定某一文件就是某种的格式,因而就引入一个确信度的概念。
// Vector of OSCL_HeapString values used to pass lists of MIME-type format
// strings around (a plug-in's supported formats, recognition hints).
typedef Oscl_Vector<OSCL_HeapString<OsclMemAllocator>, OsclMemAllocator> PVMFRecognizerMIMEStringList;
/**
 * Confidence level attached to a recognition result.
 *
 * File formats are numerous and loosely specified, so a recognizer reports
 * how sure it is instead of a plain yes/no. Enumerators are declared from
 * "definitely not this format" up to "definitely this format".
 */
typedef enum _PVMFRecognizerConfidence
{
    PVMFRecognizerConfidenceNotCertain,   // 100% sure not the format
    PVMFRecognizerConfidenceNotPossible,  // Maybe not the format
    PVMFRecognizerConfidenceUnknown,      // Not sure one way or the other
    PVMFRecognizerConfidencePossible,     // Maybe the format
    PVMFRecognizerConfidenceCertain       // 100% sure of the format
} PVMFRecognizerConfidence;
然后是文件识别的结果:
class PVMFRecognizerResult
{
public:
PVMFRecognizerResult()
{
};
// Copy constructor for use in Oscl_Vector
PVMFRecognizerResult(const PVMFRecognizerResult& aSrc)
{
iRecognizedFormat = aSrc.iRecognizedFormat;
iRecognitionConfidence = aSrc.iRecognitionConfidence;
// iRecognizerSubFormatList=aSrc.iRecognizerSubFormatList;
};
~PVMFRecognizerResult()
{
};
// The format of interest as a MIME string
OSCL_HeapString<OsclMemAllocator> iRecognizedFormat;
// The confidence level of recognition
PVMFRecognizerConfidence iRecognitionConfidence;
// If the format is a container format, the format of
content within
// Oscl_Vector<PVMFRecognizerResult, OsclMemAllocator> iRecognizerSubFormatList;
};
格式和确信度。
然后就是一个事件的监控者Observer。
/**
 * Callback (observer) interface handed to PVMFRecognizerRegistry::OpenSession().
 * Purely abstract: a client implements RecognizerCommandCompleted() to be told
 * when a command it issued has completed.
 */
class PVMFRecognizerCommmandHandler
{
    public:
        // Receives the response object describing the completed command.
        virtual void RecognizerCommandCompleted(const PVMFCmdResp& aResponse) = 0;

        // Virtual so implementations can be destroyed through this interface.
        virtual ~PVMFRecognizerCommmandHandler() {}
};
#endif // PVMF_RECOGNIZER_TYPES_H_INCLUDED
这些都只是纯虚类。
文件识别,一般都是读取文件中的某些特殊的字节,然后才能做判断的,由于不同的文件读取的地方不同,并且识别的规则也是不同,所以定义一个基本的接口。
/**
 * Base interface that every format-recognizer plug-in derives from.
 *
 * Recognition is implemented as plug-ins so that support for new formats
 * can be added without changing the framework. A separate class manages
 * the plug-ins as a group (registration and lookup).
 */
class PVMFRecognizerPluginInterface
{
    public:
        virtual ~PVMFRecognizerPluginInterface()
        {
        }

        // Returns, as a list, the formats this plug-in is able to recognize.
        virtual PVMFStatus SupportedFormats(PVMFRecognizerMIMEStringList& aSupportedFormatsList) = 0;

        // Asks the plug-in to recognize the given data source. aFormatHint is
        // a list of formats to consider first; aRecognizerResult receives the
        // list of candidate formats together with their confidence levels.
        virtual PVMFStatus Recognize(PVMFDataStreamFactory& aSourceDataStreamFactory,
                                     PVMFRecognizerMIMEStringList* aFormatHint,
                                     Oscl_Vector<PVMFRecognizerResult, OsclMemAllocator>& aRecognizerResult) = 0;

        // Reports the minimum number of bytes the plug-in needs in order to
        // attempt recognition.
        virtual PVMFStatus GetRequiredMinBytesForRecognition(uint32& aBytes) = 0;
};
每一个插件都需要占用一定内存,因而就有下面的定义:一个插件的工厂的基类,每一个插件都要有一个这样的工厂接口来创建和删除某一个插件。
/**
 * Abstract factory for recognizer plug-ins. Each plug-in type supplies one
 * factory, which is what gets registered; the factory is responsible both
 * for creating a plug-in instance and for destroying it again.
 */
class PVMFRecognizerPluginFactory : public HeapBase
{
    public:
        virtual ~PVMFRecognizerPluginFactory() {}

        // Creates a plug-in instance and returns it through the base interface.
        virtual PVMFRecognizerPluginInterface* CreateRecognizerPlugin() = 0;

        // Destroys a plug-in instance previously returned by CreateRecognizerPlugin().
        virtual void DestroyRecognizerPlugin(PVMFRecognizerPluginInterface* aPlugIn) = 0;
};
然后就是一个模板的定义,因为插件很多,这里我们写一个简单的模板:
/**
 * Ready-made factory for a plug-in type T that can be default-constructed.
 *
 * T must derive from PVMFRecognizerPluginInterface; the static_cast in
 * DestroyRecognizerPlugin() enforces this at compile time (the previous
 * C-style cast would have silently reinterpreted an unrelated pointer).
 */
template<class T>
class PVMFRecognizerPluginFactoryBasic : public PVMFRecognizerPluginFactory
{
    public:
        virtual ~PVMFRecognizerPluginFactoryBasic()
        {
        }

        // Allocates a new T with OSCL_NEW and returns it as the base interface.
        PVMFRecognizerPluginInterface* CreateRecognizerPlugin()
        {
            T* plugin = OSCL_NEW(T, ());
            return plugin;
        }

        // Releases a plug-in created by CreateRecognizerPlugin(). The pointer
        // is cast back to T* so that OSCL_DELETE operates on the concrete type.
        void DestroyRecognizerPlugin(PVMFRecognizerPluginInterface* aPlugIn)
        {
            T* plugin = static_cast<T*>(aPlugIn);
            OSCL_DELETE(plugin);
        }
};
这样,关于插件的基类我们基本搞定了,下面要解决的是这么多插件放在一起如何管理。这里用的是一个静态类(static),它负责插件的注册和注销,并且负责文件格式的查找:来了一个文件,就遍历所有注册过的插件,找到最匹配的格式。这和微软的DirectShow框架极为相似,不过微软的Filter是注册到注册表里面去的,并且微软Filter的概念不仅仅是文件的识别。在微软的注册表中,有一项就是Media Type,规定了系统所能够识别的文件类型,每一个类型都要有匹配码,例如前面八个字节是什么,后面四个字节是什么,这些都保存在注册表中;注册的另外一项就是打开这个文件所需的Source Filter的ID。这样,要播放文件的时候,按照一定的优先级去匹配这些注册信息就行了。
这里关于插件的管理有一个单独的静态类。静态类的特点是:不管有没有创建这个类的对象,它的静态成员都是存在的,并且全局可访问。下面才是我们识别的关键代码:
/**
 * Static entry point to the recognizer registry: plug-in registration,
 * session handling and recognition requests. All members are static.
 */
class PVMFRecognizerRegistry
{
    public:
        // Sets up the recognizer registry before use.
        OSCL_IMPORT_REF static PVMFStatus Init();

        /**
         * Shuts down and cleans up the recognizer registry. Must be called
         * once after there is no more use for the registry, so that the memory
         * allocated for it is properly released.
         **/
        OSCL_IMPORT_REF static void Cleanup();

        // Adds / removes a recognizer plug-in factory.
        OSCL_IMPORT_REF static PVMFStatus RegisterPlugin(PVMFRecognizerPluginFactory& aPluginFactory);
        OSCL_IMPORT_REF static PVMFStatus RemovePlugin(PVMFRecognizerPluginFactory& aPluginFactory);

        // Opens a session; aSessionId is an output and aCmdHandler is the
        // observer associated with the session. CloseSession() ends it.
        OSCL_IMPORT_REF static PVMFStatus OpenSession(PVMFSessionId& aSessionId,
                                                      PVMFRecognizerCommmandHandler& aCmdHandler);
        OSCL_IMPORT_REF static PVMFStatus CloseSession(PVMFSessionId aSessionId);

        // Requests recognition of the given data source within a session.
        // Returns a command id; aRecognizerResult is the caller-supplied
        // vector for the results. aCmdContext and aTimeout are optional.
        OSCL_IMPORT_REF static PVMFCommandId Recognize(PVMFSessionId aSessionId,
                                                       PVMFDataStreamFactory& aSourceDataStreamFactory,
                                                       PVMFRecognizerMIMEStringList* aFormatHintList,
                                                       Oscl_Vector<PVMFRecognizerResult, OsclMemAllocator>& aRecognizerResult,
                                                       OsclAny* aCmdContext = NULL,
                                                       uint32 aTimeout = 0);

        // Requests cancellation of a previously issued command, identified by
        // aCommandToCancelId.
        OSCL_IMPORT_REF static PVMFCommandId CancelCommand(PVMFSessionId aSessionId,
                                                           PVMFCommandId aCommandToCancelId,
                                                           OsclAny* aCmdContext = NULL);
};
这样我们的识别文件格式的框架就完成了。下面看一些具体的实现。
很遗憾,在OSCL中,我们已经定义好了一个类似于微软注册表的东西(即下面的OsclTLSRegistry),它存在于OSCL中,在OSCL之上的任何代码都是可以访问的。
class OsclTLSRegistry
{
public:
OSCL_IMPORT_REF static OsclAny* getInstance(uint32
ID, int32 &error);
OSCL_IMPORT_REF static void registerInstance(OsclAny*
ptr, uint32 ID, int32 &error);
private:
OsclTLSRegistry()
{}
typedef OsclAny* registry_type;
typedef registry_type* registry_pointer_type;
#if ( OSCL_TLS_IS_KEYED)
class TKeyItem
{
public:
TKeyItem():
iTlsKey(NULL), iThreadId(0)
{}
TOsclTlsKey
*iTlsKey;
TOsclTlsThreadId
iThreadId;
};
class TlsKeyTable
{
public:
TlsKeyTable():
iNumKeys(0)
{}
_OsclBasicLock
iLock;
uint32
iNumKeys;
TKeyItem
iKeys[OSCL_TLS_MAX_THREADS];
};
//The key table is a global variable.
static TlsKeyTable* iTlsKeyTable;
static void GetThreadId(TOsclTlsThreadId
&threadId, int32&);
static TOsclTlsKey* LookupTlsKey(int32&);
static bool SaveTlsKey(TOsclTlsKey* key,
int32&);
static bool RemoveTlsKey(Oscl_DefAlloc&
alloc, TOsclTlsKey* key, int32&);
#endif
private:
OSCL_IMPORT_REF static void initialize(Oscl_DefAlloc
&alloc, int32 &error);
OSCL_IMPORT_REF static void cleanup(Oscl_DefAlloc
&alloc, int32 &error);
friend class OsclBase;
};
这个类的实现可以参见具体的代码,所以这里我们就不去多说。这里我们在注册信息中填入的是一个什么呢?
我们看静态的初始化函数。
/**
 * Makes the recognizer registry available.
 *
 * If an implementation object is already stored under
 * PVMFRECOGNIZER_REGISTRY_ID, only its reference count is incremented.
 * Otherwise a new PVMFRecognizerRegistryImpl is allocated and stored.
 *
 * @return PVMFSuccess on both paths.
 */
OSCL_EXPORT_REF PVMFStatus PVMFRecognizerRegistry::Init()
{
    // Check that there is no existing registry
    PVMFRecognizerRegistryImpl* pvrecregimpl = OSCL_STATIC_CAST(PVMFRecognizerRegistryImpl*, PVMFRECOGNIZER_REGISTRY::getInstance(PVMFRECOGNIZER_REGISTRY_ID));

    // If one exists there is nothing new to register.
    if (pvrecregimpl != NULL)
    {
        // Registry is already present so no need to instantiate again.
        // Just increment the refcount
        (pvrecregimpl->iRefCount)++;
        return PVMFSuccess;
    }

    // Otherwise instantiate the registry implementation.
    // NOTE(review): the initial value of iRefCount is presumably set by the
    // PVMFRecognizerRegistryImpl constructor — confirm.
    Oscl_TAlloc<PVMFRecognizerRegistryImpl, OsclMemAllocator> talloc;
    pvrecregimpl = OSCL_ALLOC_NEW(talloc, PVMFRecognizerRegistryImpl, ());

    // Save it on singleton or TLS
    PVMFRECOGNIZER_REGISTRY::registerInstance(pvrecregimpl, PVMFRECOGNIZER_REGISTRY_ID);
    return PVMFSuccess;
}
继续往下面讲之前,我觉得我们可以看一下PVMFRecognizerRegistryImpl,这个是怎么实现的,PVMFRecognizerRegistryImpl是我们注册表中保存的一个东东。主要实现文件的识别,包含很多的信息。我以前以为只有变量才能注册的,其实函数也能注册的,函数代码的空间也是数据,是数据就有指针,所以整个类也是可以注册的。注册后,如果要清空,那么就要调用Cleanup,
/**
 * Releases one reference to the recognizer registry.
 *
 * Decrements the implementation's reference count. When the count reaches
 * zero or below, the implementation is deleted and its registry slot is
 * reset to NULL. Does nothing when no implementation is registered.
 */
OSCL_EXPORT_REF void PVMFRecognizerRegistry::Cleanup()
{
    // Retrieve the registry implementation instance from singleton or TLS and destroy it.
    // A non-NULL pointer here is the object that Init() allocated and registered.
    PVMFRecognizerRegistryImpl* pvrecregimpl = OSCL_STATIC_CAST(PVMFRecognizerRegistryImpl*,
            PVMFRECOGNIZER_REGISTRY::getInstance(PVMFRECOGNIZER_REGISTRY_ID));
    if (pvrecregimpl != NULL)
    {
        // First decrement the refcount; the count lives on the instance itself.
        --(pvrecregimpl->iRefCount);

        // If the resulting refcount is 0, then delete the instance
        if ((pvrecregimpl->iRefCount) <= 0)
        {
            Oscl_TAlloc<PVMFRecognizerRegistryImpl, OsclMemAllocator> talloc;
            OSCL_ALLOC_DELETE(pvrecregimpl, talloc, PVMFRecognizerRegistryImpl);

            // Unregister by putting NULL pointer in singleton or TLS, so the
            // registry does not keep pointing at the freed object.
            PVMFRECOGNIZER_REGISTRY::registerInstance(NULL, PVMFRECOGNIZER_REGISTRY_ID);
        }
    }
    else
    {
        // Registry has already been cleaned up so nothing to do
    }
}
然后就是注册我们的文件识别组件
/**
 * Registers a recognizer plug-in factory by forwarding the call to the
 * registry implementation object.
 *
 * @return the implementation's result, or PVMFErrNotReady (after an
 *         assertion) when no implementation is registered.
 */
OSCL_EXPORT_REF PVMFStatus PVMFRecognizerRegistry::RegisterPlugin(PVMFRecognizerPluginFactory& aPluginFactory)
{
    PVMFRecognizerRegistryImpl* registry =
        OSCL_STATIC_CAST(PVMFRecognizerRegistryImpl*, PVMFRECOGNIZER_REGISTRY::getInstance(PVMFRECOGNIZER_REGISTRY_ID));

    if (registry == NULL)
    {
        // Registry hasn't been initialized yet. Assert
        OSCL_ASSERT(false);
        return PVMFErrNotReady;
    }

    // Nothing more to do here than delegate to the matching member.
    return registry->RegisterPlugin(aPluginFactory);
}
其实很简单,调用对应的成员就行了。
下面的几个函数基本上都是这样子了,RemovePlugin、OpenSession、CloseSession、Recognize、CancelCommand。
下面我们看看
PVMFRecognizerRegistryImpl的实现。
因为文件的识别是一个比较“卡”的过程,为了不让我们的软件给人很卡的感官,我们这里把这个类另辟一个线程来进行,并且以异步的方式来调用,通过命令。
识别的命令有下面的两种类型:
// Command kinds handled by the registry implementation.
typedef enum
{
    PVMFRECREG_COMMAND_RECOGNIZE     = 1,  // start a recognition
    PVMFRECREG_COMMAND_CANCELCOMMAND = 2   // cancel a recognition
} PVMFRecRegImplCommandType;
一个就是开始识别,一个就是取消识别。
class PVMFRecRegSessionInfo
{
public:
PVMFRecRegSessionInfo()
{
};
PVMFRecRegSessionInfo(const PVMFRecRegSessionInfo& aSrc)
{
iRecRegSessionId = aSrc.iRecRegSessionId;
iRecRegCmdHandler = aSrc.iRecRegCmdHandler;
};
~PVMFRecRegSessionInfo()
{
};
PVMFSessionId iRecRegSessionId;
PVMFRecognizerCommmandHandler* iRecRegCmdHandler;
};
这个类保存一次识别会话的信息:一个是识别的session ID,一个是事件处理器(命令完成时回调的handler)。
下面就是一个命令的具体的封装,包含一个参数的列表
class PVMFRecRegImplCommand
{
public:
初始化就是一个参数列表
PVMFRecRegImplCommand(PVMFSessionId aSessionId, int32
aCmdType, PVMFCommandId aCmdId, OsclAny* aContextData = NULL,
Oscl_Vector<PVMFRecRegImplCommandParamUnion,
OsclMemAllocator>* aParamVector = NULL, bool aAPICommand = true) :
iSessionId(aSessionId),
iCmdType(aCmdType), iCmdId(aCmdId), iContextData(aContextData), iAPICommand(aAPICommand)
{
iParamVector.clear();
if (aParamVector)
{
iParamVector
= *aParamVector;
}
}
PVMFRecRegImplCommand(const PVMFRecRegImplCommand& aCmd)
{
iSessionId = aCmd.iSessionId;
iCmdType = aCmd.iCmdType;
iCmdId = aCmd.iCmdId;
iContextData = aCmd.iContextData;
iAPICommand = aCmd.iAPICommand;
iParamVector = aCmd.iParamVector;
}
PVMFSessionId GetSessionId()const
{
return iSessionId;
}
int32 GetCmdType()const
{
return iCmdType;
}
PVMFCommandId GetCmdId()const
{
return iCmdId;
}
OsclAny* GetContext()const
{
return iContextData;
}
bool IsAPICommand()const
{
return iAPICommand;
}
PVMFRecRegImplCommandParamUnion GetParam(uint32
aIndex)const
{
if (aIndex >= iParamVector.size())
{
PVMFRecRegImplCommandParamUnion
param;
oscl_memset(¶m,
0, sizeof(PVMFRecRegImplCommandParamUnion));
return
param;
}
else
{
return
iParamVector[aIndex];
}
}
bool operator==(const PVMFRecRegImplCommand& x)const
{
return iCmdId == x.iCmdId;
}
PVMFSessionId iSessionId;
int32 iCmdType;
PVMFCommandId iCmdId;
OsclAny* iContextData;
bool iAPICommand;
Oscl_Vector<PVMFRecRegImplCommandParamUnion,
OsclMemAllocator> iParamVector;
};
上面就是一个带参数的识别命令。
然后有一个优先级的比较:
class PVMFRecRegImplCommandCompareLess
{
public:
int compare(PVMFRecRegImplCommand& a, PVMFRecRegImplCommand&
b) const
{
int a_pri = PVMFRecRegImplCommandCompareLess::GetPriority(a);
int b_pri = PVMFRecRegImplCommandCompareLess::GetPriority(b);
if (a_pri < b_pri)
{
// Higher
priority
return
true;
}
else if (a_pri == b_pri)
{
// Same
priority so look at the command ID to maintain FIFO
return
(a.GetCmdId() > b.GetCmdId());
}
else
{
// Lower
priority
return
false;
}
}
static int GetPriority(PVMFRecRegImplCommand& aCmd)
{
switch (aCmd.GetCmdType())
{
case
PVMFRECREG_COMMAND_RECOGNIZE:
return
5;
case
PVMFRECREG_COMMAND_CANCELCOMMAND:
return
3;
default:
return
0;
}
}
};
如何识别的?
这里为什么要从PvmiDataStreamObserver继承?因为识别时必须读取一部分字节,必然会有相关的数据流(data stream)操作,这个基类就是一个简单的事件监控接口。为什么要从OsclTimerObject继承?因为我们的识别是单开线程的,并且识别有一个时间的上限。
class PVMFRecognizerRegistryImpl : public OsclTimerObject,
public
PvmiDataStreamObserver
{
public:
PVMFRecognizerRegistryImpl();
~PVMFRecognizerRegistryImpl();
注册和注销识别插件
PVMFStatus RegisterPlugin(PVMFRecognizerPluginFactory&
aPluginFactory);
PVMFStatus RemovePlugin(PVMFRecognizerPluginFactory&
aPluginFactory);
PVMFStatus OpenSession(PVMFSessionId&
aSessionId, PVMFRecognizerCommmandHandler& aCmdHandler);
PVMFStatus CloseSession(PVMFSessionId
aSessionId);
识别
PVMFCommandId Recognize(PVMFSessionId
aSessionId, PVMFDataStreamFactory& aSourceDataStreamFactory, PVMFRecognizerMIMEStringList* aFormatHint,
Oscl_Vector<PVMFRecognizerResult,
OsclMemAllocator>& aRecognizerResult, OsclAny* aCmdContext, uint32 aTimeout);
PVMFCommandId CancelCommand(PVMFSessionId
aSessionId, PVMFCommandId aCommandToCancelId, OsclAny* aCmdContext);
// Reference count for the
registry implementation
int32 iRefCount; 注册的Plug个数
private:
这个我怀疑是这样的,同时可能有多个识别的操作,那么就需要列表处理
PVMFSessionId iNextSessionId;
PVMFCommandId iNextCommandId;
// From OsclTimerObject
void Run();
// Vector to hold the active
sessions
Oscl_Vector<PVMFRecRegSessionInfo,
OsclMemAllocator> iRecognizerSessionList;
// Vector to hold the available
recognizer plug-in
Oscl_Vector<PVMFRecognizerPluginFactory*,
OsclMemAllocator> iRecognizerPluginFactoryList;
int32 FindPluginFactory(PVMFRecognizerPluginFactory&
aFactory);
PVMFRecognizerPluginInterface*
CreateRecognizerPlugin(PVMFRecognizerPluginFactory& aFactory);
void DestroyRecognizerPlugin(PVMFRecognizerPluginFactory&
aFactory, PVMFRecognizerPluginInterface* aPlugin);
// Vector to hold pending,
current, and to-cancel commands
OsclPriorityQueue<PVMFRecRegImplCommand,
OsclMemAllocator, Oscl_Vector<PVMFRecRegImplCommand, OsclMemAllocator>, PVMFRecRegImplCommandCompareLess> iRecognizerPendingCmdList;
Oscl_Vector<PVMFRecRegImplCommand,
OsclMemAllocator> iRecognizerCurrentCmd;
Oscl_Vector<PVMFRecRegImplCommand,
OsclMemAllocator> iRecognizerCmdToCancel;
PVMFCommandId AddRecRegCommand(PVMFSessionId
aSessionId, int32 aCmdType, OsclAny* aContextData = NULL, Oscl_Vector<PVMFRecRegImplCommandParamUnion, OsclMemAllocator>* aParamVector = NULL, bool aAPICommand = true);
void CompleteCurrentRecRegCommand(PVMFStatus
aStatus, const uint32 aCurrCmdIndex = 0, PVInterface* aExtInterface = NULL);
bool FindCommandByID(Oscl_Vector<PVMFRecRegImplCommand,
OsclMemAllocator> &aCmdQueue, const PVMFCommandId aCmdId);
// Command handling functions
void DoRecognize();
void CompleteRecognize(PVMFStatus
aStatus);
void DoCancelCommand(PVMFRecRegImplCommand&
aCmd);
PVMFDataStreamFactory* iDataStreamFactory;
PVMIDataStreamSyncInterface*
iDataStream;
PvmiDataStreamSession iDataStreamSessionID;
PvmiDataStreamCommandId
iRequestReadCapacityNotificationID;
PVMFStatus GetMaxRequiredSizeForRecognition(uint32&
aMaxSize);
PVMFStatus GetMinRequiredSizeForRecognition(uint32&
aMinSize);
PVMFStatus CheckForDataAvailability();
//logger
PVLogger* iLogger;
void DataStreamCommandCompleted(const
PVMFCmdResp& aResponse);
void DataStreamInformationalEvent(const
PVMFAsyncEvent& aEvent);
void DataStreamErrorEvent(const
PVMFAsyncEvent& aEvent);
bool oRecognizePending;
PVMFStatus iDataStreamCallBackStatus;
};
这个类具体是如何实现的,有兴趣的可以继续看一下。我粗略看了一下,基本上就是用一个for循环遍历各个插件来识别文件的格式,并且这种框架很容易扩展新的文件格式。不错不错。具体的文件识别插件(plug-in)怎么写,后面继续。