weka文件生成脚本

从findsound中选取了8个大类,并且对每个大类中的5个小类(其中有一个大类只有3个小类)进行了音频特征的提取,每个小类至多提取100个clip。提取的特征以mean和variance的形式分别保存在文件之中,每一个clip都有对应的特征文件,这些文件和相应的clip保存在相应的目录之中。

功能:根据给定的特征的类型,从这些文件中选择相应特征值,组成针对每一个小类的arff weka文件,这些weka文件保存到一个统一的地方,这里是保存在了list所在的目录。

list文件也是针对每一个小类的,并且按照原有目录的层次结构进行了组织。

function [ret,contents] = parseAfile(fpath,target)
% PARSEAFILE  Read the values of selected features from a feature file.
%
%   [ret,contents] = parseAfile(fpath,target)
%
%   The file is scanned line by line. Whenever a (trimmed) line is equal to
%   one of the names in TARGET, the following line is taken as that
%   feature's value line and is split on single spaces.
%
%   Inputs:
%     fpath  - path of the feature file to read.
%     target - cell array of feature names to extract.
%
%   Outputs:
%     ret      - 1 when the file was read; 0 when it does not exist or
%                cannot be opened.
%     contents - cell row vector, preallocated to length(target). Entry k
%                is a 1-by-N cell of value strings for the k-th matching
%                feature, in the order the features occur in the file (not
%                the order of TARGET). Entries for features that were not
%                found stay empty. When ret is 0, contents is cell(1,1).

if exist(fpath,'file')~=2
   fprintf('not exist %s \n',fpath);
   ret=0;
   contents=cell(1,1);
   return;
end

fid=fopen(fpath);
if fid==-1
   % The file exists but could not be opened; report it like a missing file.
   fprintf('not exist %s \n',fpath);
   ret=0;
   contents=cell(1,1);
   return;
end
% Make sure the handle is released even if an error occurs below.
closer=onCleanup(@() fclose(fid));

contents=cell(1,length(target));
count=1;
while ~feof(fid)
  featurename=fgetl(fid);
  if ~ischar(featurename)
      % fgetl returns -1 at end of file (e.g. for an empty file).
      break;
  end
  featurename=strtrim(featurename);
  if ~any(strcmp(featurename,target))
      continue;
  end

  featurevalue=fgetl(fid);
  if ~ischar(featurevalue)
      % A feature name on the last line has no value line to read.
      break;
  end
  % Split on every single space so the number of tokens stays
  % "number of spaces + 1"; tokens carry no trailing space.
  contents{count}=regexp(strtrim(featurevalue),' ','split');
  count=count+1;
end
ret=1;


上面的代码根据fpath指定的路径,找到特征文件,并且根据target返回所需要的特征值。

% Driver script: for every list file below, build one Weka ARFF file
% (written next to the list as <list>.arff) from the per-clip "-mean.txt"
% feature files named in that list.
%
% Relies on two functions defined elsewhere:
%   parseAfile(fpath,target)       - value tokens of the requested features
%   getattributename(fpath,target) - feature names and value counts, in
%                                    file order, used for the ARFF header

% List files, one row per top-level category. Rows with fewer than five
% sub-categories are padded with [] and skipped below.
alllist = { ...
    'animal\bat.txt', 'animal\cat.txt', 'animal\cow.txt', 'animal\dog.txt', 'animal\lamb.txt'; ...
    'household\bubbles.txt', 'household\clock.txt', 'household\door.txt', 'household\phone.txt', 'household\toilet.txt'; ...
    'musical+instruments\drum+rimshot.txt', 'musical+instruments\flute.txt', 'musical+instruments\guitar.txt', 'musical+instruments\piano.txt', 'musical+instruments\trumpet.txt'; ...
    'nature\fire.txt', 'nature\ocean.txt', 'nature\rain.txt', 'nature\thunder.txt', 'nature\wind.txt'; ...
    'office\coins.txt', 'office\modem.txt', 'office\mouse+click.txt', 'office\paper.txt', 'office\typewriter.txt'; ...
    'people\applause.txt', 'people\baby.txt', 'people\cough.txt', 'people\cry.txt', 'people\heartbeat.txt'; ...
    'sports+and+recreation\bowling.txt', 'sports+and+recreation\camera.txt', 'sports+and+recreation\cards.txt', [], []; ...
    'vehicles\collision.txt', 'vehicles\engine.txt', 'vehicles\helicopter.txt', 'vehicles\ship.txt', 'vehicles\train.txt' ...
    };
[r,c]=size(alllist);

% Names of the features to copy into the ARFF files.
target = {'fluctuation','attacktime','attackslope','centroid', ...
          'brightness','skewness','kurtosis','spectentropy', ...
          'flatness','irregularity','zerocross','mfcc'};

% Every class label; all of them are declared in each ARFF header.
allcategory = { ...
    'bat','cat','cow','dog','lamb', ...
    'bubbles','clock','door','phone','toilet', ...
    'drum+rimshot','flute','guitar','piano','trumpet', ...
    'fire','ocean','rain','thunder','wind', ...
    'coins','modem','mouse+click','paper','typewriter', ...
    'applause','baby','cough','cry','heartbeat', ...
    'bowling','camera','cards', ...
    'collision','engine','helicopter','ship','train'};

for i=1:r
    for j=1:c
        alist=alllist{i,j};
        if isempty(alist)
           continue;
        end
        aalist=sprintf('list2\\%s',alist);
        output=sprintf('list2\\%s.arff',alist);

        fid=fopen(aalist);
        if fid==-1
            fprintf('cannot open list %s \n',aalist);
            continue;
        end
        fidoutput=fopen(output,'w+');
        if fidoutput==-1
            fprintf('cannot create %s \n',output);
            fclose(fid);
            continue;
        end
        fprintf('processing %s -> %s\n',aalist,output);

        fprintf(fidoutput,'@relation ''cpu''\n');
        % The attribute header is written once per output file, using the
        % first clip whose feature file can be parsed.
        hasattribute=0;
        while ~feof(fid)
           listentry=fgetl(fid);
           if ~ischar(listentry)
               % fgetl returns -1 at end of file.
               break;
           end
           if isempty(strtrim(listentry))
               % Blank lines (e.g. a trailing newline) carry no clip path.
               continue;
           end
           listentry=sprintf('%s-mean.txt',listentry);

           % The class label is the path component between the last two
           % '/' characters of the entry.
           indexs=strfind(listentry,'/');
           len=length(indexs);
           if len<2
               fprintf('cannot determine category of %s \n',listentry);
               continue;
           end
           category=listentry(1,indexs(len-1)+1:indexs(len)-1);
           fullpath=sprintf('F:\\研究——音频事件检测\\findsound  result\\%s',listentry);

           [ret,contents]=parseAfile(fullpath,target);
           if ret~=1
               continue;
           end

           if hasattribute==0
               [retstat,order]=getattributename(fullpath,target);
               % One numeric attribute per value: <feature><index>.
               for k=1:size(order,1)
                  name=order{k,1};
                  le=order{k,2};
                  for kk=1:le
                     fprintf(fidoutput,'@attribute %s%d numeric\n',name,kk);
                  end
               end
               fprintf(fidoutput,'@attribute class {');
               for kk=1:length(allcategory)
                   if kk~=length(allcategory)
                       fprintf(fidoutput,'%s,',allcategory{kk});
                   else
                       fprintf(fidoutput,'%s',allcategory{kk});
                   end
               end
               fprintf(fidoutput,'}\n');
               fprintf(fidoutput,'@data\n');
               hasattribute=1;
           end

           % One data row: all feature values, then the class label.
           for k=1:length(contents)
              avalue=contents{1,k};
              for kk=1:length(avalue)
                 % strtrim drops any padding left on the token.
                 fprintf(fidoutput,'%s,',strtrim(avalue{kk}));
              end
           end
           fprintf(fidoutput,'%s\n',category);
        end
        fclose(fid);
        fclose(fidoutput);
    end
end

上面的代码,指定了所有的list的位置,所有的类别,所要提取的特征的类型,通过调用脚本一,得到这些特征,并且把这些特征输出为arff文件

function [ret,orderattribute] = getattributename(fpath,target)
% GETATTRIBUTENAME  List the requested features found in a feature file.
%
%   [ret,orderattribute] = getattributename(fpath,target)
%
%   The file is scanned line by line. Whenever a (trimmed) line is equal to
%   one of the names in TARGET, the following line is taken as that
%   feature's value line and the number of space-separated values on it is
%   recorded.
%
%   Inputs:
%     fpath  - path of the feature file to read.
%     target - cell array of feature names to look for.
%
%   Outputs:
%     ret            - 1 when the file was read; 0 when it does not exist
%                      or cannot be opened.
%     orderattribute - cell array preallocated to length(target)-by-2.
%                      Row k holds {feature name, number of values} for the
%                      k-th matching feature, in the order the features
%                      occur in the file (not the order of TARGET). Rows
%                      for features that were not found stay empty. When
%                      ret is 0, orderattribute is cell(1,1).

if exist(fpath,'file')~=2
   fprintf('not exist %s \n',fpath);
   ret=0;
   orderattribute=cell(1,1);
   return;
end

fid=fopen(fpath);
if fid==-1
   % The file exists but could not be opened; report it like a missing file.
   fprintf('not exist %s \n',fpath);
   ret=0;
   orderattribute=cell(1,1);
   return;
end
% Make sure the handle is released even if an error occurs below.
closer=onCleanup(@() fclose(fid));

orderattribute=cell(length(target),2);
count=1;
while ~feof(fid)
  featurename=fgetl(fid);
  if ~ischar(featurename)
      % fgetl returns -1 at end of file (e.g. for an empty file).
      break;
  end
  featurename=strtrim(featurename);
  if ~any(strcmp(featurename,target))
      continue;
  end

  featurevalue=fgetl(fid);
  if ~ischar(featurevalue)
      % A feature name on the last line has no value line to read.
      break;
  end
  % Values are separated by single spaces, so the value count is the
  % number of spaces in the trimmed line plus one.
  attrlen=length(strfind(strtrim(featurevalue),' '))+1;
  orderattribute{count,1}=featurename;
  orderattribute{count,2}=attrlen;
  count=count+1;
end
ret=1;

getattributename的功能是返回文件中出现的所有目标特征及其取值个数,并且按照它们在文件中的实际顺序排列,而不是按照target指定的顺序,其结果可以用来输出arff文件中的attribute。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值