import java.util.arraylist;import java.util.hashset;import java.util.iterator;import java.util.list;public class songlist { //歌手列表 private list<song> songlist; //当前页面最大分页数 private int maxpage = 2; //是否取得页面最大分页数 private boolean bool = false; //已添加的歌曲信息 private hashset<string> sondlisted = new hashset<string>(); public songlist(string singer){ this.songlist = spiderbysinger(singer); } public list<song> getsonglist(){ return songlist; } /** * 根据歌手名称去爬页面,取得歌曲信息 */ private list<song> spiderbysinger(string singer){ arraylist<song> sosolist = new arraylist<song>(); spider spider = new spider(); for (int i = 1; i < maxpage ;i++){ string content = spider.geturlcontentbyurl("http://music.soso.com/music.cgi?w=" + singer + "&pl=&co=&ch=s.m.res&sc=mus&sz=&clz=wma&pg="+i); setmaxpagebycontent(content); sosolist.addall(this.getsonglistbycontent(content)); } sondlisted.clear(); return sosolist; } /** * 根据内容设置当前页面最大分页数 */ private void setmaxpagebycontent(string content){ if (!bool){ string s = contentpattern.getstringbycontentpattern(content, "<script language=\"javascript\">splitresnum(.*?);</script>"); if (s != null && !s.equals("")){ s = replace.yyreplace(s, "(", ""); s = replace.yyreplace(s, ")", ""); int i = integer.parseint(s)/20 + 1; if (i > 50){ maxpage = 50; }else{ maxpage = i; } } bool = true; } } /** * 获得歌曲列表 */ private arraylist<song> getsonglistbycontent(string content){ arraylist<song> list = new arraylist<song>(); iterator<string> iterator = contentpattern.getlistbycontentpattern(content, "<form id=(.*?)<tr>").iterator(); while(iterator.hasnext()){ song song = getsongbycontent(iterator.next()); if (song.getname() == null){ }else{ list.add(song); } } return list; } /** * 根据页面内容提取歌曲信息 */ private song getsongbycontent(string content){ song song = new song(); //获得歌曲名称 string name = contentpattern.getstringbycontentpattern(content, "name=\"song\"\\s+value=\"(.*?)\">"); //判断是否已添加此歌曲 if(sondlisted.contains(name.trim())){ return song; }else{ sondlisted.add(name.trim()); 
//获得歌曲文件地址 string address = contentpattern.getstringbycontentpattern(content, "name=\"url\"\\s+value=\"(.*?)\">"); //获得歌手 string singer = contentpattern.getstringbycontentpattern(content, "name=\"singer\"\\s+value=\"(.*?)\">"); //获得专辑 string special = contentpattern.getstringbycontentpattern(content, "name=\"album\"\\s+value=\"(.*?)\">"); song.setaddress(address); song.setname(name); song.setspecial(special); song.setsinger(singer); return song; } }}
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers that pull the first capture group of a regular expression
 * out of a piece of text. Matching is case-insensitive.
 */
public class contentpattern {

    /**
     * Collects the trimmed first capture group of every match of
     * {@code pattern} in {@code content}.
     *
     * @param content text to search; {@code null} yields an empty set
     * @param pattern regular expression with at least one capture group
     * @return the distinct captured values, iterating in the order they first
     *         appear in {@code content}
     * @throws IndexOutOfBoundsException if {@code pattern} has no capture group
     *         and a match is found
     */
    public static HashSet<String> getlistbycontentpattern(String content, String pattern) {
        // LinkedHashSet is a HashSet, so callers are unaffected, but iteration
        // follows document order instead of hash order.
        HashSet<String> list = new LinkedHashSet<String>();
        if (content == null) {
            return list;
        }
        // Compile the regular expression into the matching pattern.
        Pattern p = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE);
        Matcher m = p.matcher(content);
        while (m.find()) {
            String captured = m.group(1);
            // group(1) is null when the group did not take part in the match.
            if (captured != null) {
                list.add(captured.trim());
            }
        }
        return list;
    }

    /**
     * Returns the trimmed first capture group of the first match of
     * {@code pattern} in {@code content}.
     *
     * @param content text to search; {@code null} yields an empty string
     * @param pattern regular expression with at least one capture group
     * @return the captured value, or an empty string when nothing matches
     * @throws IndexOutOfBoundsException if {@code pattern} has no capture group
     *         and a match is found
     */
    public static String getstringbycontentpattern(String content, String pattern) {
        String s = "";
        if (content == null) {
            return s;
        }
        // Compile the regular expression into the matching pattern.
        Pattern p = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE);
        Matcher m = p.matcher(content);
        if (m.find() && m.group(1) != null) {
            s = m.group(1).trim();
        }
        return s;
    }
}
/**
 * Plain data holder for one song: its name, singer, album and address.
 * All fields are {@code null} until set.
 */
public class song {

    /** Song name. */
    private String name;

    /** Singer name. */
    private String singer;

    /** Album. */
    private String special;

    /** Address of the song. */
    private String address;

    /** @return the song name, or {@code null} if none was set */
    public String getname() {
        return name;
    }

    /** @param name the song name */
    public void setname(String name) {
        this.name = name;
    }

    /** @return the singer name, or {@code null} if none was set */
    public String getsinger() {
        return singer;
    }

    /** @param singer the singer name */
    public void setsinger(String singer) {
        this.singer = singer;
    }

    /** @return the album, or {@code null} if none was set */
    public String getspecial() {
        return special;
    }

    /** @param special the album */
    public void setspecial(String special) {
        this.special = special;
    }

    /** @return the address, or {@code null} if none was set */
    public String getaddress() {
        return address;
    }

    /** @param address the address of the song */
    public void setaddress(String address) {
        this.address = address;
    }
}