最近网易相册改版了,很多相册都升级成了新版本,这就苦了一些以前的网易相册获取工具(包括我用ajax写的一个)。虽然自己写的这个程序已经有点过时了,但这是我用java写的第一个类似爬虫的东西(其实还算不上真正的爬虫);以后有时间和兴趣时,我会研究新版的网易相册,写出一个真正的爬虫来。
相关说明:我把获得的相册列表文件放到了C盘根目录(可以修改代码第37行,即main方法中调用writeToFile时传入的路径),并在第10行(变量username)中填入要获取相册的用户名。现在这个工具对某些没有升级的相册还是有用的。本工具分析的就是"http://photo.163.com/js/albumsinfo.php?user=username&from=guest"这个文件。
其中汉字的处理还有问题。
好的,现在给出源代码吧:
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;
- public class socket {
- public static void main(String[] args) {
- String username = ""; //这里写上你想要获取相册的用户名
- String urlName = "http://photo.163.com/js/albumsinfo.php?user="
- + username + "&from=guest";
- String content = getContent(urlName);
- // 正则表达式处理
- String re = "gAlbumsIds//[([0-9]*)[^//d]*(//d+)[^,]*,[^,]*,(//d*),/"(.*?)/",/"(.*?)/"//]";
- List<AlbumsInfo> list = getContentListA(re, content);
- System.out.println("begin aaa!/r/n content's length is:"
- + content.length());
- for (AlbumsInfo album : list) {
- urlName = "http://photo.163.com/js/photosinfo.php?user=" + username
- + "&aid=" + album.getListId() + "&from=guest";
- content = getContent(urlName);
- printAlbum(album);
- re = "([^/"]+?)/"//];";
- List<String> l = getContentList(re, content);
- String fileContent = l.toString().replaceAll("[//[//]]", "")
- .replaceAll("//s*,//s*", "/r/n");
- fileContent += "/r/n/r/n===============相关信息=========================/r/n"
- + album.getNote();
- writeToFile("c://" + album.getTitle() + ".txt", fileContent); //文件放在C盘根目录
- }
- System.out.print("end aaa!/r/n");
- }
- public static void printAlbum(AlbumsInfo album) {
- System.out.print("/r/n========/r/nId: " + album.getId() + "/r/nListId: "
- + album.getListId() + "/r/nnum : " + album.getTitle()
- + "/r/ntitle : " + album.getTitle() + "/r/nNote : "
- + album.getNote());
- }
- public static void writeToFile(String fileName, String content) {
- try {
- FileWriter out = new FileWriter(fileName);
- out.write(content);
- out.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- public static List<String> getContentList(String re, String content) {
- Pattern p = Pattern.compile(re);
- Matcher m = p.matcher(content);
- List<String> list = new LinkedList<String>();
- while (m.find())
- list.add(m.group(1));
- return list;
- }
- public static List<AlbumsInfo> getContentListA(String re, String content) {
- List<AlbumsInfo> list = new LinkedList<AlbumsInfo>();
- Pattern p = Pattern.compile(re);
- Matcher m = p.matcher(content);
- while (m.find()) {
- AlbumsInfo temp = new AlbumsInfo();
- temp.setId(Integer.parseInt(m.group(1)));
- temp.setListId(m.group(2));
- temp.setNum(m.group(3));
- temp.setTitle(changeStr(m.group(4)));
- temp.setNote(m.group(5));
- list.add(temp);
- }
- return list;
- }
- public static String changeStr(String str) {
- char[] temp = str.toCharArray();
- return temp.toString();
- }
- public static String getContent(String urlName) {
- StringBuilder content = new StringBuilder();
- try {
- URL url = new URL(urlName);
- InputStream inStream = url.openStream();
- Scanner in = new Scanner(inStream);
- while (in.hasNext()) {
- content.append(in.next() + "/r/n");
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- return content.toString();
- }
- }
/**
 * Mutable holder for the values describing one album: a numeric id plus four text fields
 * (list id, num, title and note).
 *
 * <p>A new instance has id {@code 0} and empty strings for all text fields. The setters store
 * their argument as-is, without validation.
 */
public class AlbumsInfo {

    private int id = 0;
    private String listId = "";
    private String num = "";
    private String title = "";
    private String note = "";

    /** Creates an album with id {@code 0} and empty text fields. */
    public AlbumsInfo() {
    }

    /** @return the numeric id */
    public int getId() {
        return id;
    }

    /** @param id the numeric id to store */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the num field, kept as text (presumably the photo count; confirm with the data source) */
    public String getNum() {
        return num;
    }

    /** @param num the num field to store */
    public void setNum(String num) {
        this.num = num;
    }

    /** @return the album title */
    public String getTitle() {
        return title;
    }

    /** @param title the album title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the album note */
    public String getNote() {
        return note;
    }

    /** @param note the album note to store */
    public void setNote(String note) {
        this.note = note;
    }

    /** @return the list id, kept as text */
    public String getListId() {
        return listId;
    }

    /** @param listId the list id to store */
    public void setListId(String listId) {
        this.listId = listId;
    }
}