import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Fetches a web page, extracts the image URLs it references and saves the
 * images to disk, either sequentially or in batches on worker threads.
 *
 * <p>Only {@code http:} URLs are recognised by the extraction regexes;
 * {@code https:} sources are skipped.
 *
 * @author ITWANG
 */
public class CatchImage {

    /** Name of the charset used to decode the fetched page. */
    private static final String ECODING = "UTF-8";

    /** Regex for a whole {@code <img ... src=...>} tag. */
    private static final String IMGURL_REG = "<img.*src=(.*?)[^>]*?>";

    /** Regex for an {@code http:} URL plus its terminator (group 2: quote, '>' or whitespace). */
    private static final String IMGSRC_REG = "http:\"?(.*?)(\"|>|\\s+)";

    /** Regex for a quoted string starting with "http" and ending in a known image suffix. */
    private static final String IMGDSRC_REG = "[\"\'](http.+\\.(jpg|JPG|png|PNG|gif|GIF))[\"\']";

    /** Image file suffixes (without the dot) recognised by {@link #getpicsrc(String)}. */
    private static final String[] picstuffix = { "jpg", "JPG", "gif", "GIF", "png", "PNG" };

    /** Base directory used by {@link #main} when none is given on the command line. */
    private static final String DEFAULT_SAVE_DIR = "E:\\Imagesave";

    /** Size of the buffer used for every stream copy. */
    private static final int BUFFER_SIZE = 1024;

    // Compiled once; Pattern instances are immutable and safe to share.
    private static final Pattern IMG_TAG_PATTERN = Pattern.compile(IMGURL_REG);
    private static final Pattern IMG_SRC_PATTERN = Pattern.compile(IMGSRC_REG);
    private static final Pattern IMG_DSRC_PATTERN = Pattern.compile(IMGDSRC_REG);

    /** Accumulates the URLs found by {@link #getpicsrc(String)}. */
    private static final List<String> pList = new ArrayList<>();

    /**
     * Downloads every image referenced by a page into a time-stamped sub-directory.
     *
     * @param args {@code args[0]} is the page URL (required); {@code args[1]} is the
     *             base save directory (optional, defaults to {@code E:\Imagesave})
     * @throws Exception if the page cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: java CatchImage <page-url> [save-dir]");
            return;
        }
        String baseDir = args.length > 1 ? args[1] : DEFAULT_SAVE_DIR;

        CatchImage cm = new CatchImage();
        String html = cm.getHTML(args[0]);
        List<String> imgTags = cm.getImageUrl(html);
        List<String> imgSrc = cm.getImageSrc(imgTags);
        cm.Download(imgSrc, baseDir + saveDiff());
    }

    /**
     * Fetches the content behind {@code url} and decodes it as UTF-8.
     *
     * @param url address of the page to fetch
     * @return the decoded page content
     * @throws Exception if the URL is malformed or the content cannot be read
     */
    private String getHTML(String url) throws Exception {
        URLConnection connection = new URL(url).openConnection();
        ByteArrayOutputStream page = new ByteArrayOutputStream();
        try (InputStream in = connection.getInputStream()) {
            copy(in, page);
        }
        // Decode once over the complete byte sequence: decoding chunk by chunk would
        // break multi-byte characters that straddle a buffer boundary and would pick
        // up stale bytes whenever a read fills only part of the buffer.
        return page.toString(ECODING);
    }

    /**
     * Collects every {@code <img>} tag found in the page.
     *
     * @param HTML page content to scan
     * @return the matched tags, in order of appearance
     */
    private List<String> getImageUrl(String HTML) {
        List<String> tags = new ArrayList<String>();
        Matcher matcher = IMG_TAG_PATTERN.matcher(HTML);
        while (matcher.find()) {
            tags.add(matcher.group());
        }
        return tags;
    }

    /**
     * Extracts the {@code http:} URLs contained in the given tags.
     *
     * @param listImageUrl tags as returned by {@link #getImageUrl(String)}
     * @return the URLs without their terminating quote, '>' or whitespace
     */
    private List<String> getImageSrc(List<String> listImageUrl) {
        List<String> sources = new ArrayList<String>();
        for (String tag : listImageUrl) {
            Matcher matcher = IMG_SRC_PATTERN.matcher(tag);
            while (matcher.find()) {
                // Group 2 is the terminator; cutting at its start drops it entirely,
                // including a run of several whitespace characters.
                sources.add(tag.substring(matcher.start(), matcher.start(2)));
            }
        }
        return sources;
    }

    /**
     * Scans raw HTML for quoted image URLs and feeds each hit to
     * {@link #getpicsrc(String)}.
     *
     * @param html page content to scan
     */
    private void getImageSrc(String html) {
        Matcher matcher = IMG_DSRC_PATTERN.matcher(html);
        while (matcher.find()) {
            getpicsrc(matcher.group(1));
        }
    }

    /**
     * Splits {@code src} on {@code "http:"} and records, for every piece that contains
     * a known image suffix, the URL up to and including that suffix.
     *
     * @param src text that may contain one or more concatenated image URLs
     */
    public void getpicsrc(String src) {
        if (!src.contains("http:")) {
            return;
        }
        for (String piece : src.split("http:")) {
            if (isBlank(piece)) {
                continue;
            }
            for (String suffix : picstuffix) {
                // Search for ".suffix", not the bare suffix, so that a path segment
                // such as "/jpgs/" cannot truncate the URL early.
                int dot = piece.indexOf("." + suffix);
                if (dot >= 0) {
                    pList.add("http:" + piece.substring(0, dot + 1) + suffix);
                }
            }
        }
    }

    /**
     * Removes duplicate entries while keeping the first occurrence of each.
     *
     * @param result list to de-duplicate; {@code null} is treated as empty
     * @return a new list with the distinct elements in their original order
     */
    public static List<String> RemoveRepeated(List<String> result) {
        if (result == null) {
            return new ArrayList<String>();
        }
        // LinkedHashSet keeps insertion order and avoids a linear scan per element.
        return new ArrayList<String>(new LinkedHashSet<String>(result));
    }

    /**
     * Tells whether a character sequence is null, empty or whitespace only.
     *
     * @param cs sequence to test, may be {@code null}
     * @return {@code true} if {@code cs} has no non-whitespace character
     */
    public static boolean isBlank(CharSequence cs) {
        if (cs == null) {
            return true;
        }
        for (int i = 0, n = cs.length(); i < n; i++) {
            if (!Character.isWhitespace(cs.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Downloads the images one after another on the calling thread.
     *
     * <p>Each file is named after the text following the last '/' of its URL. A failed
     * download is reported and does not stop the remaining ones.
     *
     * @param listImgSrc URLs to download
     * @param savedir    path prefix the file name is appended to (should end with a separator)
     */
    private void Download(List<String> listImgSrc, String savedir) {
        for (String url : listImgSrc) {
            try {
                String imageName = url.substring(url.lastIndexOf("/") + 1);
                fetchToFile(url, new File(savedir + imageName));
                System.out.println("*^_^*");
            } catch (Exception e) {
                // Best effort: report the failure and carry on with the next image.
                System.out.println("-_-!");
                System.err.println("Failed to download " + url + ": " + e);
            }
        }
    }

    /**
     * Downloads the images in batches, one worker thread per batch.
     *
     * @param listImgSrc URLs to download
     * @param savedir    path prefix the generated file name is appended to
     * @param tnum       number of images per thread, must be positive
     * @throws IllegalArgumentException if {@code tnum} is not positive
     */
    private void ThreadDownload(List<String> listImgSrc, String savedir, int tnum) {
        checkBatchSize(tnum);
        for (int i = 0; i < listImgSrc.size(); i += tnum) {
            new DThread(savedir, tnum, listImgSrc, i).start();
        }
    }

    /**
     * Worker thread that downloads one batch of images under random UUID file names.
     *
     * @author ITWANG
     */
    class DThread extends Thread {
        private final String savedir;
        private final int tnum;
        private final List<String> listImgSrc;
        private final int bunm;

        /**
         * @param savedir    path prefix the generated file name is appended to
         * @param tnum       maximum number of images this thread handles
         * @param listImgSrc full list of URLs
         * @param bnum       index of the first URL of this thread's batch
         */
        public DThread(String savedir, int tnum, List<String> listImgSrc, int bnum) {
            this.savedir = savedir;
            this.tnum = tnum;
            this.listImgSrc = listImgSrc;
            this.bunm = bnum;
        }

        @Override
        public void run() {
            // The last batch may be shorter than tnum; never index past the list.
            int end = batchEnd(bunm, tnum, listImgSrc.size());
            for (int i = bunm; i < end; i++) {
                try {
                    String url = listImgSrc.get(i);
                    String imageName = UUID.randomUUID().toString() + extensionOf(url);
                    System.out.println(savedir + imageName);
                    fetchToFile(url, new File(savedir + imageName));
                    System.out.println("*^_^*");
                } catch (Exception e) {
                    // Best effort: report the failure and carry on with the next image.
                    System.out.println("-_-!");
                    System.err.println("Failed to download item " + i + ": " + e);
                }
            }
        }
    }

    /**
     * Downloads the images in batches on worker threads, applying a timeout to each request.
     *
     * @param listImgSrc URLs to download
     * @param savedir    directory the files are written to
     * @param tnum       number of images per thread, must be positive
     * @param timeout    connect/read timeout in milliseconds
     * @throws IllegalArgumentException if {@code tnum} is not positive
     */
    private void TOThreadDownload(List<String> listImgSrc, String savedir, int tnum, int timeout) {
        checkBatchSize(tnum);
        for (int i = 0; i < listImgSrc.size(); i += tnum) {
            new TODThread(savedir, tnum, listImgSrc, i, timeout).start();
        }
    }

    /**
     * Worker thread that downloads one batch of images through
     * {@link CatchImage#getPic(String, String, String, int)}.
     *
     * @author ITWANG
     */
    class TODThread extends Thread {
        private final String savedir;
        private final int tnum;
        private final List<String> listImgSrc;
        private final int bunm;
        private final int timeout;

        /**
         * @param savedir    directory the files are written to
         * @param tnum       maximum number of images this thread handles
         * @param listImgSrc full list of URLs
         * @param bnum       index of the first URL of this thread's batch
         * @param timeout    connect/read timeout in milliseconds
         */
        public TODThread(String savedir, int tnum, List<String> listImgSrc, int bnum, int timeout) {
            this.savedir = savedir;
            this.tnum = tnum;
            this.listImgSrc = listImgSrc;
            this.bunm = bnum;
            this.timeout = timeout;
        }

        @Override
        public void run() {
            // The last batch may be shorter than tnum; never index past the list.
            int end = batchEnd(bunm, tnum, listImgSrc.size());
            for (int i = bunm; i < end; i++) {
                // Everything for one image sits inside the try so that a single bad
                // entry cannot terminate the thread and abandon the rest of the batch.
                try {
                    String url = listImgSrc.get(i);
                    String imageName = UUID.randomUUID().toString() + extensionOf(url);
                    if (getPic(url, savedir, imageName, timeout)) {
                        System.out.println("*^_^*");
                    } else {
                        System.out.println("-_-!");
                    }
                } catch (Exception e) {
                    System.out.println("下载异常");
                    System.err.println("Failed to download item " + i + ": " + e);
                }
            }
        }
    }

    /**
     * Downloads one image with an HTTP GET request.
     *
     * @param purl     URL of the image
     * @param folder   directory to save into; created if missing
     * @param filename name of the file to write inside {@code folder}
     * @param timeout  connect and read timeout in milliseconds
     * @return {@code true} if the server answered 200 and the file was written,
     *         {@code false} for any other response code
     * @throws Exception if the URL is invalid, the connection fails or times out,
     *                   or the file cannot be written
     */
    public boolean getPic(String purl, String folder, String filename, int timeout) throws Exception {
        HttpURLConnection conn = (HttpURLConnection) new URL(purl).openConnection();
        try {
            conn.setConnectTimeout(timeout);
            // Without a read timeout a stalled server would block the thread forever.
            conn.setReadTimeout(timeout);
            // A plain GET sends no body, so doOutput is left at its default (false).
            conn.setRequestMethod("GET");
            if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return false;
            }
            File dir = new File(folder);
            // mkdirs() returns false when the directory already exists (or another
            // thread just created it), so confirm with isDirectory() before failing.
            if (!dir.mkdirs() && !dir.isDirectory()) {
                throw new IOException("Cannot create directory " + dir);
            }
            try (InputStream is = conn.getInputStream();
                 OutputStream os = new FileOutputStream(new File(dir, filename))) {
                copy(is, os);
            }
            return true;
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Builds a sub-directory name from the current time.
     *
     * @return the current time formatted as {@code yyyy-MM-dd-HH-mm-ss}, wrapped in backslashes
     */
    public static String saveDiff() {
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
        String stamp = formatter.format(System.currentTimeMillis());
        return "\\" + stamp + "\\";
    }

    /** Copies everything from {@code in} to {@code out}; closes neither stream. */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buf = new byte[BUFFER_SIZE];
        int length;
        while ((length = in.read(buf)) != -1) {
            out.write(buf, 0, length);
        }
    }

    /** Streams the content behind {@code url} into {@code target}, creating its parent directory. */
    private static void fetchToFile(String url, File target) throws IOException {
        File parent = target.getParentFile();
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Cannot create directory " + parent);
        }
        // Open the remote stream first so that an unreachable URL leaves no empty file.
        try (InputStream in = new URL(url).openStream();
             OutputStream out = new FileOutputStream(target)) {
            copy(in, out);
        }
    }

    /** Returns the ".ext" of the URL's last path segment (query/fragment removed), or "" if none. */
    private static String extensionOf(String url) {
        String name = url.substring(url.lastIndexOf('/') + 1);
        int query = name.indexOf('?');
        if (query >= 0) {
            name = name.substring(0, query);
        }
        int fragment = name.indexOf('#');
        if (fragment >= 0) {
            name = name.substring(0, fragment);
        }
        int dot = name.lastIndexOf('.');
        return dot < 0 ? "" : name.substring(dot);
    }

    /** Returns the exclusive end index of a batch, clamped to the list size. */
    private static int batchEnd(int start, int count, int size) {
        return (int) Math.min((long) start + count, (long) size);
    }

    /** Rejects batch sizes that would make the batching loops spin forever. */
    private static void checkBatchSize(int tnum) {
        if (tnum <= 0) {
            throw new IllegalArgumentException("tnum must be positive: " + tnum);
        }
    }
}