HttpClient 爬取接口时报 MalformedInputException: Input length = 1
调试时发现响应的 header 中有这样一条:
Set-Cookie: security_session_verify=cad9721cc133dd9d4646bb8913a27e44; expires=��, 28- 6��-19 15:43:50 GMT; path=/; HttpOnly;
当时初始化 HttpClient 时,连接编码设置的是 UTF-8:
ConnectionConfig connCfg = ConnectionConfig.custom().setCharset(Charset.forName("utf-8")).build();
HttpClientBuilder clientBuilder = HttpClients.custom()
.setConnectionManager(connManager)
.setDefaultRequestConfig(defaultRequestConfig)
.setDefaultConnectionConfig(connCfg);
这条 Set-Cookie 的 expires 里包含了不是合法 UTF-8 的字节,导致 HttpClient 读取响应头时抛出 java.nio.charset.MalformedInputException: Input length = 1。
最后尝试把编码换成 GBK / GB2312,终于不再报 Input length = 1 了。
本以为问题解决了,结果调试时发现解析 cookie 会报 Invalid cookie header,导致这条 cookie 被丢弃。原来乱码解决之后,expires 的值变成了「四, 27- 6月-19 20:57:11 GMT」,这本身也不是合法的 expires 格式。
这个时候就得自己重写 CookieSpec 了,贴上代码供参考:
public class MyCookieSpec extends DefaultCookieSpec {
public final static String EXPIRESCOOKIESPEC = "myCookieSpec";
@Override
public List parse(Header header, CookieOrigin cookieOrigin)
throws MalformedCookieException
{
String value = header.getValue();
String prefix = "expires=";
if (value.contains(prefix))
{
String expires = value.substring(value.indexOf(prefix) + prefix.length());
expires = expires.substring(0, expires.indexOf(";"));
int longer = expires.length();
//去掉expires
/*
if(("?".contains(expires)||isMessy(expires))&&(longer==25||longer==27)){
//四, 27- 6月-19 20:57:11 GMT
String date =
DateUtils.formatDate(new Date(System.currentTimeMillis()+3*24*3600*1000),"EEE, dd-MMM-yy HH:mm:ss z");
value = value.replaceAll(prefix + ".{"+longer+"};",prefix + date + ";");
}
*/
value = value.replaceAll(prefix + ".{"+longer+"};","");
}
header =new BasicHeader(header.getName(), value);
return super.parse(header, cookieOrigin);
}
// Context that will carry the cookie spec registry built below.
HttpClientContext httpContext = HttpClientContext.create();

// Provider that creates a new MyCookieSpec each time it is invoked.
CookieSpecProvider expiresStrippingProvider = ctx -> new MyCookieSpec();

// Registry holding the stock default spec plus the custom one under its own name.
// NOTE(review): registering only makes the spec available by name; presumably the request
// config selects MyCookieSpec.EXPIRESCOOKIESPEC elsewhere — confirm.
Registry<CookieSpecProvider> registry = RegistryBuilder.<CookieSpecProvider>create()
        .register(CookieSpecs.DEFAULT, new DefaultCookieSpecProvider())
        .register(MyCookieSpec.EXPIRESCOOKIESPEC, expiresStrippingProvider)
        .build();

httpContext.setCookieSpecRegistry(registry);