ParseUtil.java

<span style="font-size:14px;">/**
 * A Utility class containing methods to simply perform parsing utilities such
 * as iterating through a preferred list of {@link Parser}s to obtain
 * {@link Parse} objects.
 */
public class ParseUtil {
  
  /* our log stream */
  public static final Logger LOG = LoggerFactory.getLogger(ParseUtil.class);
  private ParserFactory parserFactory;
  /** Parser timeout set to 30 sec by default. Set -1 to deactivate **/
  private int maxParseTime = 30;
  private ExecutorService executorService;
  
  /**
   * 
   * @param conf
   */
  public ParseUtil(Configuration conf) {
    this.parserFactory = new ParserFactory(conf);
    maxParseTime=conf.getInt("parser.timeout", 30);
    executorService = Executors.newCachedThreadPool(new ThreadFactoryBuilder()
      .setNameFormat("parse-%d").setDaemon(true).build());
  }
  
  /**
   * Performs a parse by iterating through a List of preferred {@link Parser}s
   * until a successful parse is performed and a {@link Parse} object is
   * returned. If the parse is unsuccessful, a message is logged to the
   * <code>WARNING</code> level, and an empty parse is returned.
   *
   * @param content The content to try and parse.
   * @return <key, {@link Parse}> pairs.
   * @throws ParseException If no suitable parser is found to perform the parse.
   */
  public ParseResult parse(Content content) throws ParseException {
    Parser[] parsers = null;
    
    try {
      parsers = this.parserFactory.getParsers(content.getContentType(), 
	         content.getUrl() != null ? content.getUrl():"");
    } catch (ParserNotFound e) {
      if (LOG.isWarnEnabled()) {
        LOG.warn("No suitable parser found when trying to parse content " + content.getUrl() +
               " of type " + content.getContentType());
      }
      throw new ParseException(e.getMessage());
    }
    
    ParseResult parseResult = null;
    for (int i=0; i<parsers.length; i++) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Parsing [" + content.getUrl() + "] with [" + parsers[i] + "]");
      }
      if (maxParseTime!=-1)
      	parseResult = runParser(parsers[i], content);
      else 
      	parseResult = parsers[i].getParse(content);

      if (parseResult != null && !parseResult.isEmpty())
        return parseResult;
    }
   
    if (LOG.isWarnEnabled()) { 
      LOG.warn("Unable to successfully parse content " + content.getUrl() +
               " of type " + content.getContentType());
    }
    return new ParseStatus(new ParseException("Unable to successfully parse content")).getEmptyParseResult(content.getUrl(), null);
  }
    
  /**
   * Method parses a {@link Content} object using the {@link Parser} specified
   * by the parameter <code>extId</code>, i.e., the Parser's extension ID.
   * If a suitable {@link Parser} is not found, then a <code>WARNING</code>
   * level message is logged, and a ParseException is thrown. If the parse is
   * uncessful for any other reason, then a <code>WARNING</code> level
   * message is logged, and a <code>ParseStatus.getEmptyParse()</code> is
   * returned.
   *
   * @param extId The extension implementation ID of the {@link Parser} to use
   *              to parse the specified content.
   * @param content The content to parse.
   *
   * @return <key, {@link Parse}> pairs if the parse is successful, otherwise,
   *         a single <key, <code>ParseStatus.getEmptyParse()</code>> pair.
   *
   * @throws ParseException If there is no suitable {@link Parser} found
   *                        to perform the parse.
   */
  public ParseResult parseByExtensionId(String extId, Content content)
  throws ParseException {
    Parser p = null;
    
    try {
      p = this.parserFactory.getParserById(extId);
    } catch (ParserNotFound e) {
      if (LOG.isWarnEnabled()) {
        LOG.warn("No suitable parser found when trying to parse content " + content.getUrl() +
            " of type " + content.getContentType());
      }
      throw new ParseException(e.getMessage());
    }
    
    ParseResult parseResult = null;
    if (maxParseTime!=-1)
    	parseResult = runParser(p, content);
    else 
    	parseResult = p.getParse(content);
    if (parseResult != null && !parseResult.isEmpty()) {
      return parseResult;
    } else {
      if (LOG.isWarnEnabled()) {
        LOG.warn("Unable to successfully parse content " + content.getUrl() +
            " of type " + content.getContentType());
      }  
      return new ParseStatus(new ParseException("Unable to successfully parse content")).getEmptyParseResult(content.getUrl(), null);
    }
  }

  private ParseResult runParser(Parser p, Content content) {
    ParseCallable pc = new ParseCallable(p, content);
    Future<ParseResult> task = executorService.submit(pc);
    ParseResult res = null;
    try {
      res = task.get(maxParseTime, TimeUnit.SECONDS);
    } catch (Exception e) {
      LOG.warn("Error parsing " + content.getUrl() + " with " + p, e);
      task.cancel(true);
    } finally {
      pc = null;
    }
    return res;
  }
  
}</span>

/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/bin/java -javaagent:/Applications/IntelliJ IDEA CE.app/Contents/lib/idea_rt.jar=63141:/Applications/IntelliJ IDEA CE.app/Contents/bin -Dfile.encoding=UTF-8 -classpath /Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/cat.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/ext/cldrdata.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/ext/crs-agent.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/ext/dnsns.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/ext/jaccess.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/ext/localedata.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/ext/nashorn.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/ext/sunec.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/ext/sunjce_provider.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/ext/sunpkcs11.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/ext/zipfs.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/management-agent.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/rt.jar:/Users/shenglei/Library/Mobile Documents/com~apple~CloudDocs/💻Code/🍀/i/out/production/i Task1 Exception in thread "main" java.lang.IllegalArgumentException: Error decoding percent encoded characters at sun.net.www.ParseUtil.decode(ParseUtil.java:216) at sun.misc.URLClassPath$FileLoader.<init>(URLClassPath.java:1366) at sun.misc.URLClassPath$3.run(URLClassPath.java:574) at sun.misc.URLClassPath$3.run(URLClassPath.java:569) at java.security.AccessController.doPrivileged(Native Method) at sun.misc.URLClassPath.getLoader(URLClassPath.java:568) at sun.misc.URLClassPath.getLoader(URLClassPath.java:533) at sun.misc.URLClassPath.getNextLoader(URLClassPath.java:498) at sun.misc.URLClassPath.getResource(URLClassPath.java:252) at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:363) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:362) at java.lang.ClassLoader.loadClass(ClassLoader.java:419) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:352) at java.lang.ClassLoader.loadClass(ClassLoader.java:352) at sun.instrument.InstrumentationImpl.loadClassAndStartAgent(InstrumentationImpl.java:304) at sun.instrument.InstrumentationImpl.loadClassAndCallPremain(InstrumentationImpl.java:401) FATAL ERROR in native method: processing of -javaagent failed 进程已结束,退出代码134 (interrupted by signal 6: SIGABRT)
06-06
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值