Boyer-Moore算法Java实现

本文介绍Boyer-Moore算法的两种Java实现方式,包括预处理阶段和搜索阶段的详细步骤,如last函数、match函数及suffix数组的计算。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

关于算法的说明可以看

http://baike.baidu.com/link?url=-w9_47V9JJlgQLlEoFyXvxqBc97Zd281elktWQeTr0I-GgV8M3RK_oSkvSfwTg2DWWjunGpTgwpbXfeccOymS2w7Xh_aEzga-o2QcJHX1s_HcPi7zTgGdCrrJ6YW58eh

百度百科的说明。

第一种实现:根据百度百科中的C语言版本改写为Java

import java.util.Scanner;

/**
 * Boyer-Moore exact string search using a bad-character table ({@code last})
 * and a good-suffix table ({@code match}).
 *
 * <p>Usage: {@code new BoyerMoore2(pattern, text).match()} returns the index of
 * the first occurrence of {@code pattern} in {@code text}, or -1 if there is none.
 */
public class BoyerMoore2 {

  /** Number of distinct {@code char} values; size of the bad-character table. */
  public static final int ALPHABET_SIZE = Character.MAX_VALUE + 1;

  private final String text;
  private final String pattern;

  /** Bad-character shifts, indexed by {@code char} value. */
  private final int[] last;
  /** Good-suffix shifts, indexed by the pattern position of the mismatch. */
  private final int[] match;
  /**
   * suffix[i] = length of the longest substring of the pattern that ends at
   * index i and is also a suffix of the whole pattern.
   */
  private final int[] suffix;

  /**
   * Creates a matcher. Note the argument order: pattern first, then text.
   *
   * @param pattern the string to search for
   * @param text    the string to search in
   */
  public BoyerMoore2(String pattern, String text) {
    this.text = text;
    this.pattern = pattern;
    last = new int[ALPHABET_SIZE];
    match = new int[pattern.length()];
    suffix = new int[pattern.length()];
  }

  /**
   * Searches the pattern in the text.
   *
   * @return the index of the first occurrence, or -1 if the pattern does not
   *         occur; an empty pattern matches at index 0 (same as
   *         {@link String#indexOf(String)})
   */
  public int match() {
    final int n = text.length();
    final int m = pattern.length();
    if (m == 0) {
      // The preprocessing below indexes suffix[m - 1]; with m == 0 that
      // would be suffix[-1]. An empty pattern trivially matches at 0.
      return 0;
    }

    // Preprocessing
    computeLast();
    computeMatch();

    // Searching: j is the alignment of pattern[0] in the text.
    int j = 0;
    while (j <= n - m) {
      // Compare right to left; i ends at the mismatch position or -1.
      int i = m - 1;
      while (i >= 0 && pattern.charAt(i) == text.charAt(i + j)) {
        i--;
      }
      if (i < 0) {
        return j;
      }
      // Advance by the larger of the bad-character and good-suffix shifts.
      // The bad-character term may be <= 0; match[i] is always >= 1.
      j += Math.max(last[text.charAt(j + i)] - m + i + 1, match[i]);
    }
    return -1;
  }

  /**
   * Fills the bad-character table: for every char, the distance from its
   * right-most occurrence in pattern[0..m-2] to the last pattern position,
   * or m if it does not occur there.
   */
  private void computeLast() {
    final int m = pattern.length();
    for (int k = 0; k < last.length; k++) {
      last[k] = m;
    }
    // The last pattern character is deliberately excluded (j < m - 1),
    // otherwise its own shift would be 0.
    for (int j = 0; j < m - 1; j++) {
      last[pattern.charAt(j)] = m - 1 - j;
    }
  }

  /**
   * Fills the good-suffix table from the {@code suffix} array.
   * Runs in O(m): the phase 2 cursor only ever moves forward.
   */
  private void computeMatch() {
    final int m = match.length;

    /* Phase 1: default shift is the whole pattern length. */
    for (int j = 0; j < m; j++) {
      match[j] = m;
    }

    computeSuffix();

    /* Phase 2: a prefix of the pattern equals a suffix of the pattern. */
    // suffix[i] == i + 1 means pattern[0..i] is also a suffix (a border).
    // Longer borders (larger i) are visited first and give smaller shifts,
    // so each slot is written at most once; the shared cursor j therefore
    // never needs to restart from 0.
    int j = 0;
    for (int i = m - 1; i >= 0; i--) {
      if (suffix[i] == i + 1) {
        for (; j < m - 1 - i; j++) {
          if (match[j] == m) {
            match[j] = m - 1 - i;
          }
        }
      }
    }

    /* Phase 3: the matched suffix re-occurs inside the pattern. */
    // Increasing i means later writes carry smaller shifts, so the
    // smallest valid shift wins.
    for (int i = 0; i <= m - 2; i++) {
      match[m - 1 - suffix[i]] = m - 1 - i;
    }
  }

  /**
   * Fills {@code suffix}. The window (g, f] remembers the most recent
   * comparison run so that earlier results can be reused instead of
   * re-comparing characters.
   */
  private void computeSuffix() {
    final int m = suffix.length;
    int f = m - 2; // always overwritten before first use (first pass takes the else branch)
    int g = m - 1;
    suffix[m - 1] = m;
    for (int i = m - 2; i >= 0; --i) {
      if (i > g && suffix[i + m - 1 - f] < i - g) {
        // i lies inside the known window and the mirrored value fits: reuse it.
        suffix[i] = suffix[i + m - 1 - f];
      } else {
        if (i < g) {
          g = i;
        }
        f = i;
        // Extend the comparison leftwards from g against the pattern's tail.
        while (g >= 0 && pattern.charAt(g) == pattern.charAt(g + m - 1 - f)) {
          --g;
        }
        suffix[i] = f - g;
      }
    }
  }

  /**
   * Reads the text from the first input line and the pattern from the second,
   * then prints the index of the first occurrence (or -1).
   */
  public static void main(String[] args) {
    try (Scanner cin = new Scanner(System.in)) {
      String str = cin.nextLine();
      String pattern = cin.nextLine();
      System.out.println(new BoyerMoore2(pattern, str).match());
    }
  }

}
第二种实现:来自网络(作者 V.Boutchkova)

import java.util.Scanner;

/**
 * Implementation of the Boyer-Moore Algorithm for pattern matching.
 *
 * <p>Usage: {@code new BoyerMoore(pattern, text).match()} returns the index of
 * the first occurrence of {@code pattern} in {@code text}, or -1 if there is none.
 *
 * @author V.Boutchkova
 */
public class BoyerMoore {

  /** Number of distinct {@code char} values; size of the {@code last} table. */
  public static final int ALPHABET_SIZE = Character.MAX_VALUE + 1;

  private final String text;
  private final String pattern;

  private final int[] last;
  private final int[] match;
  private final int[] suffix;

  /**
   * Creates a matcher. Note the argument order: pattern first, then text.
   *
   * @param pattern the string to search for
   * @param text    the string to search in
   */
  public BoyerMoore(String pattern, String text) {
    this.text = text;
    this.pattern = pattern;
    last = new int[ALPHABET_SIZE];
    match = new int[pattern.length()];
    suffix = new int[pattern.length()];
  }

  /**
   * Searches the pattern in the text.
   * Returns the position of the first occurrence, if found and -1 otherwise.
   * An empty pattern matches at position 0 (same as
   * {@link String#indexOf(String)}).
   */
  public int match() {
    final int n = text.length();
    final int m = pattern.length();
    if (m == 0) {
      // The preprocessing indexes suffix[m - 1] and the search starts at
      // pattern index m - 1; both would be -1 for an empty pattern.
      return 0;
    }

    // Preprocessing
    computeLast();
    computeMatch();

    // Searching: i walks the text, j walks the pattern, both right to left.
    int i = m - 1;
    int j = m - 1;
    while (i < n) {
      if (pattern.charAt(j) == text.charAt(i)) {
        if (j == 0) {
          //the left-most match is found
          return i;
        }
        j--;
        i--;
      } else { //a difference
        // (m - j - 1) moves i back to the text position under the pattern's
        // last character; the max(...) term is the actual shift. The
        // bad-character term may be <= 0, match[j] is always >= 1.
        i += m - j - 1 + Math.max(j - last[text.charAt(i)], match[j]);
        j = m - 1;
      }
    }
    return -1;
  }

  /**
   * Computes the function <i>last</i> and stores its values in the array <code>last</code>.
   * The function is defined as follows:
   * <pre>
   * last(Char ch) = the index of the right-most occurrence of the character ch
   *                                                           in the pattern;
   *                 -1 if ch does not occur in the pattern.
   * </pre>
   * The running time is O(pattern.length() + |Alphabet|).
   */
  private void computeLast() {
    for (int k = 0; k < last.length; k++) {
      last[k] = -1;
    }
    // Scan right to left and keep only the first index seen per character,
    // i.e. the right-most occurrence.
    for (int j = pattern.length() - 1; j >= 0; j--) {
      if (last[pattern.charAt(j)] < 0) {
        last[pattern.charAt(j)] = j;
      }
    }
  }

  /**
   * Computes the function <i>match</i> and stores its values in the array <code>match</code>.
   * The function is defined as follows:
   * <pre>
   * match(j) = min{ s | 0 &lt; s &lt;= j &amp;&amp; p[j-s]!=p[j]
   *                            &amp;&amp; p[j-s+1]..p[m-s-1] is suffix of p[j+1]..p[m-1] },
   *                                                         if such s exists, else
   *            min{ s | j+1 &lt;= s &lt;= m
   *                            &amp;&amp; p[0]..p[m-s-1] is suffix of p[j+1]..p[m-1] },
   *                                                         if such s exists,
   *            m, otherwise,
   * where m is the pattern's length and p is the pattern.
   * </pre>
   * The running time is O(pattern.length()).
   */
  private void computeMatch() {
    /* Phase 1 */
    for (int j = 0; j < match.length; j++) {
      match[j] = match.length;
    } //O(m)

    computeSuffix(); //O(m)

    /* Phase 2 */
    //Uses an auxiliary array, backwards version of the KMP failure function.
    //suffix[i] = the smallest j > i s.t. p[j..m-1] is a prefix of p[i..m-1],
    //if there is no such j, suffix[i] = m

    //Compute the smallest shift s, such that 0 < s <= j and
    //p[j-s]!=p[j] and p[j-s+1..m-s-1] is suffix of p[j+1..m-1] (or j == m-1),
    //                                                         if such s exists,
    for (int i = 0; i < match.length - 1; i++) {
      int j = suffix[i + 1] - 1; // suffix[i+1] <= suffix[i] + 1
      if (suffix[i] > j) { // therefore pattern[i] != pattern[j]
        match[j] = j - i;
      } else { // j == suffix[i]
        match[j] = Math.min(j - i + match[i], match[j]);
      }
    } //End of Phase 2

    /* Phase 3 */
    //Uses the suffix array to compute each shift s such that
    //p[0..m-s-1] is a suffix of p[j+1..m-1] with j < s < m
    //and stores the minimum of this shift and the previously computed one.
    if (suffix[0] < pattern.length()) {
      for (int j = suffix[0] - 1; j >= 0; j--) {
        if (suffix[0] < match[j]) {
          match[j] = suffix[0];
        }
      }
      // Follow the chain suffix[0] -> suffix[suffix[0]] -> ... and cap the
      // shifts of the positions between consecutive chain elements.
      int j = suffix[0];
      for (int k = suffix[j]; k < pattern.length(); k = suffix[k]) {
        while (j < k) {
          if (match[j] > k) {
            match[j] = k;
          }
          j++;
        }
      }
    } //endif
  }

  /**
   * Computes the values of <code>suffix</code>, which is an auxiliary array,
   * backwards version of the KMP failure function.
   * <br>
   * suffix[i] = the smallest j &gt; i s.t. p[j..m-1] is a prefix of p[i..m-1],
   * if there is no such j, suffix[i] = m, i.e. <br>
   * p[suffix[i]..m-1] is the longest prefix of p[i..m-1], if suffix[i] &lt; m.
   * <br>
   * The running time for computing the <code>suffix</code> is O(m).
   * <br>
   * Requires a non-empty pattern: it writes <code>suffix[m-1]</code> unconditionally.
   */
  private void computeSuffix() {
    suffix[suffix.length - 1] = suffix.length;
    int j = suffix.length - 1;
    //suffix[i] = m - the length of the longest prefix of p[i..m-1]
    for (int i = suffix.length - 2; i >= 0; i--) {
      // On mismatch fall back along already computed values, as in KMP.
      while (j < suffix.length - 1 && pattern.charAt(j) != pattern.charAt(i)) {
        j = suffix[j + 1] - 1;
      }
      if (pattern.charAt(j) == pattern.charAt(i)) {
        j--;
      }
      suffix[i] = j + 1;
    }
  }

  /**
   * Reads the text from the first input line and the pattern from the second,
   * then prints the index of the first occurrence (or -1).
   */
  public static void main(String[] args) {
    try (Scanner cin = new Scanner(System.in)) {
      String str = cin.nextLine();
      String pattern = cin.nextLine();
      System.out.println(new BoyerMoore(pattern, str).match());
    }
  }

}




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值