import java.util.ArrayList;
import java.util.Arrays;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
/**
 * Frequency miner in the style of the BUC (bottom-up cube) algorithm.
 *
 * <p>A comma-separated file is loaded into a fixed {@code rank x col} table of string cells.
 * Level 1 counts every distinct cell value; level {@code n > 1} counts every distinct
 * concatenation of {@code n} horizontally adjacent cells of one row. Combinations whose count
 * reaches {@code minSup} are recorded as {@code value<count>}; the others are remembered so that
 * the next level can skip candidates built on them.
 *
 * <p>Loading, dimension discovery and mining are all triggered by the constructor, and progress
 * is printed to standard output. Cells that are {@code null} (never filled) or empty are treated
 * as missing and are skipped everywhere.
 */
public class BUC {
    /** The table of cells; rows and columns are fixed by the constructor arguments. */
    private String[][] dataSet;
    /** Distinct first characters of the cell values, in first-seen order. */
    private ArrayList<String> dimension;
    /** Accepted combinations so far, each formatted as {@code value<count>}. */
    private ArrayList<String> inputList;
    /** Combinations rejected at the most recently processed level. */
    private ArrayList<String> removeList;
    /** Minimum count a combination needs to be accepted. */
    private Integer minSup;

    /**
     * Loads the file and immediately runs the whole mining process.
     *
     * @param rank     number of table rows; further lines of the file are ignored
     * @param col      number of table columns; further fields of a line are ignored
     * @param minSup   minimum count for a combination to be accepted
     * @param filename path of the comma-separated input file
     * @throws NullPointerException if {@code minSup} is {@code null}
     */
    public BUC(int rank, int col, Integer minSup, String filename) {
        if (minSup == null) {
            // Fail up front instead of deep inside the mining loop when the value is unboxed.
            throw new NullPointerException("minSup must not be null");
        }
        this.dataSet = new String[rank][col];
        this.dimension = new ArrayList<String>();
        this.minSup = minSup;
        this.inputList = new ArrayList<String>();
        this.removeList = new ArrayList<String>();
        this.readFile(filename);
        this.getDimension();
        this.getOutput(0);
    }

    /**
     * Returns the accepted combinations found so far.
     *
     * @return a copy of the result list, each entry formatted as {@code value<count>}
     */
    public ArrayList<String> getResult() {
        return new ArrayList<String>(this.inputList);
    }

    /**
     * Reads the comma-separated file into the table.
     *
     * <p>Lines beyond the table's row count and fields beyond its column count are ignored;
     * cells that the file does not provide stay {@code null}. A missing or unreadable file is
     * reported on standard output and leaves the remaining cells unset.
     *
     * @param filename path of the file to read
     */
    public void readFile(String filename) {
        File inFile = new File(filename);
        // try-with-resources closes the reader even when a read fails part-way through.
        try (BufferedReader reader = new BufferedReader(new FileReader(inFile))) {
            String line = reader.readLine();
            for (int i = 0; line != null && i < this.dataSet.length; i++) {
                String[] fields = line.split(",");
                int limit = Math.min(fields.length, this.dataSet[i].length);
                for (int j = 0; j < limit; j++) {
                    this.dataSet[i][j] = fields[j];
                }
                line = reader.readLine();
            }
        } catch (FileNotFoundException ex) {
            System.out.println("没找到文件!");
        } catch (IOException ex) {
            System.out.println("读写文件出错!");
        }
    }

    /**
     * Collects the distinct first characters of all cells into the dimension list
     * (e.g. a, b, c, d ...) and prints that list.
     */
    public void getDimension() {
        for (String[] row : this.dataSet) {
            for (String cell : row) {
                if (isMissing(cell)) {
                    continue;
                }
                String prefix = String.valueOf(cell.charAt(0));
                if (check(prefix)) {
                    this.dimension.add(prefix);
                }
            }
        }
        System.out.println("文件的含有的dimension有: " + this.dimension.toString());
    }

    /**
     * Appends every distinct cell value not yet contained in {@code a}, then prints the list.
     *
     * @param a list that receives the values; it is modified in place
     * @return the same list {@code a}
     */
    public ArrayList<String> getDimension(ArrayList<String> a) {
        for (String[] row : this.dataSet) {
            for (String cell : row) {
                if (!isMissing(cell) && check(cell, a)) {
                    a.add(cell);
                }
            }
        }
        System.out.println("该行的含有的dimension有: " + a.toString());
        return a;
    }

    /**
     * Builds the candidates of width {@code k}: the distinct concatenations of {@code k}
     * horizontally adjacent cells of a row.
     *
     * <p>All candidates are appended to {@code a}. The returned list leaves out candidates that
     * are built on a combination currently held in the remove list. The comparison is done per
     * cell-aligned sub-run (by equality), not by substring search, so that a rejected value such
     * as {@code a1} does not knock out an unrelated candidate such as {@code a10b1}.
     *
     * @param a list that receives every candidate; it is modified in place
     * @param k number of adjacent cells per candidate, expected to be at least 1
     * @return a new list with the surviving candidates, or {@code null} when {@code k} is larger
     *         than the number of columns
     */
    public ArrayList<String> getDimension(ArrayList<String> a, Integer k) {
        int width = k;
        int columns = this.dataSet.length == 0 ? 0 : this.dataSet[0].length;
        if (width > columns) {
            return null;
        }
        ArrayList<String> pruned = new ArrayList<String>();
        if (width >= 1) {
            for (String[] row : this.dataSet) {
                for (int j = 0; j + width <= row.length; j++) {
                    String candidate = joinCells(row, j, width);
                    if (candidate == null || !check(candidate, a)) {
                        continue; // window has a missing cell, or candidate already listed
                    }
                    a.add(candidate);
                    if (containsRejectedRun(row, j, width)) {
                        pruned.add(candidate);
                    }
                }
            }
        }
        System.out.println("该行的含有的dimension有: " + a.toString());
        System.out.println("要去掉的关系有 :" + this.removeList.toString());
        ArrayList<String> endSet = new ArrayList<String>();
        for (String candidate : a) {
            if (candidate == null || candidate.isEmpty() || pruned.contains(candidate)) {
                continue;
            }
            endSet.add(candidate);
        }
        System.out.println("该行的含有的dimension有: " + endSet.toString());
        return endSet;
    }

    /**
     * Duplicate check used while collecting candidates.
     *
     * @param index value to look for
     * @param t     list to search
     * @return {@code true} when {@code index} is NOT yet in {@code t}
     */
    public boolean check(String index, ArrayList<String> t) {
        return !t.contains(index);
    }

    /**
     * Duplicate check against the dimension list.
     *
     * @param index one-character string to look for
     * @return {@code true} when no dimension entry starts with {@code index}
     */
    public boolean check(String index) {
        for (String known : this.dimension) {
            if (String.valueOf(known.charAt(0)).equals(index)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Processes one level: builds the candidates of width {@code dim}, counts their occurrences
     * in the table and sorts them into the result list (count at least {@code minSup}) or the
     * remove list.
     *
     * <p>For {@code dim == 1} the candidates are the distinct cell values and the remove list is
     * appended to. For any other {@code dim} the remove list is first used for pruning and then
     * replaced by this level's rejected candidates, and the accumulated result is printed.
     * When {@code dim} exceeds the number of columns nothing is counted.
     *
     * @param dim number of adjacent cells per combination
     */
    public void getProcess(int dim) {
        System.out.println("当前维度是: " + dim);
        ArrayList<String> candidates = new ArrayList<String>();
        if (dim == 1) {
            this.getDimension(candidates);
        } else {
            candidates = this.getDimension(candidates, dim);
            if (candidates == null) {
                return; // no run of this width fits into a row
            }
            this.removeList = new ArrayList<String>();
        }

        int[] counts = new int[candidates.size()];
        for (String[] row : this.dataSet) {
            for (int j = 0; j + dim <= row.length; j++) {
                String key = joinCells(row, j, dim);
                if (key == null) {
                    continue;
                }
                // Candidates are distinct, so at most one position can match.
                int position = candidates.indexOf(key);
                if (position >= 0) {
                    counts[position]++;
                }
            }
        }
        System.out.println("该行的dimension频数为 : " + Arrays.toString(counts));

        for (int i = 0; i < counts.length; i++) {
            if (counts[i] >= this.minSup) {
                this.inputList.add(candidates.get(i) + "<" + counts[i] + ">");
            } else {
                this.removeList.add(candidates.get(i));
            }
        }
        if (dim != 1) {
            System.out.println("输出结果为:" + this.inputList.toString());
        }
    }

    /**
     * Driver: processes every level after {@code dim}, up to the number of entries in the
     * dimension list.
     *
     * @param dim the level already done; pass 0 to start from the first level
     */
    public void getOutput(int dim) {
        while (dim < this.dimension.size()) {
            dim = dim + 1;
            this.getProcess(dim);
        }
    }

    /** Returns whether a cell carries no value (never filled, or empty text). */
    private static boolean isMissing(String cell) {
        return cell == null || cell.isEmpty();
    }

    /**
     * Concatenates {@code width} cells of {@code row} starting at {@code start}; returns
     * {@code null} when one of them is missing.
     */
    private static String joinCells(String[] row, int start, int width) {
        StringBuilder joined = new StringBuilder();
        for (int m = start; m < start + width; m++) {
            if (isMissing(row[m])) {
                return null;
            }
            joined.append(row[m]);
        }
        return joined.toString();
    }

    /** Returns whether any cell-aligned run inside the given window is in the remove list. */
    private boolean containsRejectedRun(String[] row, int start, int width) {
        if (this.removeList.isEmpty()) {
            return false;
        }
        for (int len = 1; len <= width; len++) {
            for (int s = start; s + len <= start + width; s++) {
                if (this.removeList.contains(joinCells(row, s, len))) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Entry point. With four arguments ({@code rows cols minSup file}) those values are used;
     * otherwise the built-in example (15 rows, 4 columns, minimum support 4) is run.
     */
    public static void main(String[] args) {
        if (args.length == 4) {
            new BUC(Integer.parseInt(args[0]), Integer.parseInt(args[1]),
                    Integer.valueOf(args[2]), args[3]);
            return;
        }
        // Row and column counts have to be given by hand; the example file is 15 rows by 4 columns.
        new BUC(15, 4, 4, "C:\\Users\\14983\\Desktop\\BUC_Experiment\\src\\BUC数据集.csv");
    }
}
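// BUC算法的java实现 (Java implementation of the BUC algorithm)
// 最新推荐文章于 2021-03-12 20:04:57 发布 (web page residue: "latest recommended article published 2021-03-12 20:04:57")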