CommonCount1.java:将预测产品的分词结果与每一行计算相似度(分词重合的个数除以预测产品的分词个数),将一行中各产品相似度的最高值保存为该行的相似度值,然后根据相似度降序排序。
将相似度大于0.8的前100条搭配套餐的行号记录下来(输出文件:line_0.8)
将相似度大于0.6的前100条搭配套餐的行号记录下来(输出文件:line_0.6)
将相似度大于0.6的前100条搭配套餐的行号记录下来;如果该行最高的相似度达不到0.6,那只取第一条(输出文件:line0.6_100)
将相似度大于0.6的前10条搭配套餐的行号记录下来;如果该行最高的相似度达不到0.6,那只取第一条(输出文件:line0.6_10)
将line0.6_10中补上line_0.8中出现不止10条的记录(输出文件:line0.6_10_0.8)
package test;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.Map;
/**
 * Ranks the lines of a term file by their similarity to each query line and
 * appends the line numbers of the best matches to an output file.
 *
 * <p>For every line of the query file, each line of the terms file is scored
 * with {@link #maxSimilarity(String[], String)}; the 1-based line numbers of
 * the {@value #TOP_N} highest-scoring lines are then written, space separated,
 * as one line of the output file.
 */
public class CommonCount1 {
    /** Capacity of the score table; terms-file lines beyond this are ignored. */
    private static final int MAX_LINES = 23105;

    /** Number of leading positions that {@link #bubbleSort} puts in order. */
    private static final int SORTED_PREFIX = 21;

    /** Number of line numbers written per query line. */
    private static final int TOP_N = 20;

    private static final String QUERY_FILE =
            "/public/home/dsj/Public/sundujing/fpgrowth/di.txt";
    private static final String TERMS_FILE =
            "/public/home/dsj/Public/sundujing/fpgrowth/ToTerms1.txt";
    private static final String OUTPUT_FILE =
            "/public/home/dsj/Public/sundujing/fpgrowth/line1.txt";

    /**
     * Counts the pairs of equal strings between two token arrays.
     *
     * <p>Every element of {@code s2} is compared with every element of
     * {@code s1}, so duplicated tokens are counted once per matching pair.
     *
     * @param s1 first token array
     * @param s2 second token array
     * @return number of {@code (s2[k], s1[j])} pairs that are equal
     */
    public static int count(String[] s1, String[] s2) {
        int matches = 0;
        for (int k = 0; k < s2.length; k++) {
            for (int j = 0; j < s1.length; j++) {
                if (s2[k].equals(s1[j])) {
                    matches++;
                }
            }
        }
        return matches;
    }

    /**
     * Appends {@code content} to the end of the named file.
     *
     * <p>An {@link IOException} is reported on standard error and otherwise
     * ignored. The writer is closed even when the write fails.
     *
     * @param fileName path of the file to append to
     * @param content  text to append
     */
    public static void appendMethod(String fileName, String content) {
        // The second constructor argument (true) opens the file in append mode.
        try (FileWriter writer = new FileWriter(fileName, true)) {
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Moves the largest values of {@code a} to the front, in descending order,
     * applying the same swaps to {@code b}.
     *
     * <p>Despite the name this is a partial exchange sort: only the first
     * {@value #SORTED_PREFIX} positions are guaranteed to be ordered; the rest
     * of the array is left in whatever order the swaps produce. Both arrays
     * are modified in place.
     *
     * @param a scores to order (descending)
     * @param b companion values kept aligned with {@code a}; must be at least
     *          as long as {@code a}
     * @return the same array instance {@code a}
     */
    public static double[] bubbleSort(double[] a, int[] b) {
        int limit = Math.min(SORTED_PREFIX, a.length);
        for (int i = 0; i < limit; i++) {
            for (int j = i + 1; j < a.length; j++) {
                if (a[i] < a[j]) {
                    double score = a[j];
                    a[j] = a[i];
                    a[i] = score;
                    int id = b[j];
                    b[j] = b[i];
                    b[i] = id;
                }
            }
        }
        return a;
    }

    /**
     * Returns the highest similarity between the query and any product on a
     * terms-file line.
     *
     * <p>The line is split on {@code ","} into products and each product on
     * {@code " "} into tokens. A product's similarity is
     * {@code count(queryTerms, productTokens) / queryTerms.length}.
     *
     * <p>Package-private (rather than private) so it can be tested directly.
     *
     * @param queryTerms tokens of the query line
     * @param line       one line of the terms file
     * @return the maximum similarity over all products, or {@code 0.0} when
     *         nothing matches or {@code queryTerms} is empty
     */
    static double maxSimilarity(String[] queryTerms, String line) {
        if (queryTerms.length == 0) {
            return 0.0; // avoids 0/0 = NaN below
        }
        double best = 0.0;
        // Split the line into individual products instead of treating all of
        // its tokens as one bag.
        for (String product : line.split(",")) {
            String[] productTerms = product.split(" ");
            double similarity = (double) count(queryTerms, productTerms) / queryTerms.length;
            // Compare against the running maximum itself; dividing it by the
            // query length again would let a lower score replace a higher one.
            if (similarity > best) {
                best = similarity;
            }
        }
        return best;
    }

    /** Opens {@code path} for line-by-line reading, decoding it as UTF-8. */
    private static BufferedReader openUtf8(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
    }

    /**
     * Scores every line of the terms file against the query, storing the
     * result for the line at 0-based index {@code i} in {@code scores[i]}.
     * Reading stops once {@code scores} is full.
     */
    private static void scoreTermsFile(String[] queryTerms, double[] scores) throws IOException {
        try (BufferedReader terms = openUtf8(TERMS_FILE)) {
            String line;
            int index = 0;
            while (index < scores.length && (line = terms.readLine()) != null) {
                scores[index++] = maxSimilarity(queryTerms, line);
            }
        }
    }

    /**
     * For each line of the query file, ranks the terms-file lines and appends
     * the line numbers of the {@value #TOP_N} best to the output file.
     *
     * <p>Any I/O failure while reading either input is reported on standard
     * error and ends the run.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        double[] scores = new double[MAX_LINES];
        int[] lineNumbers = new int[MAX_LINES];
        try (BufferedReader queries = openUtf8(QUERY_FILE)) {
            String queryLine;
            while ((queryLine = queries.readLine()) != null) {
                // Reset the table: no score yet, 1-based line numbers.
                for (int i = 0; i < MAX_LINES; i++) {
                    scores[i] = 0;
                    lineNumbers[i] = i + 1;
                }

                scoreTermsFile(queryLine.split(" "), scores);
                bubbleSort(scores, lineNumbers);

                // Build the whole output line first so the file is opened once
                // per query rather than once per number.
                StringBuilder out = new StringBuilder();
                for (int j = 0; j < TOP_N; j++) {
                    out.append(lineNumbers[j]).append(' ');
                }
                out.append('\n');
                appendMethod(OUTPUT_FILE, out.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}