java实现找出两个文件中相同的单词(两种方法)
java实现找出两个文件中相同的单词,具体代码如下所示:
package com.zy.DesignPrinciples.singleresponsibility;
import javax.print.DocFlavor;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.HashSet;
/**
 * Finds the words that two text files have in common.
 *
 * <p>A word is a maximal run of ASCII letters ({@code a-z}, {@code A-Z}); every other
 * character is treated as a separator. Words are compared case-sensitively.
 *
 * <p>Author: Tiger. Created: 2020/8/19 19:49.
 */
public class ReaderComplete {

    /**
     * Loads two hard-coded files, extracts their words and prints every word that
     * occurs in both files, one per line.
     *
     * @param args unused
     * @throws Exception if either file cannot be read
     */
    public static void main(String[] args) throws Exception {
        String s = loadFile("F:\\.temp\\1.txt");
        String s1 = loadFile("F:\\.temp\\2.txt");
        String[] words1 = parseWordsFromString(s);
        String[] words2 = parseWordsFromString(s1);
        String[] words = findTheSameWords(words1, words2);
        for (String word : words) {
            System.out.println(word);
        }
    }

    /**
     * Reads a whole text file into a string.
     *
     * <p>Every line is terminated with a single {@code '\n'} in the result, regardless of
     * the line terminator used in the file. The file is decoded with the charset that
     * {@link FileReader} uses by default.
     *
     * @param path path of the file to read
     * @return the file content, each line followed by {@code '\n'}
     * @throws Exception if the file does not exist or cannot be read
     */
    public static String loadFile(String path) throws Exception {
        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader br = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append('\n');
            }
        }
        return sb.toString();
    }

    /**
     * Splits text into words, where a word is a maximal run of ASCII letters.
     *
     * @param string the text to split; must not be {@code null}
     * @return the words in order of appearance; empty when the text contains no letters
     */
    public static String[] parseWordsFromString(String string) {
        String[] tokens = string.split("[^a-zA-Z]+");
        // split() yields one leading "" when the text starts with a separator (and returns
        // {""} for empty text). That empty token is not a word and must not be reported.
        // Trailing empties are already removed by split(), and the '+' quantifier rules
        // out empty tokens in the middle, so only index 0 needs checking.
        if (tokens.length == 0 || !tokens[0].isEmpty()) {
            return tokens;
        }
        String[] words = new String[tokens.length - 1];
        System.arraycopy(tokens, 1, words, 0, words.length);
        return words;
    }

    /**
     * Returns the distinct words that occur in both arrays.
     *
     * @param s1 words of the first file
     * @param s2 words of the second file
     * @return each common word exactly once, in no particular order
     */
    public static String[] findTheSameWords(String[] s1, String[] s2) {
        // Index the second array once so each lookup is a hash probe instead of a scan.
        HashSet<String> lookup = new HashSet<>();
        for (String word : s2) {
            lookup.add(word);
        }
        HashSet<String> common = new HashSet<>();
        for (String word : s1) {
            if (lookup.contains(word)) {
                common.add(word);
            }
        }
        return common.toArray(new String[0]);
    }
}
第二种比较单词的方法:将 String[] 数组转换成集合,然后通过集合的 retainAll() 方法对两个集合取交集。
/**
 * Returns the distinct words that occur in both arrays, computed as a set intersection
 * with {@code retainAll}.
 *
 * <p>Neither input array is modified.
 *
 * @param s1 words of the first file
 * @param s2 words of the second file
 * @return each common word exactly once, in no particular order
 */
public static String[] findTheSameWords2(String[] s1, String[] s2) {
    // Copy into hash sets rather than lists: duplicates collapse immediately and
    // retainAll() does a hash lookup per element instead of scanning a list.
    HashSet<String> common = new HashSet<>(Arrays.asList(s1));
    common.retainAll(new HashSet<>(Arrays.asList(s2)));
    return common.toArray(new String[0]);
}
PS:java实现获取两个文本共同的词
代码块
/**
 * Extracts the words shared by two text documents.
 *
 * <p>Words are whitespace-separated tokens. Files are read and written as UTF-8.
 *
 * <p>Author: Amy_Robot. Created: 2017-03-24 17:16:48.
 */
public class ComWords {

    /** Charset name used for both reading the sources and writing the result. */
    private static final String ENCODING = "utf-8";

    /**
     * Reads a UTF-8 text file into a string.
     *
     * <p>This is best-effort: it never throws. If the path is not an existing regular
     * file, a message is printed and an empty string is returned. If reading fails, a
     * message and the stack trace are printed and whatever was read so far is returned.
     *
     * @param path path of the file to read
     * @return the file content with every line terminated by {@code '\n'}
     */
    public static String getFileContent(String path) {
        StringBuilder sb = new StringBuilder();
        try {
            File file = new File(path);
            if (!file.isFile()) { // isFile() is false for missing paths as well
                System.out.println("找不到指定的文件");
                return sb.toString();
            }
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), ENCODING))) {
                String lineTxt;
                while ((lineTxt = reader.readLine()) != null) {
                    // Keep a separator between lines; otherwise the last word of one
                    // line fuses with the first word of the next.
                    sb.append(lineTxt).append('\n');
                }
            }
        } catch (Exception e) {
            // Deliberately broad: callers rely on this method reporting and continuing.
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
        return sb.toString();
    }

    /**
     * Writes every word of the second file that also occurs in the first file to the
     * target file, and echoes the same words to standard output.
     *
     * <p>Words are emitted in the order they appear in the second file, one per line,
     * each terminated by {@code "\r\n"}. A word that occurs several times in the second
     * file is emitted once per occurrence.
     *
     * @param sourcefile1 path of the first document
     * @param sourcefile2 path of the second document
     * @param targetfile path of the file to (over)write with the common words
     * @throws IOException if the target file cannot be opened or written
     */
    public static void getComWords(String sourcefile1, String sourcefile2, String targetfile)
            throws IOException {
        // Read both sources before opening the target: opening truncates it, which would
        // destroy the input if the target is also one of the sources.
        java.util.HashSet<String> firstWords =
                new java.util.HashSet<>(splitWords(getFileContent(sourcefile1)));
        List<String> common = new ArrayList<>();
        for (String word : splitWords(getFileContent(sourcefile2))) {
            if (firstWords.contains(word)) {
                common.add(word);
            }
        }

        // try-with-resources flushes and closes the writer even if a write fails.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(new File(targetfile)), ENCODING))) {
            for (String word : common) {
                writer.write(word + "\r\n");
            }
        }
        for (String word : common) {
            System.out.println(word);
        }
    }

    /** Splits text on runs of whitespace, dropping empty tokens. */
    private static List<String> splitWords(String text) {
        List<String> words = new ArrayList<>();
        for (String token : text.split("\\s+")) {
            if (!token.isEmpty()) { // leading whitespace or empty text yields one "" token
                words.add(token);
            }
        }
        return words;
    }

    /**
     * Runs the extraction on hard-coded sample paths.
     *
     * @param args unused
     * @throws IOException if the target file cannot be written
     */
    public static void main(String[] args) throws IOException {
        String target = "e:\\1.txt";
        String path1 = "D:\\qirui_naf.txt";
        String path2 = "D:\\jianghuai_naf.txt";
        getComWords(path1, path2, target);
    }
}
总结
到此这篇关于java实现找出两个文件中相同的单词(两种方法)的文章就介绍到这了。更多相关java找出两个文件相同单词的内容,请搜索以前的文章或继续浏览下面的相关文章,希望大家以后多多支持!
以上是 java实现找出两个文件中相同的单词(两种方法) 的全部内容, 来源链接: utcz.com/z/323125.html