kwic--Java统计单词个数并按照顺序输出
这是一个统计文件中各单词出现次数、并按次数从大到小输出的程序。运行后输入文件名即可。
单词先按字典序排序;程序里用到了映射(Map)等。注意要重写比较函数,因为我们是要按值排序,而不是像通常那样按键排序。我们要按出现次数由多到少输出,而次数是可能重复的。
由于我们要按照次数多少排序,所以记得重写比较函数。为什么不能让次数做键?因为键不能重复,而不同单词的次数可能相同,所以这里用单词做键。
1 str.split("\\s+"); 这里指定分隔符:按一个或多个空白字符切分
2 正则表达式,
\\d表示 0-9 的数字,
\\s表示 空格,回车,换行等空白符,
\\w表示单词字符(数字字母下划线)
+号表示一个或多个的意思
代码:
1 package calnum;2
3 import java.io.BufferedReader;
4 import java.io.FileNotFoundException;
5 import java.io.FileReader;
6 import java.io.IOException;
7 import java.nio.Buffer;
8 import java.nio.ShortBuffer;
9 import java.util.ArrayList;
10 import java.util.Collections;
11 import java.util.Comparator;
12 import java.util.HashMap;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Map.Entry;
17 import java.util.Scanner;
18 import java.util.TreeMap;
19
20 import javax.swing.Spring;
21 import javax.swing.text.html.parser.Entity;
22
23 public class calwordnum{
24
25 public String readfile() throws IOException {
26 String all = null;
27 // System.out.print("##########################");
28 Scanner cin = new Scanner(System.in);
29 System.out.print("plz input file name:");
30 String filename = cin.next();
31 FileReader read = new FileReader(filename);
32 BufferedReader re = new BufferedReader(read);
33 StringBuffer strbuf = new StringBuffer();
34
35 String ss;
36 while ((ss = re.readLine()) != null) {
37 strbuf.append(ss);
38 //读全文
39 }
40
41 return strbuf.toString();
42
43 }
44
45 //
46 public String[] input(String str) {
47 String[] arr = str.split("\\s+");
48
49 return arr;
50 }
51
52 public void putmap(List<String> ls) {
53 String temp, theword = null;
54 int flage = 0;
55 theword = ls.get(0);
56 Map<String, Integer> map = new HashMap<String, Integer>();
57 Iterator it = ls.iterator();
58 while (it.hasNext()) {
59 // System.out.println("sss"+it.next().toString());
60 temp = it.next().toString();
61
62 if (temp.equals(theword)) {
63 flage++;
64 continue;
65 } else {
66 map.put(theword, flage);
67 flage = 1;
68 theword = temp;
69
70 }
71 }
72
73 map.put(theword, flage);
74 //重写比较方法,treemap按值排序。用两个map 比较函数需要参数
75 ValueComparator bvc = new ValueComparator(map);
76 Map<String,Integer> map2 = new TreeMap<String,Integer>(bvc);
77 map2.putAll(map);
78 Iterator mapit = map2.entrySet().iterator();
79 while (mapit.hasNext()) {
80 Entry tr = (Entry) mapit.next();
81
82 String num = (String) tr.getKey();
83 int sa = (Integer) tr.getValue();
84 System.out.println("this word:"+num + " has:" + sa + "个");
85 }
86
87 }
88
89 // /
90 public static void main(String[] args) throws IOException {
91 // TODO Auto-generated method stub
92 List<String> ls = new ArrayList<String>();
93
94 calwordnum ca = new calwordnum();
95
96 String allword = ca.readfile();
97 String[] arr = ca.input(allword);
98 for (int i = 0; i < arr.length; i++) {
99
100 ls.add(arr[i].toLowerCase());
101 }
102
103 Collections.sort(ls);
104 display(ls);
105 ca.putmap(ls);
106
107 }
108
109 @Override
110 public boolean equals(Object obj) {
111 // TODO Auto-generated method stub
112 return super.equals(obj);
113 }
114
115 private static void display(List<String> ls) {
116 // TODO Auto-generated method stub
117
118 for (String s : ls)
119 System.out.println(s);
120 }
121
122 }
123 //
/**
 * Orders string keys by the integer each key maps to in a backing map,
 * largest value first; keys with the same value are ordered alphabetically.
 *
 * <p>Intended for a {@code TreeMap} that should iterate by value rather than
 * by key. The backing map is consulted on every comparison, so it must not be
 * modified while a sorted collection built with this comparator is in use.
 */
class ValueComparator implements Comparator<String> {

    /** Maps each key to the value it is ranked by. */
    Map<String, Integer> base;

    /**
     * @param base map supplying the value for every key that will be compared
     */
    public ValueComparator(Map<String, Integer> base) {
        this.base = base;
    }

    /**
     * Compares two keys by descending value, then by ascending key.
     *
     * <p>Returns 0 only when both keys are equal. Two different keys with the
     * same value never compare as equal, so a {@code TreeMap} keeps both of
     * them, while lookups such as {@code get} still work because a key
     * compares equal to itself.
     *
     * @param a first key
     * @param b second key
     * @return negative if {@code a} sorts before {@code b}, positive if after,
     *         0 if the keys are equal
     */
    @Override
    public int compare(String a, String b) {
        // Reversed operands: the larger value must sort first.
        int byValue = valueOf(b).compareTo(valueOf(a));
        if (byValue != 0) {
            return byValue;
        }
        // Tie on value: fall back to the keys so the order is deterministic
        // and distinct keys are never collapsed into one entry.
        return a.compareTo(b);
    }

    /** Returns the value stored for {@code key}, or 0 when the key is absent. */
    private Integer valueOf(String key) {
        Integer value = base.get(key);
        return value == null ? Integer.valueOf(0) : value;
    }
}
不截图了,运行之后输入文件名即可。如果 \\s+ 不能满足你的分隔符需求,请自行修改。输出结果是每个单词及其出现次数。如有考虑不周的地方,望批评指正。
以上是 kwic--Java统计单词个数并按照顺序输出 的全部内容, 来源链接: utcz.com/z/394495.html