kwic--Java统计单词个数并按照顺序输出

java

 

这是一个统计文件中各单词出现个数、并按个数从大到小输出的程序。运行后输入文件名即可。

程序先把单词按字母顺序排序,再用映射(Map)统计每个单词的个数。注意要重写比较函数:因为我们要按值(个数)排序,而不是像通常那样按键排序;输出按个数由多到少排列,而且不同单词的个数可能重复。

由于要按照个数多少排序,所以记得重写比较函数。为什么不能让个数做键?因为键不能重复,而不同单词的个数可能相同,所以这里用单词做键。

 1 str.split("\\s+"):按一个或多个空白符切分字符串,引号里的正则表达式就是分隔符。

 2 正则表达式,
\\d表示 0-9 的数字,
\\s表示 空格,回车,换行等空白符,
\\w表示单词字符(数字字母下划线)
+号表示一个或多个的意思

代码:

  1 package calnum;

2

3 import java.io.BufferedReader;

4 import java.io.FileNotFoundException;

5 import java.io.FileReader;

6 import java.io.IOException;

7 import java.nio.Buffer;

8 import java.nio.ShortBuffer;

9 import java.util.ArrayList;

10 import java.util.Collections;

11 import java.util.Comparator;

12 import java.util.HashMap;

13 import java.util.Iterator;

14 import java.util.List;

15 import java.util.Map;

16 import java.util.Map.Entry;

17 import java.util.Scanner;

18 import java.util.TreeMap;

19

20 import javax.swing.Spring;

21 import javax.swing.text.html.parser.Entity;

22

23 public class calwordnum{

24

25 public String readfile() throws IOException {

26 String all = null;

27 // System.out.print("##########################");

28 Scanner cin = new Scanner(System.in);

29 System.out.print("plz input file name:");

30 String filename = cin.next();

31 FileReader read = new FileReader(filename);

32 BufferedReader re = new BufferedReader(read);

33 StringBuffer strbuf = new StringBuffer();

34

35 String ss;

36 while ((ss = re.readLine()) != null) {

37 strbuf.append(ss);

38 //读全文

39 }

40

41 return strbuf.toString();

42

43 }

44

45 //

46 public String[] input(String str) {

47 String[] arr = str.split("\\s+");

48

49 return arr;

50 }

51

52 public void putmap(List<String> ls) {

53 String temp, theword = null;

54 int flage = 0;

55 theword = ls.get(0);

56 Map<String, Integer> map = new HashMap<String, Integer>();

57 Iterator it = ls.iterator();

58 while (it.hasNext()) {

59 // System.out.println("sss"+it.next().toString());

60 temp = it.next().toString();

61

62 if (temp.equals(theword)) {

63 flage++;

64 continue;

65 } else {

66 map.put(theword, flage);

67 flage = 1;

68 theword = temp;

69

70 }

71 }

72

73 map.put(theword, flage);

74 //重写比较方法,treemap按值排序。用两个map 比较函数需要参数

75 ValueComparator bvc = new ValueComparator(map);

76 Map<String,Integer> map2 = new TreeMap<String,Integer>(bvc);

77 map2.putAll(map);

78 Iterator mapit = map2.entrySet().iterator();

79 while (mapit.hasNext()) {

80 Entry tr = (Entry) mapit.next();

81

82 String num = (String) tr.getKey();

83 int sa = (Integer) tr.getValue();

84 System.out.println("this word:"+num + " has:" + sa + "个");

85 }

86

87 }

88

89 // /

90 public static void main(String[] args) throws IOException {

91 // TODO Auto-generated method stub

92 List<String> ls = new ArrayList<String>();

93

94 calwordnum ca = new calwordnum();

95

96 String allword = ca.readfile();

97 String[] arr = ca.input(allword);

98 for (int i = 0; i < arr.length; i++) {

99

100 ls.add(arr[i].toLowerCase());

101 }

102

103 Collections.sort(ls);

104 display(ls);

105 ca.putmap(ls);

106

107 }

108

109 @Override

110 public boolean equals(Object obj) {

111 // TODO Auto-generated method stub

112 return super.equals(obj);

113 }

114

115 private static void display(List<String> ls) {

116 // TODO Auto-generated method stub

117

118 for (String s : ls)

119 System.out.println(s);

120 }

121

122 }

123 //

/**
 * Orders strings by the count stored for them in a backing map, highest count
 * first; strings with equal counts are ordered by {@link String#compareTo}.
 *
 * <p>The tie-break on the key keeps distinct words with the same count apart
 * (so a {@code TreeMap} using this comparator does not drop them) while still
 * returning 0 for identical keys, which lets lookups such as
 * {@code TreeMap.get} find an entry again.
 */
class ValueComparator implements Comparator<String> {

    /** Map from word to its count; consulted on every comparison. */
    Map<String, Integer> base;

    /**
     * @param base the map whose values define the ordering of its keys
     */
    public ValueComparator(Map<String, Integer> base) {
        this.base = base;
    }

    /**
     * Compares two keys by descending count, then by ascending key.
     *
     * @param a the first key
     * @param b the second key
     * @return a negative number if {@code a} sorts before {@code b}, a positive
     *         number if it sorts after, and 0 only when both the counts and the
     *         keys are equal
     */
    @Override
    public int compare(String a, String b) {
        int byCount = countOf(b).compareTo(countOf(a));
        if (byCount != 0) {
            return byCount;
        }
        return a.compareTo(b);
    }

    /** Returns the count stored for the key, or 0 when the map has no value for it. */
    private Integer countOf(String key) {
        Integer count = base.get(key);
        return count == null ? Integer.valueOf(0) : count;
    }
}

不截图了。运行之后输入文件名即可;如果 \\s+ 不能满足你的分隔需求,请自行修改分隔符。输出结果是每个单词及其出现个数。代码中有考虑不周的地方,望批评指正。

以上是 kwic--Java统计单词个数并按照顺序输出 的全部内容, 来源链接: utcz.com/z/394495.html

回到顶部