php英文单词统计器

本文实例为大家分享了英文单词统计器php 实现,供大家参考,具体内容如下

程序运行后,点击"浏览"按钮选择一个英文文档,再点击"统计 Statistics"按钮,即可得到按字母顺序列出的所有单词及其出现次数。

用于测试的数据文档: data.txt

驱动程序:word.php

word.php 依赖 output.php 和 StringTokenizer.php,这三个文件必须放在同一个文件夹中。

1. words_statistics_PHP.png   

2. word.php

<html>

<style>

td{

background-color:#CF6;

width:100px;

margin:5px;

}

</style>

<body>

<?php

/**

* 程序开始运行, 按"浏览"钮选择一个英文文档, 再按"统计"钮,

* 即可得到按字母顺序列出的所有单词,及其出现的次数

*

* 作者: 许同春 author Tongchun Xu

* @开源中国 Open Source, China community

* 完成日期:2016年6月10日 completion date: 10 June, 2016

*/

require("StringTokenizer.php");

require("output.php");

// Process the upload only when the form was actually submitted. On the first
// GET request $_POST has no 'submit' key, so it must not be read directly.
if (!empty($_POST['submit'])) {
    if (!isset($_FILES["file"])) {
        // The request carried no "file" field at all; report it the same way
        // PHP reports a submission without a file.
        echo "Error: " . UPLOAD_ERR_NO_FILE . "<br />";
    } elseif ($_FILES["file"]["error"] > 0) {
        echo "Error: " . $_FILES["file"]["error"] . "<br />";
    } else {
        // Read the whole upload in one call. Unlike fread() with filesize(),
        // this also works for an empty file (length 0) and leaves no handle
        // open while the words are being processed.
        $str = file_get_contents($_FILES["file"]["tmp_name"]);
        if ($str === false) {
            die("Unable to open file!");
        }

        // Every character listed here separates two words.
        $delim = "?\\,. /:!\"()\t\n\r\f%";
        $st = new StringTokenizer($str, $delim);
        echo '找到字符串: '.$st->countTokens();

        // Feed each token into the sorted list, which also counts repeats.
        $list = new LinkedList();
        while ($st->hasMoreTokens()) {
            $list->orderInsert($st->nextToken());
        }

        $list->words_count();
        $list->traversal();
    }
}

?>

<h2>英文文档单词统计 Statistics on English words </h2>

<p>程序开始运行, 按"浏览"钮选择一个英文文档, 再按"统计 Statistics"钮,

即可得到按字母顺序列出的所有单词,及其出现的次数 </p>

<form action="word.php" method="post"

enctype="multipart/form-data">

<label for="file">英文文档名 File Name:</label>

<input type="file" name="file" id="file" />

<input type="submit" name="submit" value="统计 Statistics" />

</form>

</body>

</html>

3. output.php

<meta charset="utf-8" />

<?

/**

* The class LinkedList allows an application to store strings in

* alphabetical order by calling orderInsert().

* 此处定义的 LinkedList 类,可以调用它的 方法 orderInsert(),来以字母

* 大小的顺序储存 英文字符串。

* 同时记录 英文单词出现的次数

* 作者: 许同春 author Tongchun Xu

* @开源中国 Open Source, China community

* 完成日期:2016年6月10日 completion date: 10 June, 2016

*/

/**
 * One cell of the singly linked word list.
 */
class Node
{
    /** @var mixed The value stored in this cell (an English word in this program). */
    public $data;

    /** @var int How many times the stored word has been seen. */
    public $frequency;

    /** @var Node|null The following cell, or null when this is the last one. */
    public $next;

    /**
     * @param mixed     $data      Value to store.
     * @param Node|null $next      Successor cell; defaults to none.
     * @param int       $frequency Initial occurrence count; defaults to 1.
     */
    public function __construct($data, $next = null, $frequency = 1)
    {
        $this->frequency = $frequency;
        $this->next      = $next;
        $this->data      = $data;
    }
}

/**
 * Singly linked list of words with per-word occurrence counts.
 *
 * The list always begins with a sentinel head node whose data is a
 * placeholder; real entries start at $head->next. orderInsert() keeps the
 * entries in ascending strcmp() order and bumps the counter of a word that is
 * already stored instead of adding it a second time.
 */
class LinkedList
{
    /** @var Node Sentinel head node; it never holds a real word. */
    private $head;

    /**
     * @var null Not used by any method of this class. It is declared (and kept
     *           public, as the former dynamic property was) so that assigning
     *           it in the constructor does not create a dynamic property.
     */
    public $first;

    /**
     * Creates an empty list consisting of the sentinel head only.
     */
    public function __construct()
    {
        $this->head = new Node("dummy 傀儡");
        $this->first = null;
    }

    /**
     * @return bool True when no word is stored.
     */
    public function isEmpty()
    {
        return $this->head->next === null;
    }

    /**
     * Stores $data at its sorted position, or increments the counter of the
     * existing entry when the exact same string is already present.
     *
     * @param string $data Word to record.
     * @return void
     */
    public function orderInsert($data)
    {
        // The whole list is searched first (not just the sorted prefix) so
        // that entries appended through insertLast() are still recognised.
        $existing = $this->find($data);
        if ($existing !== null) {
            $existing->frequency++;
            return;
        }

        // Stop at the last node whose successor does not sort before $data.
        $q = $this->head;
        while ($q->next !== null && strcmp($data, $q->next->data) > 0) {
            $q = $q->next;
        }
        $q->next = new Node($data, $q->next);
    }

    /**
     * Appends $data after the current last node without any ordering or
     * duplicate check.
     *
     * @param string $data Word to append.
     * @return void
     */
    public function insertLast($data)
    {
        // Starting from the sentinel makes the empty list a non-special case.
        $q = $this->head;
        while ($q->next !== null) {
            $q = $q->next;
        }
        $q->next = new Node($data);
    }

    /**
     * Looks up the node holding exactly $value.
     *
     * The comparison is byte-wise (strcmp), never PHP's loose ==, because ==
     * treats numeric-looking strings such as "1000" and "1e3" as equal and
     * would merge distinct words.
     *
     * @param string $value Word to search for.
     * @return Node|null The matching node, or null when absent (including
     *                   when the list is empty).
     */
    public function find($value)
    {
        for ($q = $this->head->next; $q !== null; $q = $q->next) {
            if (strcmp($q->data, $value) === 0) {
                return $q;
            }
        }
        return null;
    }

    /**
     * Prints every word with its occurrence count as an HTML table, starting
     * a new table row after every 11th cell, or a notice when the list is
     * empty.
     *
     * @return void
     */
    public function traversal()
    {
        if ($this->isEmpty()) {
            echo "链表为空!";
            return;
        }

        echo "输出结果:<table><tr>";
        $n = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
            // The words come from an uploaded file, so they are escaped before
            // being placed into markup. ENT_SUBSTITUTE keeps words containing
            // bytes that are not valid UTF-8 visible instead of blanking them.
            $word = htmlspecialchars((string) $p->data, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo "<td>".$word."<br>出现次数:".$p->frequency."</td>";
            $n++;
            if ($n % 11 == 0) {
                echo "</tr><tr>";
            }
        }
        echo "</tr></table>";
    }

    /**
     * Prints how many distinct words are stored, or a notice when there are
     * none.
     *
     * @return void
     */
    public function words_count()
    {
        if ($this->isEmpty()) {
            echo "<br>没有储存字符串 <br>";
            return;
        }

        // Count nodes, not links between nodes: a list with one word has one
        // node and zero links.
        $counter = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
            $counter++;
        }
        echo "***共有单词 ".$counter." 个***";
    }
}

?>

4. StringTokenizer.php

<?php

/**

* The string tokenizer class allows an application to break a string into tokens.

*

* @author Azeem Michael

* @example The following is one example of the use of the tokenizer. The code:

* <code>

* <?php

* $str = "this is:@\t\n a test!";

* $delim = " !@:'\t\n\0"; // remove these chars

* $st = new StringTokenizer($str, $delim);

* echo 'Total tokens: '.$st->countTokens().'<br/>';

* while ($st->hasMoreTokens()) {

* echo $st->nextToken() . '<br/>';

* }

* prints the following output:

* Total tokens: 4

* this

* is

* a

* test

* ?>

* </code>

*/

class StringTokenizer {

    /** @var string The text being split into tokens. */
    private $string;

    /** @var string|false The token nextToken() will return next; false when none is left. */
    private $token;

    /** @var string Every character in this string separates two tokens. */
    private $delim;

    /** @var int Byte offset in $string where the search for the token after $token starts. */
    private $offset = 0;

    /**
     * Constructs a string tokenizer for the specified string.
     *
     * Each instance keeps its own cursor, so several tokenizers can be used
     * alternately. (PHP's strtok() has a single process-wide cursor, which
     * makes interleaved tokenizers overwrite each other's position.)
     *
     * @param string $str   String to tokenize
     * @param string $delim The set of delimiters (the characters that separate tokens)
     *                      specified at creation time, default to " \n\r\t\0"
     */
    public function __construct($str, $delim=" \n\r\t\0") {
        $this->string = (string) $str;
        $this->delim = (string) $delim;
        $this->token = $this->scan();
    }

    /**
     * Intentionally empty. PHP releases the object's memory by itself, and
     * "unset($this)" is rejected at compile time since PHP 7.1. The method is
     * kept only so that the class's set of methods is unchanged.
     */
    public function __destruct() {
    }

    /**
     * Calculates the number of times that this tokenizer's nextToken method can
     * still be called before it runs out of tokens. The current position is
     * left untouched.
     *
     * @return int - number of remaining tokens
     */
    public function countTokens() {
        // Remember the cursor, walk to the end, then put the cursor back so
        // that counting in the middle of an iteration does not restart it.
        $savedOffset = $this->offset;
        $savedToken = $this->token;

        $counter = 0;
        while ($this->hasMoreTokens()) {
            $counter++;
            $this->nextToken();
        }

        $this->offset = $savedOffset;
        $this->token = $savedToken;
        return $counter;
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string. It
     * does not move the internal pointer in any way. To move the internal pointer
     * to the next element call nextToken()
     *
     * @return boolean - true if has more tokens, false otherwise
     */
    public function hasMoreTokens() {
        return ($this->token !== false);
    }

    /**
     * Returns the next token from this string tokenizer and advances the internal
     * pointer by one.
     *
     * @return string|false - next element in the tokenized string, or false
     *                        when no token is left
     */
    public function nextToken() {
        $hold = $this->token;          // token to hand out now
        $this->token = $this->scan();  // look ahead to the one after it
        return $hold;
    }

    /**
     * Extracts the token that starts at or after the current offset and moves
     * the offset past it. Runs of delimiters are skipped, so empty tokens are
     * never produced.
     *
     * @return string|false The token, or false when only delimiters (or
     *                      nothing) remain.
     */
    private function scan() {
        $length = strlen($this->string);

        // Skip any delimiters in front of the next token.
        $start = $this->offset + strspn($this->string, $this->delim, $this->offset);
        if ($start >= $length) {
            $this->offset = $length;
            return false;
        }

        // The token runs up to the next delimiter or the end of the string.
        $size = strcspn($this->string, $this->delim, $start);
        $this->offset = $start + $size;
        return substr($this->string, $start, $size);
    }
}

?>

以上是 php英文单词统计器 的全部内容, 来源链接: utcz.com/z/354875.html

回到顶部