【Java爬虫】爬取南通大学教务系统成绩计算绩点

java

  以前写过一个Python版的,但是想做一个JSP网页版的,就又用Java重写了一遍。

  具体地址的分析过程在这里,这里简单说一下HttpClient的Get、Post方法的使用。

           1.Get请求方法

//创建一个浏览器客户端

CloseableHttpClient httpClient = HttpClients.createDefault();

//要Get的地址

String url1="http://www.baidu.com";

//创建一个Get请求

HttpGet baidu=new HttpGet(url1);

//用上面创建的浏览器客户端执行该请求

CloseableHttpResponse res=httpClient.execute(baidu);

//用响应创建一个http实体并获得输入流

HttpEntity he=res.getEntity();

InputStream in=he.getContent();

//将获得的流写到本地磁盘

FileOutputStream out=new FileOutputStream("baidu.html'");

byte[] buffer=new byte[1024];

int count=-1;

while((count=in.read(buffer))!=-1)

{

out.write(buffer, 0, count);

}

in.close();

out.close();


   2.Post请求方法

CloseableHttpClient httpClient = HttpClients.createDefault();

String url="http://××××.××××.com?#";

//要提交的参数username,password

List<NameValuePair> list = new ArrayList<NameValuePair>();

list.add(new BasicNameValuePair("Username","Name"));

list.add(new BasicNameValuePair("Password","××××××"));

//转换编码

UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8");

//创建Post请求

HttpPost httpPost=new HttpPost(url);

//为请求设置参数

httpPost.setEntity(entity);

//获得响应,输入流并写入本地磁盘

CloseableHttpResponse res=httpClient.execute(httpPost);

HttpEntity he=res.getEntity();

InputStream in=he.getContent();

FileOutputStream out=new FileOutputStream("××××.×××");

byte[] buffer=new byte[1024];

int count=-1;

while((count=in.read(buffer))!=-1)

{

out.write(buffer, 0, count);

}

in.close();

out.close();


爬虫的完整代码:

import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.*;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;

/**
 * Console driver for the grade crawler.
 *
 * <p>Flow: download the captcha image, read one line from standard input,
 * then process single-line commands until {@code #} is entered or input ends:
 * <ul>
 *   <li>{@code n} - print every captured value from the personal-information page;</li>
 *   <li>{@code s} - print each course with its grade points, followed by the
 *       credit total, grade-point total and the credit-weighted average.</li>
 * </ul>
 */
public class spider02 {

    /**
     * Runs the interactive command loop described on the class.
     *
     * @param args unused
     * @throws ClientProtocolException if an HTTP request violates the protocol
     * @throws IOException if a request or a local file write fails
     */
    public static void main(String[] args) throws ClientProtocolException, IOException {
        @SuppressWarnings("resource")
        Scanner cin = new Scanner(System.in);
        doon asd = new doon();
        asd.getyzm();

        // The first input line is the captcha text. It is consumed but not used:
        // this driver never calls login().
        // NOTE(review): the requests below are therefore presumably sent without
        // a logged-in session - confirm whether login(...) should be called here.
        if (!cin.hasNextLine()) {
            return;
        }
        cin.nextLine();

        // hasNextLine() ends the loop cleanly when input is exhausted instead of
        // letting nextLine() throw NoSuchElementException.
        while (cin.hasNextLine()) {
            String command = cin.nextLine();
            System.out.println(command);

            if (command.equals("#")) {
                break;
            }

            if (command.equals("n")) {
                Matcher name = asd.patternname(asd.getname());
                while (name.find()) {
                    System.out.println(name.group(1));
                }
            }

            if (command.equals("s")) {
                Matcher score = asd.patternscore(asd.getscore());
                List<lession> les = asd.workjidian(score);
                double jdsum = 0;
                double xfsum = 0;
                for (lession course : les) {
                    jdsum += course.getKcxfjd();
                    xfsum += Double.valueOf(course.getXf()).doubleValue();
                    System.out.println(course.getKcmc() + "\t" + course.getZpcj() + "\t"
                            + course.getXf() + "\t" + course.getKcxfjd());
                }
                // With no credits collected, 0/0 would print NaN; report 0.0 instead.
                double average = (xfsum == 0) ? 0.0 : jdsum / xfsum;
                System.out.println("所修课程学分:" + xfsum);
                System.out.println("所修课程学分绩点:" + jdsum);
                System.out.println("平均学分绩点:" + average);
            }
        }
    }
}

/**
 * HTTP client wrapper for the grade-query pages under
 * {@code http://jwgl.ntu.edu.cn/cjcx/}: captcha download, login, personal
 * information, score list, plus the regular expressions and the grade-point
 * calculation applied to the downloaded text.
 *
 * <p>All requests go through one shared {@link CloseableHttpClient} instance.
 * NOTE(review): presumably this is what lets the login session carry over to
 * the later requests - confirm against the HttpClient cookie handling.
 */
class doon {

    /** Matches one course record in the score response; group 5 is the credit, group 6 the final grade. */
    private static final Pattern SCORE_PATTERN = Pattern.compile(
            "\"kcmc\":\"(.*?)\",\"jsxm\":\"(.*?)\",\"xq\":\"(.*?)\",\"xs\":\"(.*?)\",\"xf\":\"(.*?)\",\"zpcj\":\"(.*?)\",\"pscj\":\"(.*?)\",\"qmcj\":\"(.*?)\",\"kcsx\":\"(.*?)\",\"cjid\":\"(.*?)\",\"ksfsm\":\"(.*?)\",\"pxcj\":\"(.*?)\"}");

    /** Matches the content of every {@code <b>...</b>} element of the personal-information page. */
    private static final Pattern NAME_PATTERN = Pattern.compile("<b>(.*?)</b>");

    private final CloseableHttpClient httpClient = HttpClients.createDefault();

    /**
     * Logs in and then requests the score list; the returned score text is discarded.
     * I/O failures are printed to standard error and not propagated.
     *
     * @param xh   value sent as form field {@code xh}
     * @param sfzh value sent as form field {@code sfzh}
     * @param kl   value sent as form field {@code kl}
     * @param yzm  value sent as form field {@code yzm} (captcha text)
     */
    public void done(String xh, String sfzh, String kl, String yzm) {
        try {
            login(xh, sfzh, kl, yzm); // attempt to log in
            getscore();               // fetch the scores
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so one handler covers both.
            e.printStackTrace();
        }
    }

    /**
     * Posts {@code xq=2013-2014-1} to the personal-information page and returns
     * the response body as text.
     *
     * @return the response text, or an empty string if the request failed
     *         (the failure is printed to standard error)
     */
    public String getname() {
        String url = "http://jwgl.ntu.edu.cn/cjcx/QueryAll.aspx"; // personal-information page
        List<NameValuePair> list = new ArrayList<NameValuePair>();
        list.add(new BasicNameValuePair("xq", "2013-2014-1"));
        try {
            HttpPost post = new HttpPost(url);
            post.setEntity(new UrlEncodedFormEntity(list, "utf-8"));
            return decode(readBody(httpClient.execute(post)));
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Downloads the captcha image with a GET request and writes it to {@code yzm.gif}.
     *
     * @throws IOException if the request or the file write fails
     */
    public void getyzm() throws IOException {
        String url1 = "http://jwgl.ntu.edu.cn/cjcx/checkImage.aspx"; // captcha page
        byte[] image = readBody(httpClient.execute(new HttpGet(url1)));
        save("yzm.gif", image);
    }

    /**
     * Posts the login form and writes the response body to {@code ans.html}.
     *
     * <p>{@code __VIEWSTATE} and {@code __VIEWSTATEGENERATOR} are sent as fixed values.
     * NOTE(review): presumably captured once from the login page - they may need
     * refreshing if that page changes.
     *
     * @param xh   value sent as form field {@code xh}
     * @param sfzh value sent as form field {@code sfzh}
     * @param kl   value sent as form field {@code kl}
     * @param yzm  value sent as form field {@code yzm} (captcha text)
     * @throws ClientProtocolException if the request violates the HTTP protocol
     * @throws IOException if the request or the file write fails
     */
    public void login(String xh, String sfzh, String kl, String yzm)
            throws ClientProtocolException, IOException {
        String url = "http://jwgl.ntu.edu.cn/cjcx/Default.aspx"; // login page
        List<NameValuePair> list = new ArrayList<NameValuePair>();
        list.add(new BasicNameValuePair("__VIEWSTATE", "/wEPDwUJODExMDE5NzY5ZGRgtUdRucUbXsT8g55XmVsTwV6PMw=="));
        list.add(new BasicNameValuePair("__VIEWSTATEGENERATOR", "6C0FF253"));
        list.add(new BasicNameValuePair("xh", xh));
        list.add(new BasicNameValuePair("sfzh", sfzh));
        list.add(new BasicNameValuePair("kl", kl));
        list.add(new BasicNameValuePair("yzm", yzm));

        HttpPost httpPost = new HttpPost(url);
        httpPost.setEntity(new UrlEncodedFormEntity(list, "utf-8"));
        save("ans.html", readBody(httpClient.execute(httpPost)));
    }

    /**
     * Posts {@code start=0, pageSize=80} to the score page, writes the raw
     * response to {@code score.html} and returns it as text.
     *
     * @return the response body decoded as text
     * @throws ClientProtocolException if the request violates the HTTP protocol
     * @throws IOException if the request or the file write fails
     */
    public String getscore() throws ClientProtocolException, IOException {
        String url = "http://jwgl.ntu.edu.cn/cjcx/Data/ScoreAllData.aspx"; // score data
        List<NameValuePair> list = new ArrayList<NameValuePair>();
        list.add(new BasicNameValuePair("start", "0"));
        list.add(new BasicNameValuePair("pageSize", "80"));

        HttpPost httpPost = new HttpPost(url);
        httpPost.setEntity(new UrlEncodedFormEntity(list, "utf-8"));
        byte[] body = readBody(httpClient.execute(httpPost));
        save("score.html", body);
        return decode(body);
    }

    /**
     * Creates a matcher that finds course records in the score text.
     *
     * @param score text returned by {@link #getscore()}
     * @return a matcher with twelve groups per record, in the order
     *         kcmc, jsxm, xq, xs, xf, zpcj, pscj, qmcj, kcsx, cjid, ksfsm, pxcj
     */
    public Matcher patternscore(String score) {
        return SCORE_PATTERN.matcher(score);
    }

    /**
     * Creates a matcher that finds {@code <b>...</b>} contents in the
     * personal-information text.
     *
     * @param name text returned by {@link #getname()}
     * @return a matcher whose group 1 is the text between the tags
     */
    public Matcher patternname(String name) {
        return NAME_PATTERN.matcher(name);
    }

    /**
     * Computes the grade points of every course the matcher finds.
     *
     * <p>A course's grade points are its per-credit grade point multiplied by its
     * credit (group 5). Courses graded "缓考" or "不及格", numeric grades below 60,
     * and records whose grade or credit cannot be interpreted are left out.
     *
     * @param score matcher produced by {@link #patternscore(String)}
     * @return one {@code lession} per counted course, in match order
     */
    public List<lession> workjidian(Matcher score) {
        List<lession> les = new ArrayList<lession>();
        while (score.find()) {
            double perCredit = gradePointPerCredit(score.group(6));
            if (Double.isNaN(perCredit)) {
                continue; // not counted
            }
            double credit;
            try {
                credit = Double.parseDouble(score.group(5));
            } catch (NumberFormatException e) {
                continue; // credit is not a number: skip the record instead of aborting the run
            }
            double xf = perCredit * credit;
            les.add(new lession(score.group(1), score.group(2), score.group(3), score.group(4),
                    score.group(5), score.group(6), score.group(7), score.group(8),
                    score.group(9), score.group(10), score.group(11), score.group(12), xf));
        }
        return les;
    }

    /**
     * Maps a final grade to its grade point per credit.
     *
     * <p>Five-level grades map to fixed values. Numeric grades use ten-point bands
     * with lower bounds only, so fractional marks such as 89.5 fall into the band
     * below rather than between two bands.
     *
     * @return the grade point, or {@code NaN} when the course must not be counted
     */
    private static double gradePointPerCredit(String grade) {
        // Five-level grading
        if ("优".equals(grade)) {
            return 4.5;
        }
        if ("良".equals(grade)) {
            return 3.5;
        }
        if ("中".equals(grade)) {
            return 2.5;
        }
        if ("及格".equals(grade)) {
            return 1.5;
        }
        if ("缓考".equals(grade) || "不及格".equals(grade)) {
            return Double.NaN;
        }

        // Hundred-point grading
        double mark;
        try {
            mark = Double.parseDouble(grade);
        } catch (NumberFormatException e) {
            return Double.NaN; // unknown textual grade
        }
        if (mark >= 90) {
            return (mark - 90) / 10 + 4.0;
        }
        if (mark >= 80) {
            return (mark - 80) / 10 + 3.0;
        }
        if (mark >= 70) {
            return (mark - 70) / 10 + 2.0;
        }
        if (mark >= 60) {
            return (mark - 60) / 10 + 1.0;
        }
        return Double.NaN; // failed, or not a comparable number
    }

    /** Reads the whole response body into memory and always closes the response. */
    private static byte[] readBody(CloseableHttpResponse response) throws IOException {
        try {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                return new byte[0]; // response carried no body
            }
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            try (InputStream in = entity.getContent()) {
                byte[] buffer = new byte[1024];
                int count;
                while ((count = in.read(buffer)) != -1) {
                    collected.write(buffer, 0, count);
                }
            }
            return collected.toByteArray();
        } finally {
            response.close();
        }
    }

    /**
     * Decodes a complete body in one step, so a multi-byte character can never be
     * cut in half at a read-buffer boundary.
     * NOTE(review): uses the platform default charset; the charset the server
     * actually sends is not visible here - confirm and pin it explicitly.
     */
    private static String decode(byte[] body) {
        return new String(body);
    }

    /** Writes the bytes to the named local file, replacing any existing content. */
    private static void save(String fileName, byte[] body) throws IOException {
        try (FileOutputStream out = new FileOutputStream(fileName)) {
            out.write(body);
        }
    }
}




以上是 【Java爬虫】爬取南通大学教务系统成绩计算绩点 的全部内容, 来源链接: utcz.com/z/393924.html

回到顶部