用 Java 下载网页

Z时代
2024-01-10
分类：综合

java

使用 Java 下载指定 URL 的网页内容：

[java] view plain copy

package com.learn.test;

import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStreamReader;

import java.net.SocketTimeoutException;

import java.net.URL;

import java.net.URLConnection;

/**
 * Minimal example that downloads the text content of a web page.
 *
 * <p>The page is requested with browser-like {@code User-Agent} and
 * {@code Accept} headers and is decoded as UTF-8.
 */
public class TestDownload {

    /** Maximum time to wait for the connection to be established, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 20 * 1000;

    /** Maximum time to wait for data once connected, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 20 * 1000;

    /**
     * Downloads a fixed sample page and prints it to standard output when the
     * download produced any content.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        TestDownload loader = new TestDownload();
        String page = loader.Download("http://top.baidu.com/detail/buzz?boardid=11");
        if (!page.isEmpty()) {
            System.out.println(page);
        }
    }

    /**
     * Downloads the resource at the given URL and returns its body as text.
     *
     * <p>The body is read line by line; every line is emitted preceded by a
     * {@code "\n"}, so a non-empty result starts with a newline and has no
     * trailing newline.
     *
     * @param strURL the URL to download
     * @return the decoded body, or an empty string if the URL is malformed, the
     *         connection times out, or any other I/O error occurs (the
     *         exception's stack trace is printed to standard error)
     */
    public String Download(String strURL) {
        try {
            URL url = new URL(strURL);
            URLConnection uc = url.openConnection();
            uc.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0");
            uc.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            // Timeouts are in milliseconds; without a read timeout a stalled
            // server would block this call forever.
            uc.setConnectTimeout(CONNECT_TIMEOUT_MS);
            uc.setReadTimeout(READ_TIMEOUT_MS);

            // Read from the configured connection. url.openStream() would open a
            // second connection that carries none of the headers or timeouts
            // set above. getInputStream() connects implicitly.
            try (BufferedReader in = new BufferedReader(new InputStreamReader(
                    uc.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                StringBuilder sb = new StringBuilder();
                String line;
                while ((line = in.readLine()) != null) {
                    sb.append("\n");
                    sb.append(line);
                }
                return sb.toString();
            }
        } catch (SocketTimeoutException e) {
            // A timeout means there is no usable response; report it and fall
            // through to the empty result instead of continuing the download.
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return "";
    }
}

以上是用 Java 下载网页的全部内容，来源链接：utcz.com/z/390639.html

回到顶部