Java下载文件 爬虫 超时处理解决方案

java

import java.util.List;

import java.io.BufferedReader;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.FileReader;

import java.io.FileWriter;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.io.OutputStream;

import java.net.HttpURLConnection;

import java.net.MalformedURLException;

import java.net.SocketTimeoutException;

import java.net.URL;

import java.util.ArrayList;

import java.util.logging.Logger;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

public class Main {

    /** Throttling delay applied before each connection attempt (milliseconds). */
    public static final int sleepMsPerConnection = 1000;

    /** How long to wait for a worker thread before treating the attempt as timed out (ms). */
    public static final int timeOutMs = 20000;

    /** Number of additional attempts made after the first attempt times out. */
    public static final int retry = 2;

    /**
     * Downloads the resource at {@code urlStr} to {@code filePath}, running the
     * transfer on a worker thread so a hung connection can be timed out.
     * Retries up to {@link #retry} times on timeout.
     *
     * @param urlStr   URL of the resource to download
     * @param filePath destination file path
     * @throws RuntimeException if every attempt (1 initial + {@code retry} retries) times out
     */
    private static void download(String urlStr, String filePath) {
        int retryCount = 0;
        while (true) {
            try {
                DownloadThread thread = new DownloadThread(urlStr, filePath);
                thread.start();
                thread.join(timeOutMs);
                if (!thread.isAlive()) {
                    // Worker finished within the timeout window.
                    return;
                } else {
                    // NOTE: interrupt() cannot actually stop a thread blocked in
                    // stream I/O — the worker may keep running in the background.
                    // (See articles on how to properly interrupt Java threads.)
                    thread.interrupt();
                }
            } catch (InterruptedException e) {
                // Restore the interrupt status instead of silently swallowing it.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            }
            retryCount++;
            if (retryCount > retry) {
                // Fixed message: at this point we have retried `retry` times,
                // not `retry - 1` as the original claimed.
                throw new RuntimeException("still timeout after retry " + retry + " times");
            }
            System.out.println("retry");
        }
    }

    /**
     * Fetches the HTML at {@code urlStr} on a worker thread so a hung connection
     * can be timed out. Retries up to {@link #retry} times on timeout.
     *
     * @param urlStr URL of the page to fetch
     * @return the page content accumulated by the worker (may be null if the
     *         worker finished without populating it)
     * @throws RuntimeException if every attempt (1 initial + {@code retry} retries) times out
     */
    private static String getHtml(String urlStr) {
        int retryCount = 0;
        while (true) {
            try {
                GetHtmlThread thread = new GetHtmlThread(urlStr);
                thread.start();
                thread.join(timeOutMs);
                if (!thread.isAlive()) {
                    return thread.html;
                } else {
                    // Same caveat as download(): interrupt() does not stop blocking I/O.
                    thread.interrupt();
                }
            } catch (InterruptedException e) {
                // Restore the interrupt status instead of silently swallowing it.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            }
            retryCount++;
            if (retryCount > retry) {
                // Fixed message: `retry` retries were performed, not `retry - 1`.
                throw new RuntimeException("still timeout after retry " + retry + " times");
            }
            System.out.println("retry");
        }
    }
}

import java.io.BufferedReader;

import java.io.InputStreamReader;

import java.net.URL;

public class GetHtmlThread extends Thread {

    /** Fetched page content; remains null until run() completes successfully. */
    public String html;

    private String urlStr;

    /**
     * @param urlStr URL of the page to fetch when this thread is started
     */
    public GetHtmlThread(String urlStr) {
        this.urlStr = urlStr;
    }

    /**
     * Fetches the page line by line and stores it in {@link #html}.
     * Exits quietly if interrupted during the throttling sleep (the controller
     * uses interruption to signal a timeout).
     */
    @Override
    public void run() {
        try {
            // Throttle: pause before opening each connection.
            Thread.sleep(Main.sleepMsPerConnection);
            URL url = new URL(urlStr);
            StringBuilder sb = new StringBuilder();
            // try-with-resources closes the reader even if readLine() throws —
            // the original leaked the stream on the exception path.
            // Charset is now explicit; the original used the platform default,
            // which made decoding machine-dependent. NOTE(review): ideally the
            // charset should come from the response Content-Type header.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(url.openStream(), "UTF-8"))) {
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line).append('\n');
                }
            }
            this.html = sb.toString();
        } catch (InterruptedException e) {
            // Interrupted by the controller (timeout): restore the interrupt
            // status and let the thread die without publishing a result.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            e.printStackTrace();
            // NOTE(review): System.exit(1) from a worker thread kills the whole
            // JVM mid-crawl; kept for compatibility with the original design,
            // but consider reporting the failure to the controller instead.
            System.exit(1);
        }
    }
}

import java.io.File;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.OutputStream;

import java.net.URL;

public class DownloadThread extends Thread {

    private String urlStr;
    private String filePath;

    /**
     * @param urlStr   URL of the resource to download when this thread is started
     * @param filePath destination file path
     */
    public DownloadThread(String urlStr, String filePath) {
        this.urlStr = urlStr;
        this.filePath = filePath;
    }

    /**
     * Streams the resource at {@code urlStr} to {@code filePath}.
     */
    @Override
    public void run() {
        try {
            URL url = new URL(urlStr);
            // try-with-resources guarantees both streams are closed even when
            // copyStream() throws — the original only closed them on the
            // success path, leaking handles on any I/O error.
            try (InputStream is = url.openStream();
                 OutputStream os = new FileOutputStream(new File(filePath))) {
                copyStream(is, os);
            }
        } catch (Exception e) {
            e.printStackTrace();
            // NOTE(review): System.exit(1) from a worker thread kills the whole
            // JVM mid-crawl; kept for compatibility with the original design,
            // but consider reporting the failure to the controller instead.
            System.exit(1);
        }
    }

    /**
     * Copies all bytes from {@code inputStream} to {@code outputStream} in
     * 1 KiB chunks and flushes the output.
     * <p>
     * The streams are NOT closed here; the caller owns and must close them.
     *
     * @param inputStream  source stream
     * @param outputStream destination stream
     * @throws IOException if reading or writing fails
     */
    private void copyStream(InputStream inputStream, OutputStream outputStream)
            throws IOException {
        byte[] buffer = new byte[1024];
        int len;
        while ((len = inputStream.read(buffer)) > 0) {
            outputStream.write(buffer, 0, len);
        }
        outputStream.flush();
    }
}

以上是 Java下载文件 爬虫 超时处理解决方案 的全部内容, 来源链接: utcz.com/z/394708.html

回到顶部