// Java下载文件 爬虫 超时处理解决方案 — Java file-download crawler: a timeout-handling approach
import java.util.List;import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.util.ArrayList;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Main {
/** Polite pause before each connection, in milliseconds. */
public static final int sleepMsPerConnection = 1000;
/** How long to wait for a worker thread before treating the attempt as timed out (ms). */
public static final int timeOutMs = 20000;
/** Number of additional attempts allowed after the first one times out. */
public static final int retry = 2;
/**
 * Downloads the resource at {@code urlStr} to {@code filePath} in a worker thread,
 * retrying up to {@link #retry} times when the thread does not finish within
 * {@link #timeOutMs} milliseconds.
 *
 * @param urlStr   URL of the resource to download
 * @param filePath destination path on the local file system
 * @throws RuntimeException if every attempt times out
 */
private static void download(String urlStr, String filePath) {
int retryCount = 0;
while(true){
try {
DownloadThread thread = new DownloadThread(urlStr, filePath);
thread.start();
thread.join(timeOutMs);
if(!thread.isAlive()){
return; // worker finished within the timeout
}else{
// Best effort only: interrupt() cannot abort blocking socket I/O, so the
// stuck thread may linger until the JVM exits (see the original article note).
thread.interrupt();
}
} catch (InterruptedException e) {
// Do not swallow the interrupt: restore the flag for callers that care.
Thread.currentThread().interrupt();
e.printStackTrace();
}
retryCount++;
if(retryCount > retry){
// BUG FIX: the message previously reported (retry - 1); we actually retried `retry` times.
throw new RuntimeException("still timeout after retry " + retry + " times");
}
System.out.println("retry");
}
}
/**
 * Fetches the HTML at {@code urlStr} in a worker thread, retrying up to
 * {@link #retry} times when the thread does not finish within {@link #timeOutMs} ms.
 *
 * @param urlStr URL of the page to fetch
 * @return the page content produced by the worker thread
 * @throws RuntimeException if every attempt times out
 */
private static String getHtml(String urlStr) {
int retryCount = 0;
while(true){
try {
GetHtmlThread thread = new GetHtmlThread(urlStr);
thread.start();
thread.join(timeOutMs);
if(!thread.isAlive()){
return thread.html;
}else{
// Best effort only; see note in download().
thread.interrupt();
}
} catch (InterruptedException e) {
// Restore the interrupt flag instead of swallowing it.
Thread.currentThread().interrupt();
e.printStackTrace();
}
retryCount++;
if(retryCount > retry){
// BUG FIX: message now reports the true number of retries performed.
throw new RuntimeException("still timeout after retry " + retry + " times");
}
System.out.println("retry");
}
}
}
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
/**
 * Worker thread that fetches one HTML page. The controller (Main.getHtml) joins
 * on this thread with a timeout and reads {@link #html} once it has finished.
 */
public class GetHtmlThread extends Thread {
/** Page content; null until run() completes successfully. */
public String html;
private String urlStr;
public GetHtmlThread(String urlStr) {
this.urlStr = urlStr;
}
public void run() {
try {
// Polite throttle before opening the connection.
Thread.sleep(Main.sleepMsPerConnection);
URL url = new URL(urlStr);
// FIX: set socket-level timeouts so a dead server cannot hang this thread
// forever — interrupt() alone cannot abort blocking socket I/O.
URLConnection conn = url.openConnection();
conn.setConnectTimeout(Main.timeOutMs);
conn.setReadTimeout(Main.timeOutMs);
StringBuilder sb = new StringBuilder();
// FIX: try-with-resources closes the reader even if readLine() throws
// (the original leaked the stream on the exception path).
// NOTE(review): charset fixed to UTF-8 instead of the platform default;
// ideally this should come from the Content-Type header — TODO confirm.
try (BufferedReader br = new BufferedReader(
        new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
String line;
while ((line = br.readLine()) != null) {
sb.append(line);
sb.append('\n');
}
}
this.html = sb.toString();
} catch (InterruptedException e) {
// Interrupted by the controller after a timeout; restore the flag and stop.
Thread.currentThread().interrupt();
} catch (Exception e) {
// NOTE(review): exiting the whole JVM from a worker thread is drastic;
// kept to preserve the original fail-fast contract.
e.printStackTrace();
System.exit(1);
}
}
}
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
/**
 * Worker thread that downloads one URL to a local file. The controller
 * (Main.download) joins on this thread with a timeout.
 */
public class DownloadThread extends Thread {
private String urlStr;   // source URL
private String filePath; // destination path on disk
public DownloadThread(String urlStr, String filePath) {
this.urlStr = urlStr;
this.filePath = filePath;
}
public void run() {
try {
URL url = new URL(urlStr);
// FIX: try-with-resources closes both streams even when openStream() or
// copyStream() throws mid-transfer (the original leaked them on failure).
try (InputStream is = url.openStream();
     OutputStream os = new FileOutputStream(new File(filePath))) {
copyStream(is, os);
}
} catch (Exception e) {
// NOTE(review): exiting the whole JVM from a worker thread is drastic;
// kept to preserve the original fail-fast contract.
e.printStackTrace();
System.exit(1);
}
}
/**
 * Copies every byte from {@code inputStream} to {@code outputStream} and flushes
 * the output. The caller remains responsible for closing both streams.
 *
 * @param inputStream  source of bytes
 * @param outputStream destination for bytes
 * @throws IOException if either stream fails
 */
private void copyStream(InputStream inputStream, OutputStream outputStream)
throws IOException {
byte[] b = new byte[1024];
int len;
while ((len = inputStream.read(b)) > 0) {
outputStream.write(b, 0, len);
}
outputStream.flush();
}
}
// 以上是 Java下载文件 爬虫 超时处理解决方案 的全部内容, 来源链接: utcz.com/z/394708.html (end of article; source link)