java:快速文件分割及合并
文件分割与合并是一个常见需求,比如:上传大文件时,可以先分割成小块,传到服务器后再进行合并。很多高大上的分布式文件系统(比如:Google 的 GFS、淘宝的 TFS)里,也是以 block 为单位,对文件进行分割或合并。
看下基本思路:
如果有一个大文件,指定分割大小后(比如:按1M切割)
step 1:
先根据原始文件大小、分割大小,算出最终分割的小文件数N
step 2:
在磁盘上创建这N个小文件
step 3:
开多个线程(线程数=分割文件数),每个线程里,利用RandomAccessFile的seek功能,将读取指针定位到原文件里每一段的段首位置,然后向后读取指定大小(即:分割块大小),最终写入对应的分割文件,因为多线程并行处理,各写各的小文件,速度相对还是比较快的。
合并时,把上面的思路逆向处理即可。
核心代码:
分割处理:
1 /**2 * 拆分文件
3 * @param fileName 待拆分的完整文件名
4 * @param byteSize 按多少字节大小拆分
5 * @return 拆分后的文件名列表
6 * @throws IOException
7 */
8 public List<String> splitBySize(String fileName, int byteSize)
9 throws IOException {
10 List<String> parts = new ArrayList<String>();
11 File file = new File(fileName);
12 int count = (int) Math.ceil(file.length() / (double) byteSize);
13 int countLen = (count + "").length();
14 ThreadPoolExecutor threadPool = new ThreadPoolExecutor(count,
15 count * 3, 1, TimeUnit.SECONDS,
16 new ArrayBlockingQueue<Runnable>(count * 2));
17
18 for (int i = 0; i < count; i++) {
19 String partFileName = file.getName() + "."
20 + leftPad((i + 1) + "", countLen, '0') + ".part";
21 threadPool.execute(new SplitRunnable(byteSize, i * byteSize,
22 partFileName, file));
23 parts.add(partFileName);
24 }
25 return parts;
26 }
View Code
1 private class SplitRunnable implements Runnable {2 int byteSize;
3 String partFileName;
4 File originFile;
5 int startPos;
6
7 public SplitRunnable(int byteSize, int startPos, String partFileName,
8 File originFile) {
9 this.startPos = startPos;
10 this.byteSize = byteSize;
11 this.partFileName = partFileName;
12 this.originFile = originFile;
13 }
14
15 public void run() {
16 RandomAccessFile rFile;
17 OutputStream os;
18 try {
19 rFile = new RandomAccessFile(originFile, "r");
20 byte[] b = new byte[byteSize];
21 rFile.seek(startPos);// 移动指针到每“段”开头
22 int s = rFile.read(b);
23 os = new FileOutputStream(partFileName);
24 os.write(b, 0, s);
25 os.flush();
26 os.close();
27 } catch (IOException e) {
28 e.printStackTrace();
29 }
30 }
31 }
View Code
合并处理:
1 /**2 * 合并文件
3 *
4 * @param dirPath 拆分文件所在目录名
5 * @param partFileSuffix 拆分文件后缀名
6 * @param partFileSize 拆分文件的字节数大小
7 * @param mergeFileName 合并后的文件名
8 * @throws IOException
9 */
10 public void mergePartFiles(String dirPath, String partFileSuffix,
11 int partFileSize, String mergeFileName) throws IOException {
12 ArrayList<File> partFiles = FileUtil.getDirFiles(dirPath,
13 partFileSuffix);
14 Collections.sort(partFiles, new FileComparator());
15
16 RandomAccessFile randomAccessFile = new RandomAccessFile(mergeFileName,
17 "rw");
18 randomAccessFile.setLength(partFileSize * (partFiles.size() - 1)
19 + partFiles.get(partFiles.size() - 1).length());
20 randomAccessFile.close();
21
22 ThreadPoolExecutor threadPool = new ThreadPoolExecutor(
23 partFiles.size(), partFiles.size() * 3, 1, TimeUnit.SECONDS,
24 new ArrayBlockingQueue<Runnable>(partFiles.size() * 2));
25
26 for (int i = 0; i < partFiles.size(); i++) {
27 threadPool.execute(new MergeRunnable(i * partFileSize,
28 mergeFileName, partFiles.get(i)));
29 }
30
31 }
View Code
1 private class MergeRunnable implements Runnable {2 long startPos;
3 String mergeFileName;
4 File partFile;
5
6 public MergeRunnable(long startPos, String mergeFileName, File partFile) {
7 this.startPos = startPos;
8 this.mergeFileName = mergeFileName;
9 this.partFile = partFile;
10 }
11
12 public void run() {
13 RandomAccessFile rFile;
14 try {
15 rFile = new RandomAccessFile(mergeFileName, "rw");
16 rFile.seek(startPos);
17 FileInputStream fs = new FileInputStream(partFile);
18 byte[] b = new byte[fs.available()];
19 fs.read(b);
20 fs.close();
21 rFile.write(b);
22 rFile.close();
23 } catch (IOException e) {
24 e.printStackTrace();
25 }
26 }
27 }
View Code
为了方便文件操作,把关于文件读写的功能,全封装到FileUtil类:
1 package com.cnblogs.yjmyzz;2
3 import java.io.*;
4 import java.util.*;
5 import java.util.concurrent.*;
6
/**
 * File helper utilities: string padding, delete, directory listing,
 * whole-file read/write/append, and multi-threaded split/merge of files.
 *
 * @author yjmyzz@126.com
 * @version 0.2
 * @since 2014-11-17
 */
public class FileUtil {

    /**
     * Current working directory (system property {@code user.dir}) followed
     * by the platform's file separator.
     */
    public static String currentWorkDir = System.getProperty("user.dir")
            + File.separator;

    /** Size of the buffer used when streaming bytes between files. */
    private static final int COPY_BUFFER_SIZE = 64 * 1024;

    /**
     * Left-pads a string with a fill character up to the given length.
     *
     * @param str    string to pad
     * @param length desired minimum length
     * @param ch     fill character
     * @return {@code str} unchanged if it is already at least {@code length}
     *         characters long, otherwise the padded string
     */
    public static String leftPad(String str, int length, char ch) {
        if (str.length() >= length) {
            return str;
        }
        char[] chs = new char[length];
        Arrays.fill(chs, ch);
        char[] src = str.toCharArray();
        System.arraycopy(src, 0, chs, length - src.length, src.length);
        return new String(chs);
    }

    /**
     * Deletes a file.
     *
     * @param fileName full name of the file to delete
     * @return {@code true} if the file does not exist or was deleted
     */
    public static boolean delete(String fileName) {
        File f = new File(fileName);
        return !f.exists() || f.delete();
    }

    /**
     * Recursively collects all regular files below a directory
     * (directories themselves are not included).
     *
     * @param dirPath directory to scan
     * @return the files found; empty if {@code dirPath} is not a readable
     *         directory
     */
    public static ArrayList<File> getAllFiles(String dirPath) {
        ArrayList<File> files = new ArrayList<File>();
        // listFiles() returns null for non-directories and on I/O errors.
        File[] children = new File(dirPath).listFiles();
        if (children == null) {
            return files;
        }
        for (File child : children) {
            if (child.isFile()) {
                files.add(child);
            } else {
                files.addAll(getAllFiles(child.getPath()));
            }
        }
        return files;
    }

    /**
     * Lists the regular files directly inside a directory
     * (sub-directories are not descended into).
     *
     * @param dirPath directory to scan
     * @return the files found; empty if {@code dirPath} is not a readable
     *         directory
     */
    public static ArrayList<File> getDirFiles(String dirPath) {
        return onlyFiles(new File(dirPath).listFiles());
    }

    /**
     * Lists the regular files directly inside a directory whose names end
     * with the given suffix, ignoring case.
     *
     * @param dirPath directory to scan
     * @param suffix  file name suffix to match
     * @return the matching files; empty if {@code dirPath} is not a readable
     *         directory
     */
    public static ArrayList<File> getDirFiles(String dirPath,
            final String suffix) {
        File[] matches = new File(dirPath).listFiles(new FilenameFilter() {
            public boolean accept(File dir, String name) {
                // Locale-independent, case-insensitive endsWith; a negative
                // offset (name shorter than suffix) simply yields false.
                return name.regionMatches(true,
                        name.length() - suffix.length(), suffix, 0,
                        suffix.length());
            }
        });
        return onlyFiles(matches);
    }

    /** Keeps the regular files of a possibly-null directory listing. */
    private static ArrayList<File> onlyFiles(File[] entries) {
        ArrayList<File> files = new ArrayList<File>();
        if (entries != null) {
            for (File entry : entries) {
                if (entry.isFile()) {
                    files.add(entry);
                }
            }
        }
        return files;
    }

    /**
     * Reads a whole file and decodes it with the platform default charset.
     *
     * @param fileName full name of the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    public static String read(String fileName) throws IOException {
        FileInputStream in = new FileInputStream(new File(fileName));
        try {
            // Read until end of stream; available() is only an estimate and
            // a single read() may return fewer bytes than requested.
            ByteArrayOutputStream content = new ByteArrayOutputStream();
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            int got;
            while ((got = in.read(buffer)) != -1) {
                content.write(buffer, 0, got);
            }
            return new String(content.toByteArray());
        } finally {
            in.close();
        }
    }

    /**
     * Writes a string to a file, replacing any existing content. The string
     * is encoded with the platform default charset.
     *
     * @param fileName    target file name
     * @param fileContent content to write
     * @return {@code true} once the content has been written
     * @throws IOException if the file cannot be opened or written
     */
    public static boolean write(String fileName, String fileContent)
            throws IOException {
        FileOutputStream out = new FileOutputStream(new File(fileName));
        try {
            out.write(fileContent.getBytes());
        } finally {
            out.close();
        }
        return true;
    }

    /**
     * Appends a string to an existing file. The string is encoded with the
     * platform default charset.
     *
     * @param fileName    file to append to
     * @param fileContent content to append
     * @return {@code true} if the content was appended, {@code false} if the
     *         file does not exist (nothing is written in that case)
     * @throws IOException if the file cannot be opened or written
     */
    public static boolean append(String fileName, String fileContent)
            throws IOException {
        File f = new File(fileName);
        if (!f.exists()) {
            return false;
        }
        RandomAccessFile target = new RandomAccessFile(f, "rw");
        try {
            target.seek(target.length());
            target.write(fileContent.getBytes());
        } finally {
            target.close();
        }
        return true;
    }

    /**
     * Splits a file into consecutive parts of at most {@code byteSize} bytes.
     *
     * <p>Parts are named {@code <name>.<index>.part}, where the 1-based index
     * is zero-padded to the width of the part count. They are created
     * relative to the current working directory, because only the bare file
     * name is used. The method returns after all parts have been written.
     *
     * @param fileName full name of the file to split
     * @param byteSize maximum size of each part in bytes; must be positive
     * @return names of the part files, in order; empty for an empty file
     * @throws IOException if the file does not exist, a part cannot be
     *         written, or the calling thread is interrupted while waiting
     * @throws IllegalArgumentException if {@code byteSize} is not positive
     *         or the file would produce more than {@code Integer.MAX_VALUE}
     *         parts
     */
    public List<String> splitBySize(String fileName, int byteSize)
            throws IOException {
        if (byteSize <= 0) {
            throw new IllegalArgumentException(
                    "byteSize must be positive: " + byteSize);
        }
        File file = new File(fileName);
        if (!file.isFile()) {
            throw new FileNotFoundException(fileName);
        }
        long totalLength = file.length();
        // Integer ceiling division; avoids the precision loss of double math.
        long partCount = (totalLength + byteSize - 1) / byteSize;
        if (partCount > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("byteSize " + byteSize
                    + " yields too many parts: " + partCount);
        }
        int count = (int) partCount;
        List<String> parts = new ArrayList<String>(count);
        if (count == 0) {
            return parts;
        }
        int countLen = String.valueOf(count).length();
        List<Callable<Void>> tasks = new ArrayList<Callable<Void>>(count);
        for (int i = 0; i < count; i++) {
            String partFileName = file.getName() + "."
                    + leftPad(String.valueOf(i + 1), countLen, '0') + ".part";
            // long arithmetic so offsets beyond 2 GiB do not wrap around.
            long startPos = (long) i * byteSize;
            long length = Math.min((long) byteSize, totalLength - startPos);
            tasks.add(new SplitTask(file, startPos, length, partFileName));
            parts.add(partFileName);
        }
        runAll(tasks);
        return parts;
    }

    /**
     * Merges part files back into a single file.
     *
     * <p>The parts found in {@code dirPath} are sorted by name (ignoring
     * case) and part {@code i} is written at offset
     * {@code i * partFileSize}, so every part except the last is expected to
     * be exactly {@code partFileSize} bytes long. The method returns after
     * the merged file has been completely written.
     *
     * @param dirPath        directory containing the part files
     * @param partFileSuffix suffix identifying the part files
     * @param partFileSize   size in bytes of each part (except possibly the
     *                       last); must be positive
     * @param mergeFileName  name of the merged output file
     * @throws IOException if no part file is found, a part cannot be copied,
     *         or the calling thread is interrupted while waiting
     * @throws IllegalArgumentException if {@code partFileSize} is not
     *         positive
     */
    public void mergePartFiles(String dirPath, String partFileSuffix,
            int partFileSize, String mergeFileName) throws IOException {
        if (partFileSize <= 0) {
            throw new IllegalArgumentException(
                    "partFileSize must be positive: " + partFileSize);
        }
        ArrayList<File> partFiles = getDirFiles(dirPath, partFileSuffix);
        if (partFiles.isEmpty()) {
            throw new FileNotFoundException("No files ending with '"
                    + partFileSuffix + "' found in " + dirPath);
        }
        Collections.sort(partFiles, new FileComparator());

        int lastIndex = partFiles.size() - 1;
        // long arithmetic: the merged size may exceed Integer.MAX_VALUE.
        long mergedLength = (long) partFileSize * lastIndex
                + partFiles.get(lastIndex).length();

        // Pre-size the target so each task can write its own range.
        RandomAccessFile merged = new RandomAccessFile(mergeFileName, "rw");
        try {
            merged.setLength(mergedLength);
        } finally {
            merged.close();
        }

        List<Callable<Void>> tasks = new ArrayList<Callable<Void>>(
                partFiles.size());
        for (int i = 0; i < partFiles.size(); i++) {
            tasks.add(new MergeTask((long) i * partFileSize, mergeFileName,
                    partFiles.get(i)));
        }
        runAll(tasks);
    }

    /**
     * Runs the tasks on a bounded thread pool, waits for all of them, and
     * rethrows the first failure as an {@link IOException}.
     */
    private static void runAll(List<Callable<Void>> tasks)
            throws IOException {
        int threads = Math.max(1, Math.min(tasks.size(),
                Runtime.getRuntime().availableProcessors()));
        ExecutorService pool = Executors.newFixedThreadPool(threads);
        try {
            // invokeAll blocks until every task has completed.
            for (Future<Void> outcome : pool.invokeAll(tasks)) {
                outcome.get();
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            InterruptedIOException interrupted = new InterruptedIOException(
                    "Interrupted while waiting for file tasks");
            interrupted.initCause(e);
            throw interrupted;
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof IOException) {
                throw (IOException) cause;
            }
            throw new IOException("File task failed", cause);
        } finally {
            // Always release the worker threads.
            pool.shutdownNow();
        }
    }

    /**
     * Orders files by name, ignoring case.
     *
     * @author yjmyzz@126.com
     */
    private static class FileComparator implements Comparator<File> {
        public int compare(File o1, File o2) {
            return o1.getName().compareToIgnoreCase(o2.getName());
        }
    }

    /**
     * Task that copies one segment of the original file into a part file.
     *
     * @author yjmyzz@126.com
     */
    private static class SplitTask implements Callable<Void> {
        private final File originFile;
        private final long startPos;
        private final long length;
        private final String partFileName;

        SplitTask(File originFile, long startPos, long length,
                String partFileName) {
            this.originFile = originFile;
            this.startPos = startPos;
            this.length = length;
            this.partFileName = partFileName;
        }

        public Void call() throws IOException {
            RandomAccessFile source = new RandomAccessFile(originFile, "r");
            try {
                source.seek(startPos); // jump to the start of this segment
                OutputStream target = new FileOutputStream(partFileName);
                try {
                    byte[] buffer = new byte[COPY_BUFFER_SIZE];
                    long remaining = length;
                    while (remaining > 0) {
                        int wanted = (int) Math.min(buffer.length, remaining);
                        int got = source.read(buffer, 0, wanted);
                        if (got < 0) {
                            throw new EOFException("Unexpected end of "
                                    + originFile + " while writing "
                                    + partFileName);
                        }
                        target.write(buffer, 0, got);
                        remaining -= got;
                    }
                } finally {
                    target.close();
                }
            } finally {
                source.close();
            }
            return null;
        }
    }

    /**
     * Task that copies one part file into the merged file at a fixed offset.
     *
     * @author yjmyzz@126.com
     */
    private static class MergeTask implements Callable<Void> {
        private final long startPos;
        private final String mergeFileName;
        private final File partFile;

        MergeTask(long startPos, String mergeFileName, File partFile) {
            this.startPos = startPos;
            this.mergeFileName = mergeFileName;
            this.partFile = partFile;
        }

        public Void call() throws IOException {
            RandomAccessFile target = new RandomAccessFile(mergeFileName,
                    "rw");
            try {
                target.seek(startPos);
                InputStream source = new FileInputStream(partFile);
                try {
                    byte[] buffer = new byte[COPY_BUFFER_SIZE];
                    int got;
                    while ((got = source.read(buffer)) != -1) {
                        target.write(buffer, 0, got);
                    }
                } finally {
                    source.close();
                }
            } finally {
                target.close();
            }
            return null;
        }
    }

}
View Code
单元测试:
1 package com.cnblogs.yjmyzz;2
3 import java.io.IOException;
4
5 import org.junit.Test;
6
7 public class FileTest {
8
9 @Test
10 public void writeFile() throws IOException, InterruptedException {
11
12 System.out.println(FileUtil.currentWorkDir);
13
14 StringBuilder sb = new StringBuilder();
15
16 long originFileSize = 1024 * 1024 * 100;// 100M
17 int blockFileSize = 1024 * 1024 * 15;// 15M
18
19 // 生成一个大文件
20 for (int i = 0; i < originFileSize; i++) {
21 sb.append("A");
22 }
23
24 String fileName = FileUtil.currentWorkDir + "origin.myfile";
25 System.out.println(fileName);
26 System.out.println(FileUtil.write(fileName, sb.toString()));
27
28 // 追加内容
29 sb.setLength(0);
30 sb.append("0123456789");
31 FileUtil.append(fileName, sb.toString());
32
33 FileUtil fileUtil = new FileUtil();
34
35 // 将origin.myfile拆分
36 fileUtil.splitBySize(fileName, blockFileSize);
37
38 Thread.sleep(10000);// 稍等10秒,等前面的小文件全都写完
39
40 // 合并成新文件
41 fileUtil.mergePartFiles(FileUtil.currentWorkDir, ".part",
42 blockFileSize, FileUtil.currentWorkDir + "new.myfile");
43
44 }
45 }
View Code
以上是 java:快速文件分割及合并 的全部内容, 来源链接: utcz.com/z/394314.html