View Javadoc
1   package org.argeo.cms.security;
2   
3   import java.io.IOException;
4   import java.math.BigInteger;
5   import java.nio.MappedByteBuffer;
6   import java.nio.channels.FileChannel;
7   import java.nio.file.FileVisitResult;
8   import java.nio.file.Files;
9   import java.nio.file.Path;
10  import java.nio.file.Paths;
11  import java.nio.file.SimpleFileVisitor;
12  import java.nio.file.attribute.BasicFileAttributes;
13  import java.security.MessageDigest;
14  import java.util.Base64;
15  import java.util.zip.Checksum;
16  
17  import org.argeo.cms.CmsException;
18  
19  /** Allows to fine tune how files are read. */
20  public class ChecksumFactory {
21  	private int regionSize = 10 * 1024 * 1024;
22  
23  	public byte[] digest(Path path, final String algo) {
24  		try {
25  			final MessageDigest md = MessageDigest.getInstance(algo);
26  			if (Files.isDirectory(path)) {
27  				long begin = System.currentTimeMillis();
28  				Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
29  
30  					@Override
31  					public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
32  						if (!Files.isDirectory(file)) {
33  							byte[] digest = digest(file, algo);
34  							md.update(digest);
35  						}
36  						return FileVisitResult.CONTINUE;
37  					}
38  
39  				});
40  				byte[] digest = md.digest();
41  				long duration = System.currentTimeMillis() - begin;
42  				System.out.println(printBase64Binary(digest) + " " + path + " (" + duration / 1000 + "s)");
43  				return digest;
44  			} else {
45  				long begin = System.nanoTime();
46  				long length = -1;
47  				try (FileChannel channel = (FileChannel) Files.newByteChannel(path);) {
48  					length = channel.size();
49  					long cursor = 0;
50  					while (cursor < length) {
51  						long effectiveSize = Math.min(regionSize, length - cursor);
52  						MappedByteBuffer mb = channel.map(FileChannel.MapMode.READ_ONLY, cursor, effectiveSize);
53  						// md.update(mb);
54  						byte[] buffer = new byte[1024];
55  						while (mb.hasRemaining()) {
56  							mb.get(buffer);
57  							md.update(buffer);
58  						}
59  
60  						// sub digest
61  						// mb.flip();
62  						// MessageDigest subMd =
63  						// MessageDigest.getInstance(algo);
64  						// subMd.update(mb);
65  						// byte[] subDigest = subMd.digest();
66  						// System.out.println(" -> " + cursor);
67  						// System.out.println(IOUtils.encodeHexString(subDigest));
68  						// System.out.println(new BigInteger(1,
69  						// subDigest).toString(16));
70  						// System.out.println(new BigInteger(1, subDigest)
71  						// .toString(Character.MAX_RADIX));
72  						// System.out.println(printBase64Binary(subDigest));
73  
74  						cursor = cursor + regionSize;
75  					}
76  					byte[] digest = md.digest();
77  					long duration = System.nanoTime() - begin;
78  					System.out.println(printBase64Binary(digest) + " " + path.getFileName() + " (" + duration / 1000000
79  							+ "ms, " + (length / 1024) + "kB, " + (length / (duration / 1000000)) * 1000 / (1024 * 1024)
80  							+ " MB/s)");
81  					return digest;
82  				}
83  			}
84  		} catch (Exception e) {
85  			throw new CmsException("Cannot digest " + path, e);
86  		}
87  	}
88  
89  	/** Whether the file should be mapped. */
90  	protected boolean mapFile(FileChannel fileChannel) throws IOException {
91  		long size = fileChannel.size();
92  		if (size > (regionSize / 10))
93  			return true;
94  		return false;
95  	}
96  
97  	public long checksum(Path path, Checksum crc) {
98  		final int bufferSize = 2 * 1024 * 1024;
99  		long begin = System.currentTimeMillis();
100 		try (FileChannel channel = (FileChannel) Files.newByteChannel(path);) {
101 			byte[] bytes = new byte[bufferSize];
102 			long length = channel.size();
103 			long cursor = 0;
104 			while (cursor < length) {
105 				long effectiveSize = Math.min(regionSize, length - cursor);
106 				MappedByteBuffer mb = channel.map(FileChannel.MapMode.READ_ONLY, cursor, effectiveSize);
107 				int nGet;
108 				while (mb.hasRemaining()) {
109 					nGet = Math.min(mb.remaining(), bufferSize);
110 					mb.get(bytes, 0, nGet);
111 					crc.update(bytes, 0, nGet);
112 				}
113 				cursor = cursor + regionSize;
114 			}
115 			return crc.getValue();
116 		} catch (Exception e) {
117 			throw new CmsException("Cannot checksum " + path, e);
118 		} finally {
119 			long duration = System.currentTimeMillis() - begin;
120 			System.out.println(duration / 1000 + "s");
121 		}
122 	}
123 
124 	public static void main(String... args) {
125 		ChecksumFactory cf = new ChecksumFactory();
126 		// Path path =
127 		// Paths.get("/home/mbaudier/apache-maven-3.2.3-bin.tar.gz");
128 		Path path;
129 		if (args.length > 0) {
130 			path = Paths.get(args[0]);
131 		} else {
132 			path = Paths.get("/home/mbaudier/Downloads/torrents/CentOS-7-x86_64-DVD-1503-01/"
133 					+ "CentOS-7-x86_64-DVD-1503-01.iso");
134 		}
135 		// long adler = cf.checksum(path, new Adler32());
136 		// System.out.format("Adler=%d%n", adler);
137 		// long crc = cf.checksum(path, new CRC32());
138 		// System.out.format("CRC=%d%n", crc);
139 		String algo = "SHA1";
140 		byte[] digest = cf.digest(path, algo);
141 		System.out.println(algo + " " + printBase64Binary(digest));
142 		System.out.println(algo + " " + new BigInteger(1, digest).toString(16));
143 		// String sha1 = printBase64Binary(cf.digest(path, "SHA1"));
144 		// System.out.format("SHA1=%s%n", sha1);
145 	}
146 
147 	private static String printBase64Binary(byte[] arr) {
148 		return Base64.getEncoder().encodeToString(arr);
149 	}
150 }