View Javadoc
1   package org.argeo.util;
2   
3   import java.io.PrintStream;
4   import java.nio.charset.Charset;
5   import java.nio.file.DirectoryStream;
6   import java.nio.file.Files;
7   import java.nio.file.Path;
8   import java.nio.file.Paths;
9   import java.security.MessageDigest;
10  import java.security.NoSuchAlgorithmException;
11  import java.util.ArrayList;
12  import java.util.Arrays;
13  import java.util.List;
14  
15  /** Hashes the hashes of the files in a directory.*/
16  public class DirH {
17  
18  	private final static Charset charset = Charset.forName("UTF-16");
19  	private final static long bufferSize = 200 * 1024 * 1024;
20  	private final static String algorithm = "SHA";
21  
22  	private final static byte EOL = (byte) '\n';
23  	private final static byte SPACE = (byte) ' ';
24  
25  	private final int hashSize;
26  
27  	private final byte[][] hashes;
28  	private final byte[][] fileNames;
29  	private final byte[] digest;
30  	private final byte[] dirName;
31  
32  	/**
33  	 * @param dirName
34  	 *            can be null or empty
35  	 */
36  	private DirH(byte[][] hashes, byte[][] fileNames, byte[] dirName) {
37  		if (hashes.length != fileNames.length)
38  			throw new UtilsException(hashes.length + " hashes and " + fileNames.length + " file names");
39  		this.hashes = hashes;
40  		this.fileNames = fileNames;
41  		this.dirName = dirName == null ? new byte[0] : dirName;
42  		if (hashes.length == 0) {// empty dir
43  			hashSize = 20;
44  			// FIXME what is the digest of an empty dir?
45  			digest = new byte[hashSize];
46  			Arrays.fill(digest, SPACE);
47  			return;
48  		}
49  		hashSize = hashes[0].length;
50  		for (int i = 0; i < hashes.length; i++) {
51  			if (hashes[i].length != hashSize)
52  				throw new UtilsException(
53  						"Hash size for " + new String(fileNames[i], charset) + " is " + hashes[i].length);
54  		}
55  
56  		try {
57  			MessageDigest md = MessageDigest.getInstance(algorithm);
58  			for (int i = 0; i < hashes.length; i++) {
59  				md.update(this.hashes[i]);
60  				md.update(SPACE);
61  				md.update(this.fileNames[i]);
62  				md.update(EOL);
63  			}
64  			digest = md.digest();
65  		} catch (NoSuchAlgorithmException e) {
66  			throw new UtilsException("Cannot digest", e);
67  		}
68  	}
69  
70  	public void print(PrintStream out) {
71  		out.print(DigestUtils.encodeHexString(digest));
72  		if (dirName.length > 0) {
73  			out.print(' ');
74  			out.print(new String(dirName, charset));
75  		}
76  		out.print('\n');
77  		for (int i = 0; i < hashes.length; i++) {
78  			out.print(DigestUtils.encodeHexString(hashes[i]));
79  			out.print(' ');
80  			out.print(new String(fileNames[i], charset));
81  			out.print('\n');
82  		}
83  	}
84  
85  	public static DirH digest(Path dir) {
86  		try (DirectoryStream<Path> files = Files.newDirectoryStream(dir)) {
87  			List<byte[]> hs = new ArrayList<byte[]>();
88  			List<String> fNames = new ArrayList<>();
89  			for (Path file : files) {
90  				if (!Files.isDirectory(file)) {
91  					byte[] digest = DigestUtils.digestRaw(algorithm, file, bufferSize);
92  					hs.add(digest);
93  					fNames.add(file.getFileName().toString());
94  				}
95  			}
96  
97  			byte[][] fileNames = new byte[fNames.size()][];
98  			for (int i = 0; i < fNames.size(); i++) {
99  				fileNames[i] = fNames.get(i).getBytes(charset);
100 			}
101 			byte[][] hashes = hs.toArray(new byte[hs.size()][]);
102 			return new DirH(hashes, fileNames, dir.toString().getBytes(charset));
103 		} catch (Exception e) {
104 			throw new UtilsException("Cannot digest " + dir, e);
105 		}
106 	}
107 
108 	public static void main(String[] args) {
109 		try {
110 			DirH dirH = DirH.digest(Paths.get("/home/mbaudier/tmp/"));
111 			dirH.print(System.out);
112 		} catch (Exception e) {
113 			e.printStackTrace();
114 		}
115 	}
116 }