I was backing up a large number of files to another computer when the idea came to me to write a Python script that generates and validates batch MD5 checksums. Feel free to customize the script according to your needs.
import hashlib
import os
import sys
def generate_md5(file_path):
    """Return the hex MD5 digest of the file at *file_path*.

    The path is stripped of surrounding whitespace before opening, so a
    path read straight from a checksum line (with its trailing newline)
    works unchanged.
    """
    digest = hashlib.md5()
    with open(file_path.strip(), "rb") as f:
        # Stream in fixed-size chunks instead of f.read() so arbitrarily
        # large files never have to fit in memory at once.
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
def validate(md5_digest, file_path):
    """Return True when the file at *file_path* still hashes to *md5_digest*."""
    actual = generate_md5(file_path)
    return actual == md5_digest
def run(argv):
    """Dispatch the sub-command in argv[0]: ``generate`` or ``check``.

    ``generate`` walks the current directory and writes one
    ``<md5>\\t<path>`` line per file to ``checksum.md5``; ``check`` reads
    that file back and reports per-file validity plus a summary count.
    Unknown or missing sub-commands do nothing.
    """
    if not argv:
        return
    command = argv[0].lower()
    if command == "generate":
        with open("checksum.md5", "w") as checksum_file:
            # Walk the current directory; parameterize the root if you want.
            # os.walk (rather than os.fwalk) is portable to Windows, and the
            # original never used the dirs/dirfd results anyway.
            for root, _dirs, files in os.walk("."):
                for name in files:
                    path = f"{root}/{name}"
                    # Skip the checksum file itself: it is open for writing
                    # while we walk, so its recorded digest could never
                    # re-verify on a later "check" run.
                    if path == "./checksum.md5":
                        continue
                    md5_digest = generate_md5(path)
                    print(f"{md5_digest}\t{path}")
                    checksum_file.write(f"{md5_digest}\t{path}\n")
                    checksum_file.flush()
    elif command == "check":
        stats = {"valid": 0, "not_valid": 0}
        with open("checksum.md5", "r") as checksum_file:
            # One "<md5>\t<path>" record per line. partition (not split)
            # keeps paths that contain tab characters intact.
            for line in checksum_file:
                md5_digest, _sep, path = line.partition("\t")
                validation_result = validate(md5_digest, path)
                # *path* still carries its trailing newline, hence end="".
                print(f"{'Ok' if validation_result else 'Not Okay'} = {path}", end="")
                if validation_result:
                    stats["valid"] += 1
                else:
                    stats["not_valid"] += 1
        print(stats)
if __name__ == "__main__":
    # Forward everything after the script name as sub-command + arguments.
    run(sys.argv[1:])