Backup data preparation is a two-step process:
Edit colapse.awk and disk-split.awk for correct directories and file sizes, then run:
awk -f colapse.awk
awk -f disk-split.awk
I recommend running this with the batch, at, or nohup command.
Use find -type f -or -type l when collecting files if you want symbolic links to be archived as well.
The scripts rely on BSD find capabilities. For other Unixes the find command can differ substantially.
# This AWK script creates a compressed backup tree with the ZIP archiver,
# limiting the size of the data put into each .zip file to GRANULATION.
# If a directory is too big, its subdirectories are compressed recursively.
# Files directly inside a big directory are split into several archives
# named "#0", "#1", ... (zip appends the ".zip" suffix) if needed.
# Note that ZIP file size is limited to 2^31 bytes (2GB).
#
# Sizes are measured on the uncompressed data (du / stat), so the resulting
# archives are normally smaller than GRANULATION.
# Limitation: file and directory names containing a newline are not
# supported, because find/stat output is parsed line by line.
#
# usage: awk -f colapse.awk
# author: (C) 2003, Leon Kos
# License: GPL
# OS: BSD (uses BSD find(1) and stat(1) syntax)

BEGIN {
    SRC = "/home/home/staff";
    DST = "/home/staff-backup";
    # File size granularity in megabytes, converted to bytes below
    GRANULATION = 2000;
    GRANULATION *= 1024 * 1024;
    ZIP_OPTS = "-9q";

    # Always start from an empty destination tree.  DST is created
    # explicitly so that archives can be written even when the top-level
    # directory itself is small.
    system("rm -rf " shquote(DST));
    run("mkdir -p " shquote(DST));

    # find prints parents before their children, so a large parent is
    # always recorded in tree[] before any of its subdirectories is seen.
    findcmd = "find " shquote(SRC) " -type d -print";
    while ((findcmd | getline path) > 0) {
        # Path relative to SRC; empty for SRC itself.  SRC is stripped as a
        # literal prefix, not as a regular expression.
        dir_name = substr(path, length(SRC) + 1);
        sub(/^\//, "", dir_name);

        size = disk_usage(SRC "/" dir_name);

        if (size > GRANULATION) {
            # Large directory: mirror it in DST, archive only the files
            # directly inside it and let later iterations handle subdirs.
            tree["/" dir_name] = size;
            print DST "/" dir_name;
            run("mkdir -p " shquote(DST "/" dir_name));
            collect_files(SRC, DST, dir_name);
        } else if (dir_name == "") {
            # The whole source tree fits into a single archive.  Store it
            # in one go and stop; otherwise files directly in SRC would
            # never be archived.
            n = split(SRC, parts, "/");
            base = parts[n];
            if (base == "")
                base = "root";
            run("cd " shquote(SRC) " && zip " ZIP_OPTS " -r " \
                shquote(DST "/" base ".zip") " .");
            break;
        } else {
            # Small directory: archive it recursively, but only if its
            # parent is a large (split) directory or the top level.
            # Otherwise it is already covered by an ancestor's archive.
            depth = split(dir_name, A, "/");
            parent = "";
            for (i = 1; i < depth; i++)
                parent = parent "/" A[i];
            if (parent == "" || (parent in tree)) {
                run("cd " shquote(SRC) " && zip " ZIP_OPTS " -r " \
                    shquote(DST "/" dir_name ".zip") " " shquote(dir_name));
            }
        }
    }
    close(findcmd);
    exit(0);
}

# Quote a string so that /bin/sh passes it on as exactly one literal word.
function shquote(s) {
    gsub(/'/, "'\\''", s);
    return "'" s "'";
}

# Run a shell command; report a non-zero status on stderr and keep going.
# Returns the status reported by system().
function run(cmd,    status) {
    status = system(cmd);
    if (status != 0)
        print "colapse.awk: command failed (" status "): " cmd > "/dev/stderr";
    return status;
}

# Return the disk usage of path in bytes as reported by "du -sk",
# or 0 if du produced no output.
function disk_usage(path,    cmd, line, size) {
    cmd = "du -sk " shquote(path);
    size = 0;
    if ((cmd | getline line) > 0)
        size = (line + 0) * 1024;    # leading number of "<kbytes>\t<path>"
    close(cmd);
    return size;
}

# Close the pipe to a running "zip -@" and report failures on stderr.
function finish_archive(zipcmd,    status) {
    status = close(zipcmd);
    if (status != 0)
        print "colapse.awk: zip failed (" status "): " zipcmd > "/dev/stderr";
}

# Collect the files directly inside the big directory src/dir_name
# (no subdirectories) and store them in the archives dst/dir_name/#0,
# #1, ...  A new archive is started once the files added to the current
# one exceed GRANULATION bytes in total.
#   src, dst  - roots of the source and the backup tree
#   dir_name  - directory relative to src ("" for src itself)
# The remaining parameters are local variables.
function collect_files(src, dst, dir_name,    cmd, line, tab, size, rel, \
                       total, pending, archive_number, zipcmd, dir_path) {
    dir_path = (dir_name == "") ? src : src "/" dir_name;
    cmd = "find " shquote(dir_path) " -maxdepth 1 -type f -print0 |" \
          " xargs -0 stat -f '%z%t%N'";
    total = 0;
    pending = 0;          # files sent to the archive currently being built
    archive_number = 0;
    zipcmd = "";
    while ((cmd | getline line) > 0) {
        # Line format is "<size>\t<name>".  Split on the first tab only so
        # that names containing tabs survive.
        tab = index(line, "\t");
        if (tab == 0)
            continue;
        size = substr(line, 1, tab - 1) + 0;
        rel = substr(line, tab + 1);
        if (index(rel, src) == 1)
            rel = substr(rel, length(src) + 1);
        sub(/^\/+/, "", rel);

        # Names are streamed to zip over a pipe rather than passed on a
        # command line, so neither the number of files nor special
        # characters in their names can break the command.
        if (pending == 0)
            zipcmd = "cd " shquote(src) " && zip " ZIP_OPTS " -@ " \
                     shquote(dst "/" dir_name "/#" archive_number);
        print rel | zipcmd;
        pending++;
        total += size;

        if (total > GRANULATION) {
            finish_archive(zipcmd);
            pending = 0;
            total = 0;
            archive_number++;
        }
    }
    close(cmd);
    # Flush the last, partially filled archive.  Testing the file count
    # instead of the byte total keeps trailing zero-length files.
    if (pending > 0)
        finish_archive(zipcmd);
}
# CDR & DVD backup
#
# We assume a backup tree created with the colapse.awk utility.
# The size of the compressed archives should be at most the size of the
# target removable media.  This assures that a split archive will span
# at most two disks.  This recommendation is not obligatory for this
# utility.  If you span an archive over more disks, more time/disk space is
# needed for splitting.  Again, please note that there is a 2^31 file size
# limitation!  This means that a zip archive cannot span more than two DVD
# media!
# Small archives are only symbolically linked.  For creating real disk
# images one should use symbolic link dereference with the utility which
# will transfer files to the media!  For example use "du -L *" in the
# created disk directories to verify that the largest disk will fit the
# media!
# To prevent an archive from being split over two disks with a small head
# on one disk and a large tail on another, the KEEP_TOGETHER parameter
# assigns the allowable space waste (tolerance) to be left on one disk just
# to keep things together (e.g. 3% of disk).  Please note that space waste
# occurs also in the zipsplit utility.  All sizes are given in BYTES.
# I recommend starting with a greedy 1% KEEP_TOGETHER and increasing it
# until the number of disks required for the whole backup is the same.
# If one wants to rely on zipsplit bin packing with no widow protection, it
# is also admissible to set KEEP_TOGETHER=0;
#
# Limitation: file names containing a newline are not supported, because
# find/stat/zipsplit output is parsed line by line.
#
# Exit status: 0 on success, 2 if zipsplit produced no pieces,
#              3 if the size of a created piece cannot be determined.
#
# author: (C) 2003, Leon Kos
# License: GPL
# usage: awk -f disk-split.awk
# OS: BSD (uses BSD find(1) and stat(1) syntax)
#

BEGIN {
    # temporary storage of large zips created with colapse.awk
    SRC = "/home/staff-backup";
    # split zips for backup to CDR/DVD
    DST = "/home/dvd-split";
    MB = 1024 * 1024;
    # DISK_SIZE = 700 * MB;                # CDR
    DISK_SIZE = 4400 * MB;                 # DVD
    KEEP_TOGETHER = DISK_SIZE / 100 * 2;   # Prevent widow archives
    ZIP_MAX = 2^31 - 1;                    # largest zip file size

    disk = 0;
    total_size = 0;      # bytes already placed on the current disk

    system("rm -rf " shquote(DST));
    system("mkdir -p " shquote(disk_dir(disk)));

    findcmd = "find " shquote(SRC) " -type f -name '*.zip' -print0 |" \
              " xargs -0 stat -f '%z%t%N'";
    while ((findcmd | getline line) > 0) {
        # Line format is "<size>\t<name>".  Split on the first tab only so
        # that names containing tabs survive.
        tab = index(line, "\t");
        if (tab == 0)
            continue;
        size = substr(line, 1, tab - 1) + 0;
        filename = substr(line, tab + 1);

        if ((total_size + size > DISK_SIZE) && (size >= KEEP_TOGETHER)) {
            # The archive does not fit on the current disk and is too big
            # to be moved to the next one as a whole: split it so that the
            # first piece fills the space left on the current disk.
            if (DISK_SIZE > ZIP_MAX) {
                # zip size limitation: pieces cannot exceed ZIP_MAX
                piece_max = ZIP_MAX;
                room = ZIP_MAX - (DISK_SIZE - total_size);
            } else {
                piece_max = DISK_SIZE;
                room = total_size;
            }
            if (room < 0)
                room = 0;
            # Only the numbers go through sprintf; the file name is
            # appended afterwards so a '%' in it cannot corrupt the format.
            cmdsplit = sprintf("zipsplit -n %.0f -r %.0f -b ", piece_max, room) \
                       shquote(disk_dir(disk)) " " shquote(filename);

            # Read the complete zipsplit output and wait for it to finish
            # before touching any piece, so that no file is measured or
            # moved while it is still being written.
            npieces = 0;
            while ((cmdsplit | getline line) > 0) {
                if (sub(/^creating: /, "", line))
                    pieces[++npieces] = line;
            }
            close(cmdsplit);
            if (npieces == 0) {
                print "disk-split.awk: zipsplit produced no pieces: " \
                      cmdsplit > "/dev/stderr";
                exit(2);
            }

            # The first piece stays on the current disk; every further
            # piece opens a new disk.
            for (i = 1; i <= npieces; i++) {
                if (i > 1) {
                    disk++;
                    total_size = 0;
                    print "mkdir " disk_dir(disk);
                    system("mkdir " shquote(disk_dir(disk)));
                }
                # The actual piece size is needed to fill up the disk
                piece_size = file_size(pieces[i]);
                if (piece_size < 0)
                    exit(3);
                total_size += piece_size;
                system("mv " shquote(pieces[i]) " " \
                       shquote(disk_dir(disk) "/" mangle(filename) "." (i - 1) ".zip"));
            }
        } else {
            # Just link to the original archive
            if (total_size + size > DISK_SIZE) {
                # Small archive that does not fit: leave the rest of the
                # current disk unused and start a new one.
                disk++;
                system("mkdir " shquote(disk_dir(disk)));
                total_size = 0;
                print "Widow protection for ", filename;
            }
            total_size += size;
            system("ln -s " shquote(filename) " " \
                   shquote(disk_dir(disk) "/" mangle(filename) ".zip"));
        }
    }
    close(findcmd);
    exit(0);
}

# Quote a string so that /bin/sh passes it on as exactly one literal word.
function shquote(s) {
    gsub(/'/, "'\\''", s);
    return "'" s "'";
}

# Directory holding the content of disk number n (DST/disk00, DST/disk01, ...).
function disk_dir(n) {
    return DST sprintf("/disk%02d", n);
}

# Size of a file in bytes as reported by stat(1), or -1 on failure.
function file_size(path,    cmd, out, ok) {
    cmd = "stat -f %z " shquote(path);
    ok = (cmd | getline out);
    close(cmd);
    return (ok > 0) ? out + 0 : -1;
}

# Turn the path of an archive into a flat file name: strip the SRC (or DST)
# prefix and the leading slash, replace remaining slashes with dashes and
# drop the ".zip" suffix.  The prefixes are compared literally, not as
# regular expressions.
function mangle(filename,    mangled) {
    mangled = filename;
    if (index(mangled, SRC) == 1)
        mangled = substr(mangled, length(SRC) + 1);
    if (index(mangled, DST) == 1)
        mangled = substr(mangled, length(DST) + 1);
    sub(/^\//, "", mangled);
    gsub(/\//, "-", mangled);
    # sub("#", ".", mangled);
    sub(/\.zip$/, "", mangled);
    return mangled;
}