#!/bin/sh
#
# AUTHOR: Tony Sanderson, Bluehaze Solutions (http://www.bluehaze.com.au).
# Version 2.7, April 1, 2002
# -----------------------------------------------
#
# <<<< This is no longer the current version >>>>
#
# -----------------------------------------------
#
# HISTORY (latest at top):
#
# V.2.7, April 1, 2002: Now saves previous "list" files as date-suffixed
#   rather than just over-writing (never know when they may be handy).
#   Add 3 new excludes to get rid of some apache/webalizer log-file
#   backups (500Mb + worth in my case). Increase CHUNK_SIZE from 630000000
#   to 670000000 for 650Mb CDs, and add an (optional) def for 700Mb CDs.
#   And more comments.
#
# V.2.6, Dec 5, 2001: Another typo found by Justin Noack. In Vers 2.5,
#   var FIND_ARGS had been mis-typed as FIND_ARG in one line. This caused
#   the delta backup arg (# days) to be ignored. Corrected (thanks Justin!)
#
# V.2.5, Nov 21, 2001: Add "-depth" arg and remove "-type f" from find cmd
#   to ensure that dir perms and ownerships are saved in the archive.
#   This was causing restored directories to take incorrect owner/perms.
#   (Bug found by Justin Noack, JN Computer Care and Coloma Community Schools)
#
# V.2.4, Jun 10, 2001: Add "-H crc" arg to cpio (enables > 65k inodes).
#
# V.2.3, Jun 9, 2001:
#   a) Add exclusion dirs /usr/share, /usr/src, /usr/doc, */old_stuff/*.
#   b) Add "full_list" (so we can see what *wasn't* backed up as well).
#   c) Add FIND_ERRS to keep a list of mis-traversed dirs.
#
# V.2.1, Mar 11, 2001: Pipe output of cpio into fsplit instead of generating
#   intermediate files via cpio and then having fsplit read these later. May
#   run a bit faster, and will use less disc.
# ----------------------------------------
#
# DESCRIPTION:
#
# Backup - creates split cpio archives of a unix system which can then be
# burned onto a set of CD-Rs. A sed RE is used to exclude a few unwanted
# directories. Assumes the availability of GNU 'find' and GNU 'cpio' or
# equivalent.
#
# To restrict the backup to files which are < N days old, supply N as a
# numeric parameter, eg: "backup 365" means backup everything < 365 days old.
#
# The resulting archive is split via "fsplit" into CHUNK_SIZE-byte chunks
# (sized for a 650Mb or 700Mb CD-R, see below) called
# <archive-name>.cpio.000, .001, etc for convenient copying onto CD-Rs.
#
# To recover, copy the chunks off the CDs into an empty directory,
# and type:
#
#     for f in *.cpio.[0-9]*
#     do
#         cat $f >> filename && rm $f
#     done
#
# Now you can recover. For example, to get EVERYTHING back, use:
#
#     cd junk
#     cpio -idmv < filename 2>errs
#     (Assumes the archive chunks are in the 'junk' directory)
#
# <<<< CAVEAT >>>>
# The above loop may FAIL if 'filename' reaches 2Gb in size with Linux.
# (You will eventually get an error from cat, viz: 'cat: File Too Large'.)
# Most other 'nixes (Solaris, HPUX, FreeBSD) are free of this limit now.
# But if you are using Linux AND your set of CD chunks exceeds 2Gb in total,
# you'll have to recover using:
#
#     cd junk
#     cat *.cpio.00* | cpio -idmv 2>errs
#     (Assumes the archive chunks are in the 'junk' directory)
#
# or, for a FULL 'over the top' recovery:
#
#     cd /
#     cat *.cpio.00* | cpio -idmv
#
# Because the latter methods don't rebuild the original archive, they're also
# much quicker and require less disc space.
# --------------------------------------------------------------------------

#set -x    # Uncomment for debugging

#===========================================================================
# PLAY HERE
#===========================================================================
# Users may like to play with the following 3 lines, and also the 'sed' RE
# terms near the end of the script.
#
# Un-comment the desired CHUNK_SIZE below. Most people will probably want
# this set for 650 or 700 Mb more or less permanently.
# Do NOT USE the large size unless you are CERTAIN that your burners and
# readers can handle it!
# Stick with the 2nd one (650Mb) if you aren't sure:

# For 700Mb = 1024*1024*700 = 734003200 bytes, uncomment next line:
#CHUNK_SIZE=720000000    # To fit on a 700Mb CD-R with about 14Mb to spare.

# For 650Mb = 1024*1024*650 = 681574400 bytes, uncomment next line:
CHUNK_SIZE=670000000     # To fit on a std 650Mb CD-R with about 11Mb to spare.

# And change this to set your desired backup dir:
BKUP_DIR=/usr/local/backups
#===========================================================================

# Abort early if the backup dir is missing - every later step writes into it.
if [ ! -d "$BKUP_DIR" ]
then
    echo "backup: backup directory $BKUP_DIR does not exist" >&2
    exit 1
fi

ARC_NAME="$BKUP_DIR/$(date '+%Y%m%d%H%M').cpio"  # base name of archive chunks
BKUP_LIST="$BKUP_DIR/list"           # list of files that WILL be backed up
FULL_LIST="$BKUP_DIR/full_list"      # optional list of ALL files (see below)
ERRS="$BKUP_DIR/errors"              # stderr from cpio
FIND_ERRS="$BKUP_DIR/find_errors"    # mis-traversed dirs reported by find

# -depth (with no "-type f") so directory perms and ownerships are saved in
# the archive and restore correctly (see V.2.5 history note).
FIND_ARGS=" -depth "
# -H crc enables archives with more than 65k inodes (see V.2.4 history note).
CPIO_ARGS=" -o -H crc "
DATE_STAMP=$(date '+%Y%m%d%H')

# Optional numeric argument: restrict the backup to files modified within
# the last $1 days, eg "backup 365".
if [ $# -gt 0 ]
then
    FIND_ARGS="$FIND_ARGS -mtime -${1} "
fi

# Save the previous "list" file date-suffixed rather than over-writing it -
# never know when it may be handy (see V.2.7 history note).
if [ -f "$BKUP_LIST" ]
then
    mv "$BKUP_LIST" "${BKUP_LIST}.$DATE_STAMP"
fi

# The whole backup is taken relative to "/" - abort if we can't get there,
# otherwise find would archive the wrong tree.
cd / || exit 1

# The initial unrestricted 'find' just records a full list of all files
# on the system in case we want to know later. The script doesn't use it.
# So comment this line in or out as required (I normally leave it off):
#find . $FIND_ARGS > "$FULL_LIST" 2>/dev/null

#===========================================================================
# ALSO PLAY HERE
#===========================================================================
#
# Adjust the sed stuff below to suit YOUR system. Each exclusion phrase
# is preceded by a '-e', and all lines but the last should have a
# terminating '\'.
# Also don't forget that path slashes (ie: /) in each sed RE need to include
# a preceding '\' (escape). The un-escaped ones are sed's RE deliniation.
#
#===========================================================================
# Using "find", we now create the list of wanted files. This is everything
# below "/" except for a few notable exclusions that we remove via sed.
# NB: $FIND_ARGS is deliberately unquoted - the shell must word-split it
# into separate arguments for find (POSIX sh has no arrays).
find . $FIND_ARGS 2>"$FIND_ERRS" | sed -e '/src\/XFree86/d' \
    -e '/\/lost+found\//d' -e '/\/nobackup\//d' -e '/^\.\/dev\//d' \
    -e '/\/\.old\//d' -e '/\/backups\//d' -e '/\/old\//d' -e '/\/tmp\//d' \
    -e '/^\.\/usr\/share\//d' -e '/^\.\/usr\/src\//d' -e '/^\.\/usr\/doc\//d' \
    -e '/^\.\/proc\//d' -e '/^\.\/cdrom\//d' -e '/^\.\/mnt\//d' \
    -e '/\/bak\//d' -e '/\/apache\/logs\/200/d' \
    -e '/\/apache\/logs\/webalizer\/archives\/done/d' \
    -e '/\/junk\//d' -e '/\/old_stuff\//d' > "$BKUP_LIST"

# Now feed the list into cpio and through fsplit to build the archive chunks
# ($CPIO_ARGS deliberately unquoted, as with $FIND_ARGS above):
cpio $CPIO_ARGS < "$BKUP_LIST" 2>"$ERRS" | fsplit -s "$CHUNK_SIZE" -o "${ARC_NAME}"

# Script ends.