#!/bin/bash # wgetter2.bash # Author: Little Monster [monster@monstruum.co.uk] # ==> Used in ABS Guide with permission of script author. # ==> This script still needs debugging and fixups (exercise for reader). # ==> It could also use some additional editing in the comments. # This is wgetter2 -- #+ a Bash script to make wget a bit more friendly, and save typing. # Carefully crafted by Little Monster. # More or less complete on 02/02/2005. # If you think this script can be improved, #+ email me at: monster@monstruum.co.uk # ==> and cc: to the author of the ABS Guide, please. # This script is licenced under the GPL. # You are free to copy, alter and re-use it, #+ but please don't try to claim you wrote it. # Log your changes here instead. # ======================================================================= # changelog: # 07/02/2005. Fixups by Little Monster. # 02/02/2005. Minor additions by Little Monster. # (See after # +++++++++++ ) # 29/01/2005. Minor stylistic edits and cleanups by author of ABS Guide. # Added exit error codes. # 22/11/2004. Finished initial version of second version of wgetter: # wgetter2 is born. # 01/12/2004. Changed 'runn' function so it can be run 2 ways -- # either ask for a file name or have one input on the CL. # 01/12/2004. Made sensible handling of no URL's given. # 01/12/2004. Made loop of main options, so you don't # have to keep calling wgetter 2 all the time. # Runs as a session instead. # 01/12/2004. Added looping to 'runn' function. # Simplified and improved. # 01/12/2004. Added state to recursion setting. # Enables re-use of previous value. # 05/12/2004. Modified the file detection routine in the 'runn' function # so it's not fooled by empty values, and is cleaner. # 01/02/2004. Added cookie finding routine from later version (which # isn't ready yet), so as not to have hard-coded paths. # ======================================================================= # Error codes for abnormal exit. E_USAGE=67 # Usage message, then quit. E_NO_OPTS=68 # No command-line args entered. E_NO_URLS=69 # No URLs passed to script. E_NO_SAVEFILE=70 # No save filename passed to script. E_USER_EXIT=71 # User decides to quit. # Basic default wget command we want to use. # This is the place to change it, if required. # NB: if using a proxy, set http_proxy = yourproxy in .wgetrc. # Otherwise delete --proxy=on, below. # ==================================================================== CommandA="wget -nc -c -t 5 --progress=bar --random-wait --proxy=on -r" # ==================================================================== # -------------------------------------------------------------------- # Set some other variables and explain them. pattern=" -A .jpg,.JPG,.jpeg,.JPEG,.gif,.GIF,.htm,.html,.shtml,.php" # wget's option to only get certain types of file. # comment out if not using today=`date +%F` # Used for a filename. home=$HOME # Set HOME to an internal variable. # In case some other path is used, change it here. depthDefault=3 # Set a sensible default recursion. Depth=$depthDefault # Otherwise user feedback doesn't tie in properly. RefA="" # Set blank referring page. Flag="" # Default to not saving anything, #+ or whatever else might be wanted in future. lister="" # Used for passing a list of urls directly to wget. Woptions="" # Used for passing wget some options for itself. inFile="" # Used for the run function. newFile="" # Used for the run function. savePath="$home/w-save" Config="$home/.wgetter2rc" # This is where some variables can be stored, #+ if permanently changed from within the script. Cookie_List="$home/.cookielist" # So we know where the cookies are kept . . . cFlag="" # Part of the cookie file selection routine. # Define the options available. Easy to change letters here if needed. # These are the optional options; you don't just wait to be asked. save=s # Save command instead of executing it. cook=c # Change cookie file for this session. help=h # Usage guide. list=l # Pass wget the -i option and URL list. runn=r # Run saved commands as an argument to the option. inpu=i # Run saved commands interactively. wopt=w # Allow to enter options to pass directly to wget. # -------------------------------------------------------------------- if [ -z "$1" ]; then # Make sure we get something for wget to eat. echo "You must at least enter a URL or option!" echo "-$help for usage." exit $E_NO_OPTS fi # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # added added added added added added added added added added added added if [ ! -e "$Config" ]; then # See if configuration file exists. echo "Creating configuration file, $Config" echo "# This is the configuration file for wgetter2" > "$Config" echo "# Your customised settings will be saved in this file" >> "$Config" else source $Config # Import variables we set outside the script. fi if [ ! -e "$Cookie_List" ]; then # Set up a list of cookie files, if there isn't one. echo "Hunting for cookies . . ." find -name cookies.txt >> $Cookie_List # Create the list of cookie files. fi # Isolate this in its own 'if' statement, #+ in case we got interrupted while searching. if [ -z "$cFlag" ]; then # If we haven't already done this . . . echo # Make a nice space after the command prompt. echo "Looks like you haven't set up your source of cookies yet." n=0 # Make sure the counter #+ doesn't contain random values. while read; do Cookies[$n]=$REPLY # Put the cookie files we found into an array. echo "$n) ${Cookies[$n]}" # Create a menu. n=$(( n + 1 )) # Increment the counter. done < $Cookie_List # Feed the read statement. echo "Enter the number of the cookie file you want to use." echo "If you won't be using cookies, just press RETURN." echo echo "I won't be asking this again. Edit $Config" echo "If you decide to change at a later date" echo "or use the -${cook} option for per session changes." read if [ ! -z $REPLY ]; then # User didn't just press return. Cookie=" --load-cookies ${Cookies[$REPLY]}" # Set the variable here as well as in the config file. echo "Cookie=\" --load-cookies ${Cookies[$REPLY]}\"" >> $Config fi echo "cFlag=1" >> $Config # So we know not to ask again. fi # end added section end added section end added section end added section # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # Another variable. # This one may or may not be subject to variation. # A bit like the small print. CookiesON=$Cookie # echo "cookie file is $CookiesON" # For debugging. # echo "home is ${home}" # For debugging. # Got caught with this one! wopts() { echo "Enter options to pass to wget." echo "It is assumed you know what you're doing." echo echo "You can pass their arguments here too." # That is to say, everything passed here is passed to wget. read Wopts # Read in the options to be passed to wget. Woptions=" $Wopts" # ^ Why the leading space? # Assign to another variable. # Just for fun, or something . . . echo "passing options ${Wopts} to wget" # Mainly for debugging. # Is cute. return } save_func() { echo "Settings will be saved." if [ ! -d $savePath ]; then # See if directory exists. mkdir $savePath # Create the directory to save things in #+ if it isn't already there. fi Flag=S # Tell the final bit of code what to do. # Set a flag since stuff is done in main. return } usage() # Tell them how it works. { echo "Welcome to wgetter. This is a front end to wget." echo "It will always run wget with these options:" echo "$CommandA" echo "and the pattern to match: $pattern \ (which you can change at the top of this script)." echo "It will also ask you for recursion depth, \ and if you want to use a referring page." echo "Wgetter accepts the following options:" echo "" echo "-$help : Display this help." echo "-$save : Save the command to a file $savePath/wget-($today) \ instead of running it." echo "-$runn : Run saved wget commands instead of starting a new one -" echo "Enter filename as argument to this option." echo "-$inpu : Run saved wget commands interactively --" echo "The script will ask you for the filename." echo "-$cook : Change the cookies file for this session." echo "-$list : Tell wget to use URL's from a list instead of \ from the command line." echo "-$wopt : Pass any other options direct to wget." echo "" echo "See the wget man page for additional options \ you can pass to wget." echo "" exit $E_USAGE # End here. Don't process anything else. } list_func() # Gives the user the option to use the -i option to wget, #+ and a list of URLs. { while [ 1 ]; do echo "Enter the name of the file containing URL's (press q to change your mind)." read urlfile if [ ! -e "$urlfile" ] && [ "$urlfile" != q ]; then # Look for a file, or the quit option. echo "That file does not exist!" elif [ "$urlfile" = q ]; then # Check quit option. echo "Not using a url list." return else echo "using $urlfile." echo "If you gave url's on the command line, I'll use those first." # Report wget standard behaviour to the user. lister=" -i $urlfile" # This is what we want to pass to wget. return fi done } cookie_func() # Give the user the option to use a different cookie file. { while [ 1 ]; do echo "Change the cookies file. Press return if you don't want to change it." read Cookies # NB: this is not the same as Cookie, earlier. # There is an 's' on the end. # Bit like chocolate chips. if [ -z "$Cookies" ]; then # Escape clause for wusses. return elif [ ! -e "$Cookies" ]; then echo "File does not exist. Try again." # Keep em going . . . else CookiesON=" --load-cookies $Cookies" # File is good -- use it! return fi done } run_func() { if [ -z "$OPTARG" ]; then # Test to see if we used the in-line option or the query one. if [ ! -d "$savePath" ]; then # If directory doesn't exist . . . echo "$savePath does not appear to exist." echo "Please supply path and filename of saved wget commands:" read newFile until [ -f "$newFile" ]; do # Keep going till we get something. echo "Sorry, that file does not exist. Please try again." # Try really hard to get something. read newFile done # ----------------------------------------------------------------------- # if [ -z ( grep wget ${newfile} ) ]; then # Assume they haven't got the right file and bail out. # echo "Sorry, that file does not contain wget commands. Aborting." # exit # fi # # This is bogus code. # It doesn't actually work. # If anyone wants to fix it, feel free! # ----------------------------------------------------------------------- filePath="${newFile}" else echo "Save path is $savePath" echo "Please enter name of the file which you want to use." echo "You have a choice of:" ls $savePath # Give them a choice. read inFile until [ -f "$savePath/$inFile" ]; do # Keep going till #+ we get something. if [ ! -f "${savePath}/${inFile}" ]; then # If file doesn't exist. echo "Sorry, that file does not exist. Please choose from:" ls $savePath # If a mistake is made. read inFile fi done filePath="${savePath}/${inFile}" # Make one variable . . . fi else filePath="${savePath}/${OPTARG}" # Which can be many things . . . fi if [ ! -f "$filePath" ]; then # If a bogus file got through. echo "You did not specify a suitable file." echo "Run this script with the -${save} option first." echo "Aborting." exit $E_NO_SAVEFILE fi echo "Using: $filePath" while read; do eval $REPLY echo "Completed: $REPLY" done < $filePath # Feed the actual file we are using into a 'while' loop. exit } # Fish out any options we are using for the script. # This is based on the demo in "Learning The Bash Shell" (O'Reilly). while getopts ":$save$cook$help$list$runn:$inpu$wopt" opt do case $opt in $save) save_func;; # Save some wgetter sessions for later. $cook) cookie_func;; # Change cookie file. $help) usage;; # Get help. $list) list_func;; # Allow wget to use a list of URLs. $runn) run_func;; # Useful if you are calling wgetter from, #+ for example, a cron script. $inpu) run_func;; # When you don't know what your files are named. $wopt) wopts;; # Pass options directly to wget. \?) echo "Not a valid option." echo "Use -${wopt} to pass options directly to wget," echo "or -${help} for help";; # Catch anything else. esac done shift $((OPTIND - 1)) # Do funky magic stuff with $#. if [ -z "$1" ] && [ -z "$lister" ]; then # We should be left with at least one URL #+ on the command line, unless a list is #+ being used -- catch empty CL's. echo "No URL's given! You must enter them on the same line as wgetter2." echo "E.g., wgetter2 http://somesite http://anothersite." echo "Use $help option for more information." exit $E_NO_URLS # Bail out, with appropriate error code. fi URLS=" $@" # Use this so that URL list can be changed if we stay in the option loop. while [ 1 ]; do # This is where we ask for the most used options. # (Mostly unchanged from version 1 of wgetter) if [ -z $curDepth ]; then Current="" else Current=" Current value is $curDepth" fi echo "How deep should I go? \ (integer: Default is $depthDefault.$Current)" read Depth # Recursion -- how far should we go? inputB="" # Reset this to blank on each pass of the loop. echo "Enter the name of the referring page (default is none)." read inputB # Need this for some sites. echo "Do you want to have the output logged to the terminal" echo "(y/n, default is yes)?" read noHide # Otherwise wget will just log it to a file. case $noHide in # Now you see me, now you don't. y|Y ) hide="";; n|N ) hide=" -b";; * ) hide="";; esac if [ -z ${Depth} ]; then # User accepted either default or current depth, #+ in which case Depth is now empty. if [ -z ${curDepth} ]; then # See if a depth was set on a previous iteration. Depth="$depthDefault" # Set the default recursion depth if nothing #+ else to use. else Depth="$curDepth" # Otherwise, set the one we used before. fi fi Recurse=" -l $Depth" # Set how deep we want to go. curDepth=$Depth # Remember setting for next time. if [ ! -z $inputB ]; then RefA=" --referer=$inputB" # Option to use referring page. fi WGETTER="${CommandA}${pattern}${hide}${RefA}${Recurse}\ ${CookiesON}${lister}${Woptions}${URLS}" # Just string the whole lot together . . . # NB: no embedded spaces. # They are in the individual elements so that if any are empty, #+ we don't get an extra space. if [ -z "${CookiesON}" ] && [ "$cFlag" = "1" ] ; then echo "Warning -- can't find cookie file" # This should be changed, #+ in case the user has opted to not use cookies. fi if [ "$Flag" = "S" ]; then echo "$WGETTER" >> $savePath/wget-${today} # Create a unique filename for today, or append to it if it exists. echo "$inputB" >> $savePath/site-list-${today} # Make a list, so it's easy to refer back to, #+ since the whole command is a bit confusing to look at. echo "Command saved to the file $savePath/wget-${today}" # Tell the user. echo "Referring page URL saved to the file$ \ savePath/site-list-${today}" # Tell the user. Saver=" with save option" # Stick this somewhere, so it appears in the loop if set. else echo "*****************" echo "*****Getting*****" echo "*****************" echo "" echo "$WGETTER" echo "" echo "*****************" eval "$WGETTER" fi echo "" echo "Starting over$Saver." echo "If you want to stop, press q." echo "Otherwise, enter some URL's:" # Let them go again. Tell about save option being set. read case $REPLY in # Need to change this to a 'trap' clause. q|Q ) exit $E_USER_EXIT;; # Exercise for the reader? * ) URLS=" $REPLY";; esac echo "" done exit 0