NEWER Apple Downloader Script.

Code:
#!/bin/bash

# Download movie trailers from Apple - downloads a single trailer per movie in Apple's trailer XML feed

# specify whether or not to get HD trailers - download priority is 1080p > 720p > 480p > standard 640 wide
GET1080p=0
GET720p=0
GET480p=1

# set to 1 to also download the poster image of each movie
GETPOSTER=1

# whitespace-separated list of feed URLs to process
FEEDS="http://www.apple.com/trailers/home/xml/current.xml"

#for local testing specify a file instead of hitting the net for the feed
#FEEDS="./Apple640Trailers.xml"

# define programs
XMLSTARLET='xml'
AWK='gawk'

# hard-coded file extension for saved videos
# ideally we'd preserve the extension of the original movie file and only add the ".trailer" before it
#  (in case there's anything other than .mov)
BEXTENSION=".trailer.mov"

# save location for the individual trailer folders
SAVEPATH="v:/Movies/zzztrailertest/"

#save path for the tracker file below
DLDBPATH="./"

# Trim the tracker file (${DLDBPATH}.downloaded.db) to its newest entries.
# The tracker is a text file listing completed downloads; it prevents getting
# the same trailer again the next time the script runs.
#   $1 - number of lines to keep (optional, default 5000)
# A missing tracker file is created empty, so the very first run does not
# print a "cannot open" error.  The tracker is only replaced when the trimmed
# copy was written successfully.
trim_download_db() {
	local db="${DLDBPATH}.downloaded.db"
	local keep=${1:-5000}

	[ -e "$db" ] || : > "$db"

	if tail -n "$keep" "$db" > "$db.tmp"; then
		mv -f -- "$db.tmp" "$db"
	else
		echo "##### could not trim $db -- leaving it untouched" >&2
		rm -f -- "$db.tmp"
		return 1
	fi
}

trim_download_db


# awk program that turns a movie title (one title per input line) into a name
# that is safe to use as a Windows file/folder name.  It decodes a handful of
# HTML entities, straightens "smart" quotes, swaps characters that are illegal
# (or unwanted) in filenames for look-alikes, collapses runs of spaces and trims
# leading/trailing blanks.  The cleaned title is printed on stdout.
# NOTE: the program lives inside a single-quoted shell string, so an apostrophe
# has to be written as '"'"' and comments inside it must not contain one.
FILECLEANER_AWK='
{
	## some html escapes:
	gsub("&gt;",">")
	gsub("&lt;","<")
	gsub("&quot;","\"")
	gsub("&rdquo;","\"")
	gsub("&bdquo;","\"")
	gsub("&lsquo;","\"")
	gsub("&rsquo;","\"")
	gsub("&sbquo;",",")
	## decoded last so that an escaped entity is not decoded twice;
	## the backslash keeps gsub from treating & as "the matched text"
	gsub("&amp;","\\&")

	## replace fancy "smart" quotes with straight equivalents
	gsub("’","'"'"'")
	gsub("‘","'"'"'")

	gsub("“","\"")
	gsub("”","\"")

	gsub("„","\"")

	## backquote to apostrophe
	gsub("`","'"'"'")

	## double quote to apostrophe
	gsub("\"","'"'"'")

	## select illegal filename characters replaced by alternates
	gsub(">",")")
	gsub("<","(")
	gsub("[:]"," - ")
	gsub("[/]","-")
	## backslash to dash
	gsub("\\\\","-")

	gsub("[?]","")
	gsub("[|]","-")
	## bracketed: a bare * is not a valid regular expression on its own
	gsub("[*]","+")

	## any run of spaces to a single space (earlier substitutions may have created some)
	gsub("  +"," ")

	## sanitize the rest:
	## gsub("[^- '"'"'[:alnum:] _$+&={}\\[\\]()%@!;,.]*","")

	## trim leading and trailing blanks
	gsub("^[[:blank:]]*", "")
	gsub("[[:blank:]]*$", "")

	## dump it to stdout
	print
}
'

# ---------------------------------------------------------------------------
# helper functions used by the main loop below
# ---------------------------------------------------------------------------

# Print field number $2 (1-based) of the tab-separated record $1.
# Empty fields keep their position; asking for a field past the end prints
# an empty line.  Done with parameter expansion so no awk is spawned per field.
record_field() {
	local rest=$1 want=$2 tab=$'\t'

	while [ "$want" -gt 1 ]; do
		case $rest in
			*"$tab"*) rest=${rest#*"$tab"} ;;
			*) rest=''; break ;;
		esac
		want=$((want - 1))
	done
	printf '%s\n' "${rest%%"$tab"*}"
}

# Succeed when the tracker file already lists item $2 (PREVIEW or POSTER)
# for movie id $1.  -F makes grep match the tag literally ("." is not a wildcard).
is_downloaded() {
	grep -qF -- "###$1.$2" "${DLDBPATH}.downloaded.db"
}

# Record item $2 (PREVIEW or POSTER) of movie id $1 as done; $3 is the local file name.
mark_downloaded() {
	printf '###%s.%s %s\n' "$1" "$2" "$3" >> "${DLDBPATH}.downloaded.db"
}

# Download one rendition of the current movie's trailer.
#   $1 - URL of the rendition
#   $2 - tag added to the local file name (e.g. " [720p]"), may be empty
#   $3 - hint appended to the failure message
# Reads MOVIEID, MOVIETITLEFILE, MOVIESAVEPATH and BEXTENSION.
# Returns 0 and marks the trailer as done on success, 1 on failure.
fetch_trailer() {
	local url=$1 tag=$2 hint=$3
	# new local filename to save video file
	local name="${MOVIETITLEFILE}${tag}${BEXTENSION}"
	local target="${MOVIESAVEPATH}/${name}"

	if wget -c -O "$target" "$url"; then
		mark_downloaded "$MOVIEID" PREVIEW "$name"
		return 0
	fi

	echo "##### ID:$MOVIEID URL:$url FAILED -- $hint"
	# wget -O creates the output file before it knows whether the request
	# succeeds; drop an empty leftover but keep partial data for a later resume
	[ -s "$target" ] || rm -f -- "$target"
	return 1
}

# Try the enabled renditions in priority order (1080p > 720p > 480p) and fall
# back to the standard 640-wide file referenced in the feed.
# Reads GET1080p/GET720p/GET480p and PREVIEW1080p/PREVIEW720p/PREVIEW480p/PREVIEW.
# Returns 0 as soon as one download succeeds, 1 if all of them failed.
fetch_best_trailer() {
	if [ "${GET1080p:-0}" -eq 1 ] &&
		fetch_trailer "$PREVIEW1080p" " [1080p]" "TRYING NEXT LOWER SIZE"; then
		return 0
	fi
	if [ "${GET720p:-0}" -eq 1 ] &&
		fetch_trailer "$PREVIEW720p" " [720p]" "TRYING NEXT LOWER SIZE"; then
		return 0
	fi
	if [ "${GET480p:-0}" -eq 1 ] &&
		fetch_trailer "$PREVIEW480p" " [480p]" "TRYING STANDARD SIZE"; then
		return 0
	fi
	fetch_trailer "$PREVIEW" "" "RETRY NEXT RUN"
}

# main loop - passes once per feed specified above
# ($FEEDS is a whitespace-separated list, so it is left unquoted on purpose)
for FEEDURL in $FEEDS; do

	# xmlstarlet prints one record per line: split its output on newlines only
	# and switch globbing off so a title containing * ? or [ is not expanded.
	# Both settings are restored right after the feed has been read.
	saved_ifs=$IFS
	IFS=$'\n'
	set -f

	# set of partial movie metadata - only the fields we need for downloading, saving & tracking the video/image files.
	TRAILERS=($("$XMLSTARLET" sel --net -E utf-8 -D -T -t -m "/records/movieinfo" \
		-v "@id" -o '&#9;' \
		-v "info/title" -o '&#9;' \
		-v "info/postdate" -o '&#9;' \
		-v "preview/large" -o '&#9;' \
		-v "poster/xlarge" --nl \
		"$FEEDURL" 2>/dev/null))

	# complete set of movie metadata to be saved out one file per video later - one record per line
	# NOTE(review): entries are matched to TRAILERS by position, which assumes
	# xmlstarlet emits each <movieinfo> copy on a single line - confirm against the feed
	movieFields=($("$XMLSTARLET" sel --net -E utf-8 -t -m "/records/movieinfo" \
		-c "." \
		--nl \
		"$FEEDURL" 2>/dev/null))

	set +f
	IFS=$saved_ifs

	recordDATE=$("$XMLSTARLET" sel --net -D -T -t -m "/records" \
		-v "@date" \
		"$FEEDURL" 2>/dev/null)

	# xmlstarlet errors are silenced above, so say something when nothing came back
	if [ "${#TRAILERS[@]}" -eq 0 ]; then
		echo "##### FEED:$FEEDURL returned no trailers -- SKIPPING" >&2
		continue
	fi

	# individual feed loop - passes once per movie in feed
	count=-1
	for MOVIE in "${TRAILERS[@]}"; do
		count=$((count + 1))

		MOVIEID=$(record_field "$MOVIE" 1)
		MOVIETITLE=$(record_field "$MOVIE" 2)
		# title cleaned up for use as folder and file name
		MOVIETITLEFILE=$(printf '%s\n' "$MOVIETITLE" | "$AWK" "$FILECLEANER_AWK")
		POSTDATE=$(record_field "$MOVIE" 3)

		# web path to the video file referenced in the feed xml
		PREVIEW=$(record_field "$MOVIE" 4)

		# filename substitutions to allow getting HD versions of the referenced file
		#   HARD CODED - need logic if referenced names have extensions other than "h640w.mov"
		PREVIEW1080p=${PREVIEW%%h640w.mov}h1080p.mov
		PREVIEW720p=${PREVIEW%%h640w.mov}a720p.mov
		PREVIEW480p=${PREVIEW%%h640w.mov}h480p.mov

		# web path to the poster file
		POSTER=$(record_field "$MOVIE" 5)

		# new local filename to save poster file
		NEWPOSTERNAME="folder.jpg"

		# without an id the tracker cannot tell movies apart, and without a
		# title everything would be written straight into SAVEPATH
		if [ -z "$MOVIEID" ] || [ -z "$MOVIETITLEFILE" ]; then
			echo "##### ID:$MOVIEID NAME:$MOVIETITLE INCOMPLETE RECORD -- SKIPPING" >&2
			continue
		fi

		MOVIESAVEPATH="${SAVEPATH}${MOVIETITLEFILE}"

		# work out what is still missing for this movie
		need_preview=1
		if is_downloaded "$MOVIEID" PREVIEW; then
			need_preview=0
		fi
		need_poster=0
		if [ "${GETPOSTER:-0}" -eq 1 ] && ! is_downloaded "$MOVIEID" POSTER; then
			need_poster=1
		fi

		# create a folder for the downloaded files (using the movie's cleaned name),
		# but only when something is about to be saved into it
		if [ "$need_preview" -eq 1 ] || [ "$need_poster" -eq 1 ]; then
			if ! mkdir -p -- "$MOVIESAVEPATH"; then
				echo "##### ID:$MOVIEID cannot create $MOVIESAVEPATH -- SKIPPING" >&2
				continue
			fi
		fi

		# save the trailer's XML data to its own file within the trailer's folder
		# (skipped when the folder of an already finished movie no longer exists)
		if [ -d "$MOVIESAVEPATH" ]; then
			# printf instead of echo -e: backslashes in the XML stay untouched
			printf '<?xml version="1.0" encoding="utf-8"?>\n<records date="%s">%s</records>\n' \
				"$recordDATE" "${movieFields[$count]}" > "$MOVIESAVEPATH/temp.xml"

			# reformat the XML to make it human-readable
			"$XMLSTARLET" format "$MOVIESAVEPATH/temp.xml" > "$MOVIESAVEPATH/description.xml"
			rm -f -- "$MOVIESAVEPATH/temp.xml"
		fi

		# get and save the best available trailer
		if [ "$need_preview" -eq 1 ]; then
			fetch_best_trailer
		else
			echo "##### Trailer ID:$MOVIEID NAME:$MOVIETITLE MARKED DONE  -- SKIPPING"
		fi

		# get and save the movie poster image
		if [ "${GETPOSTER:-0}" -eq 1 ]; then
			if [ "$need_poster" -eq 0 ]; then
				echo "##### Poster ID:$MOVIEID NAME:$MOVIETITLE MARKED DONE -- SKIPPING"
			elif wget -c -O "$MOVIESAVEPATH/$NEWPOSTERNAME" "$POSTER"; then
				mark_downloaded "$MOVIEID" POSTER "$NEWPOSTERNAME"
			else
				echo "##### ID:$MOVIEID URL:$POSTER FAILED -- RETRY NEXT RUN"
				[ -s "$MOVIESAVEPATH/$NEWPOSTERNAME" ] || rm -f -- "$MOVIESAVEPATH/$NEWPOSTERNAME"
			fi
		fi

	done

done


Edited by hybrid8 (16/01/2009 17:46)
Edit Reason: Included Bitt's string-replace changes plus fixed missing title on output of SKIPPED messages
_________________________
Bruno
Twisted Melon : Fine Mac OS Software