1
0
Fork 0
download_podcast.py/download_podcast.py

99 lines
3.6 KiB
Python
Executable File

#!/usr/bin/env python
from __future__ import print_function
import datetime
import feedparser
import time
import os
import sys
from six.moves import urllib
from contextlib import closing
# From http://stackoverflow.com/a/1160227
if sys.version_info < (3, 3):
    def touch(fname, mode=0o666, dir_fd=None, **kwargs):
        """Create `fname` if it does not exist and set its file times.

        Variant for Python older than 3.3.

        fname  -- path of the file to touch.
        mode   -- permission bits used if the file has to be created.
        dir_fd -- accepted for signature compatibility only; ignored here.
        kwargs -- only ``times`` is honoured: an ``(atime, mtime)`` tuple,
                  or None/absent to use the current time.
        """
        flags = os.O_CREAT | os.O_APPEND
        times = kwargs.get('times')
        # Opening with O_CREAT creates the file when missing; O_APPEND
        # guarantees existing content is never truncated.
        with os.fdopen(os.open(fname, flags, mode)):
            os.utime(fname, times)
else:
    def touch(fname, mode=0o666, dir_fd=None, **kwargs):
        """Create `fname` if it does not exist and set its file times.

        Variant for Python 3.3 and newer.

        fname  -- path of the file to touch (relative to `dir_fd` if given).
        mode   -- permission bits used if the file has to be created.
        dir_fd -- optional directory file descriptor passed to os.open().
        kwargs -- forwarded to os.utime() (e.g. ``times=(atime, mtime)``).
        """
        flags = os.O_CREAT | os.O_APPEND
        # `os.supports_fd` is a (non-empty, hence always truthy) set; the
        # question to ask is whether os.utime specifically accepts an fd.
        utime_takes_fd = os.utime in os.supports_fd
        with os.fdopen(os.open(fname, flags=flags, mode=mode,
                               dir_fd=dir_fd)) as f:
            # With an fd the path (and therefore dir_fd) is irrelevant;
            # with a path, dir_fd must be forwarded so a relative `fname`
            # still resolves against the same directory.
            os.utime(f.fileno() if utime_takes_fd else fname,
                     dir_fd=None if utime_takes_fd else dir_fd, **kwargs)
# From http://stackoverflow.com/a/7244263
def downloadFile(url, file_name):
    """Download the resource at `url` and save it locally as `file_name`.

    The response body is copied in fixed-size chunks so that a large media
    file is never held in memory all at once.  `file_name` is opened in
    binary write mode, so an existing file is overwritten.

    url       -- URL passed to urllib.request.urlopen().
    file_name -- path of the local file to write.
    """
    chunkSize = 64 * 1024
    with closing(urllib.request.urlopen(url)) as response, \
            open(file_name, 'wb') as out_file:
        while True:
            chunk = response.read(chunkSize)  # a `bytes` object
            if not chunk:
                # An empty read signals the end of the response body.
                break
            out_file.write(chunk)
def _mediaExtension(mediaURL):
    """Return the extension (without the dot) of the URL's last path segment.

    The query string and fragment are removed first, so dots inside them are
    not mistaken for the extension.  Returns '' if the segment has no dot.
    """
    path = mediaURL.split('#', 1)[0].split('?', 1)[0]
    basename = path.rsplit('/', 1)[-1]
    dotPos = basename.rfind('.')
    return basename[dotPos + 1:] if dotPos != -1 else ''


def _mediaSize(media):
    """Return the enclosure's declared length as an int, or 0 if unusable."""
    try:
        # media.length is of type str when present
        return int(getattr(media, 'length', 0) or 0)
    except (TypeError, ValueError):
        return 0


def downloadAll(feedURL):
    """Download the enclosure of every entry of the feed at `feedURL`.

    Entries are processed in reverse feed order.  An entry is skipped unless
    it has exactly one enclosure.  The enclosure is saved in the current
    directory as "<title>.<extension>" ('/' in the title becomes '_').  A
    non-empty existing file whose size matches the declared length (or any
    non-empty file when the length is unknown, i.e. 0) is not downloaded
    again.  After a download the file's modification time is set to the
    entry's publication time.

    feedURL -- URL (or anything else feedparser.parse() accepts) of the feed.
    """
    # Local import: only this function needs it.
    import calendar

    feed = feedparser.parse(feedURL)
    print("Processing feed %s..." % feed['feed']['title'])
    for post in reversed(feed.entries):
        print("Processing item %s... " % post.title, end="")
        if len(post.enclosures) != 1:
            print("Post has %d enclosures, not 1. Skipping post."
                  % len(post.enclosures))
            continue

        media = post.enclosures[0]
        mediaURL = media.href
        mediaExt = _mediaExtension(mediaURL)
        mediaSize = _mediaSize(media)
        safeTitle = post.title.replace('/', '_')
        # Avoid a dangling '.' when the URL carries no extension.
        filename = "%s.%s" % (safeTitle, mediaExt) if mediaExt else safeTitle

        stat = os.stat(filename) if os.path.isfile(filename) else None
        if stat and stat.st_size > 0 \
                and (stat.st_size == mediaSize or mediaSize == 0):
            print("File already downloaded. Skipping.")
            continue

        if stat and stat.st_size != mediaSize:
            print("Incorrect file found. Redownloading... ", end="")
        else:
            print("Downloading... ", end="")
        # The progress text has no newline yet; flush so it shows up
        # before the (possibly long) download starts.
        sys.stdout.flush()

        # Download the file...
        downloadFile(mediaURL, filename)
        print("Done.")

        # ... and set its modification time to the publication time.
        published = getattr(post, 'published_parsed', None)
        if published is None:
            # No usable publication date: keep the download time.
            continue
        # feedparser documents its *_parsed dates as UTC, so convert with
        # calendar.timegm(); time.mktime() would interpret the tuple as
        # local time and shift the result by the UTC offset.
        pubTimestamp = calendar.timegm(published)
        touch(filename, times=(stat.st_atime if stat else pubTimestamp,
                               pubTimestamp))
# Name of the hidden file (in the current directory) that remembers the
# feed URL between runs.
_feedURLfilename = ".podcast_source"
if __name__ == "__main__":
    # Remember the feed URL in a hidden file named _feedURLfilename
    # Always use the command-line URL if given and remember it in that file.
    feedURL = None
    if len(sys.argv) == 2:
        feedURL = sys.argv[1]
        with open(_feedURLfilename, 'w') as feedURLfile:
            feedURLfile.write(feedURL)
    elif len(sys.argv) == 1 and os.path.isfile(_feedURLfilename):
        with open(_feedURLfilename, 'r') as feedURLfile:
            # Strip surrounding whitespace: a trailing newline (e.g. after
            # editing the file by hand) must not become part of the URL.
            feedURL = feedURLfile.read().strip()
    if not feedURL:
        # Wrong argument count, no remembered URL, or an empty URL.
        print("USAGE: %s feedURL" % sys.argv[0])
        sys.exit(1)
    downloadAll(feedURL)