Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
*~
\#*#
.#*
.gog*
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ run ``gog-backup --help``.
downloading files (perhaps waiting an appropriate period between
invocations if GOG.com is experiencing high load). By default, game
files are placed in per-game sub-directories of the current working
directory.
directory. Use --all to avoid prompting for input.

Therefore, the simplest command flow would be to first ``login``, then
download a ``manifest``, then ``fetch`` one or more times.
Expand Down
173 changes: 112 additions & 61 deletions gog-backup
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@

Possible commands:

login <email> <password>
login <email> [<password>]
Login to GOG.com using the supplied email and password.
If the password is not supplied, the user is prompted for it after start.

manifest
Create a manifest describing all games and extras owned by the
Expand All @@ -38,34 +39,40 @@ Possible commands:
and print a report on what must be downloaded. 'compare' validates
everything; 'update' only validates new files.

fetch
fetch [--all|-a]
Download any missing, incomplete, or corrupted files (after an
implicit 'update').
implicit 'update'). If --all is specified, downloads every game; otherwise
prompts for confirmation before downloading each game.
"""

__author__ = 'Evan Powers'
__version__ = '1.0'
__url__ = 'https://github.com/evanpowers/gog-backup'

import sys, os, contextlib, pprint, hashlib, zipfile
import sys, os, codecs, contextlib, pprint, hashlib, locale, zipfile
import cookielib, urllib, urllib2, urlparse
import xml.etree.ElementTree
import threading, Queue, time
import json
from getpass import getpass

import html5lib # http://code.google.com/p/html5lib/

BACKUPINTO = '.'
CONCURRENCY = 6
BREADTHFIRST = False
FETCHCOVERS = False
FETCHCOVERS = True
PATHMAP = '.gog.pathmap.txt'
COOKIES = '.gog.cookies'
MANIFEST = '.gog.games.py'
VALIDATED = '.gog.valid.py'

LOGIN = 'https://www.gog.com/en/login'
SHELF = 'https://www.gog.com/en/myaccount/shelf'
LIST = 'https://www.gog.com/en/myaccount/list'
LOGIN = 'https://secure.gog.com/login'
AJAX_URL = 'http://www.gog.com/user/ajax/'
ACCOUNT_AJAX_URL = 'https://secure.gog.com/en/account/ajax'
SHELF = 'https://secure.gog.com/account/games/shelf'
# match games to covers on the shelf page
LIST = 'https://secure.gog.com/account/games/list'
THUMB = 'http://www.gog.com'

pathmap = {}
Expand All @@ -74,10 +81,12 @@ cookieproc = urllib2.HTTPCookieProcessor(cookiejar)
opener = urllib2.build_opener(cookieproc)
treebuilder = html5lib.treebuilders.getTreeBuilder('etree')
parser = html5lib.HTMLParser(tree=treebuilder, namespaceHTMLElements=False)
sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)

useragent = 'gog-backup/%s (%s)' % (__version__, __url__)
opener.addheaders = [('User-agent', useragent)]


class AttrDict(dict):
def __init__(self, **kw):
self.update(kw)
Expand Down Expand Up @@ -112,14 +121,16 @@ def load_attrdicts(fn):
except IOError:
return AttrDict()

def locate(g, f):
path = os.path.join(pathmap.get(g.key, g.key), f.name)
if not os.path.isfile(path) and os.path.isfile(f.name):
return f.name
def locate(g, f=None, is_extra=False):
filename = f.name if f is not None else ""
path = os.path.join(pathmap.get(g.key, g.title),
(is_extra and "extra" or ""), filename)
# fall back to a same-named file in the current working directory when
# nothing exists at the per-game path
if not os.path.isfile(path) and os.path.isfile(filename):
return filename
return path

def md5map(f, path):
print '#', path
H = hashlib.md5()
part = []
with open(path, 'rb') as r:
Expand Down Expand Up @@ -219,57 +230,83 @@ def needed(valid, games, missing, corrupt):

def open_notrunc(name, bufsize=4*1024):
# 'w+' includes O_TRUNC, 'r+' lacks O_CREAT; so, roll my own
fd = os.open(name, os.O_WRONLY | os.O_CREAT | os.O_BINARY, 0666)
flags = os.O_WRONLY | os.O_CREAT
if hasattr(os, "O_BINARY"):
# exists only on windows
flags |= os.O_BINARY
fd = os.open(name, flags, 0666)
return os.fdopen(fd, 'wb', bufsize)

def cmd_login(email, passwd):
def cmd_login(email, passwd=None):
if passwd is None:
passwd = getpass("Password: ")
# Reset cookiejar
cookiejar.clear()
with request(AJAX_URL, args={'a': 'get'}) as json_data:
data = json.load(json_data)
buk = data['buk']

with request(LOGIN, args={'log_email': email,
'log_password': passwd}) as page:
etree = parser.parse(page)
if etree.find(".//div[@id='register_holder']") is not None:
# this element is only present if not logged in
raise RuntimeError('login failed')
'log_password': passwd,
'redirectOk': '/en/',
'unlockSettings': '1',
'buk': buk,
}) as page:
pass

guc_al = None
for c in cookiejar:
if c.name == 'guc_al':
guc_al = c
break
if not guc_al or guc_al.value == '0':
raise RuntimeError("Login failed")

cookiejar.save()

def cmd_manifest():
games = {}

# parse game list and available files for each from list page
with request(LIST) as page:
with request(SHELF) as page:
etree = parser.parse(page)
for game in etree.findall(".//div[@class='tab_1_row']"):
gamecard = game.find(".//div[@class='tab_1_title']/a")
if gamecard is None:
# happens with "Colin McRae Rally 2005", among others
g = game.find(".//div[@class='tab_1_title']/span")
print 'WARNING: no gamecard link for %s?!' % (g.text,)
print ' Did GOG.com stop selling this game?'
continue
with request(LIST) as list_page:
list_etree = parser.parse(list_page)

for game in etree.findall(".//div[@class='shelf_game']"):
g = AttrDict()
g.key = gamecard.attrib['href'].split('/')[-1]
g.title = gamecard.text
g.thumb = THUMB + game.find(".//img[@src]").attrib['src']
g.key = game.attrib['data-gameid']
g.cover = THUMB +\
game.find(".//img[@src]").attrib['src']
with request(ACCOUNT_AJAX_URL, args={'a': 'gamesShelfDetails',
'g': g.key}) as data_request:
game_data = json.load(data_request)
game_element = parser.parse(game_data['details']['html'])
g.title = game_element.find(".//h2/a").text.strip()

# get thumbnail
list_element = list_etree.find(".//div[@id='game_li_{0}']".\
format(g.key))
g.background = THUMB + list_element.attrib['data-background']
g.thumb = THUMB + list_element.find(".//img[@src]").\
attrib['src']
g.setup, g.extra = [], []
for row in game.findall(".//div[@class='sh_o_i_row']"):
f = AttrDict()
f.href = row.attrib.get('onclick', '')
if not 'download/file' in f.href:
f.href = row.find(".//a").attrib['href']
g.setup.append(f)
else:
f.href = f.href[f.href.index('http:') : -1]
f.desc = row.find(".//div[@class='sh_o_i_text']/span").text
g.extra.append(f)
# get setup downloader for windows
for dl_link in game_element.findall((".//div[@class="
"'win-download']/a[@class='list_game_item']")):
g.setup.append(AttrDict(href=dl_link.attrib['href']))

# get bonus material
for dl_link in game_element.findall((".//div[@class="
"'bonus_content_list browser']/a")):
g.extra.append(AttrDict(
href=THUMB + dl_link.attrib['href'],
desc=dl_link.find("./span[@class='light_un']").text)
)
games[g.key] = g

# match games to covers on the shelf page
with request(SHELF) as page:
etree = parser.parse(page)
for game in etree.findall(".//div[@class='shelf_item_h']"):
gamecard = game.find(".//div[@class='shelf_ov_tab_2_title']/a")
if gamecard is not None:
g = games[gamecard.attrib['href'].split('/')[-1]]
g.cover = THUMB + game.find(".//img[@src]").attrib['src']
# if gamecard is not None:
# g = games[gamecard.attrib['href'].split('/')[-1]]
# g.cover = THUMB + game.find(".//img[@src]").attrib['src']

# request a zero-length range from each file to determine
# - the total size (from the Content-Range header)
Expand Down Expand Up @@ -301,19 +338,24 @@ def cmd_manifest():
print >>w, '# %d games' % len(games)
pprint.pprint(games.values(), width=100, stream=w)

# optionally download cover and thumbnail
# optionally download cover, background and thumbnail
if FETCHCOVERS:
def fetch(which, g):
href = getattr(g, which)
path = pathmap.get(g.key, g.key)
path = locate(g)
ext = os.path.splitext(href)[-1]
fn = '%s_%s%s' % (g.key, which, ext)
fn = '{0}{1}'.format(which, ext)
with request(href) as page:
with open(os.path.join(path, fn), 'wb') as w:
path = os.path.join(path, fn)
path_dir = os.path.dirname(path)
if not os.path.isdir(path_dir):
os.mkdir(path_dir)
with open(path, 'wb') as w:
w.write(page.read())
for g in games.values():
fetch('cover', g)
fetch('thumb', g)
fetch('background', g)

def cmd_list():
# summarize the manifest
Expand All @@ -324,10 +366,10 @@ def cmd_list():
ssz = sum(f.size for f in g.setup)
esz = sum(f.size for f in g.extra)
total += ssz + esz
print '%s (%s)' % (g.key, g.title)
print ' %8s in %d setup files' % (megs(ssz), len(g.setup))
print ' %8s in %d extras' % (megs(esz), len(g.extra))
print '%d games, a total of %s' % (len(games), megs(total))
print u'%s (%s)' % (g.key, g.title)
print u' %8s in %d setup files' % (megs(ssz), len(g.setup))
print u' %8s in %d extras' % (megs(esz), len(g.extra))
print u'%d games, a total of %s' % (len(games), megs(total))

def cmd_compare():
# start the comparison from scratch
Expand All @@ -338,19 +380,28 @@ def cmd_update():
valid = dict((v.key, v) for v in load_attrdicts(VALIDATED))
needed(*compare(valid))

def cmd_fetch():
def cmd_fetch(fetch_all=None):
# start an incremental comparison
valid = dict((v.key, v) for v in load_attrdicts(VALIDATED))
_, games, _, _ = compare(valid)
if fetch_all is None:
for game in games[:]:
if raw_input(u"Download {0}? [Y/N]".\
format(game.title)).upper() != 'Y':
games.remove(game)
elif not (all == "--all" or "-a"):
raise RuntimeError("Unknown parameter supplied. Use --all or -a.")

sizes, rates, errors = {}, {}, {}

# build a list of work items
work = Queue.PriorityQueue()
i = -sys.maxint

for g in games:
for f in g.setup + g.extra:
v = valid[(g.key, f.name)]
path = locate(g, f)
path = locate(g, f, f in g.extra)
dn = os.path.dirname(path)
if dn and not os.path.isdir(dn):
os.makedirs(dn)
Expand Down