diff --git a/README.md b/README.md index 78005d5..de6ec76 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ What is this? ------------- A simple Python module that works as a Metacritic API. It uses the [BeautifulSoup][bs] and the [Requests][requests] library. -Pycritic is under development and hasn't been tested enough (just in Python 2.7.3 for now). +Pycritic is under development and hasn't been tested enough (just in Python 3.10 for now). Comming Soon ™ -------------------- @@ -25,15 +25,15 @@ import pycritic scraper = pycritic.Scraper() resource = scraper.get("http://www.metacritic.com/game/pc/fallout-new-vegas") -print resource.name +print(resource.name) # >> Fallout New Vegas -print resource.date +print(resource.date) # >> Oct 19, 2010 -print resource.metascore +print(resource.metascore) # >> 84 -print resource.userscore +print(resource.userscore) # >> 8.0 -print resource.description +print(resource.description) # >> The latest game in the post-nuclear RPG series is being developed by many members of the Fallout 1 and 2 team at Obsidian Entertainment using the Fallout 3 engine. ``` diff --git a/pycritic/basic.py b/pycritic/basic.py new file mode 100644 index 0000000..eb2df16 --- /dev/null +++ b/pycritic/basic.py @@ -0,0 +1,21 @@ +from __future__ import print_function +from builtins import str +import pycritic + +def print_resource_data(resource): + print("Name: " + resource.name) + print("Release date: " + resource.date) + print("Metascore: " + str(resource.metascore)) + print("Userscore: " + str(resource.userscore)) + print("Description: " + resource.description) + + +def main(): + scraper = pycritic.Scraper() + fviir = scraper.get("http://www.metacritic.com/game/playstation-4/final-fantasy-vii-remake") + print_resource_data(fviir) + fallout = scraper.get("https://www.metacritic.com/game/pc/fallout-4") + print_resource_data(fallout) + +if __name__ == "__main__": + main() diff --git a/pycritic/pycritic.py b/pycritic/pycritic.py index 5032a29..2228d40 100644 --- a/pycritic/pycritic.py +++ b/pycritic/pycritic.py @@ -1,3 +1,7 @@ +from asyncore import write +from builtins import object +from builtins import str +from distutils.file_util import write_file import requests import bs4 @@ -45,8 +49,8 @@ def __init__(self, name, date, category, metascore, userscore, description): class Game(Resource): - def __init__(self, name, date, category, metascore, userscore, description, platform): - super.__init__(name, date, category, metascore, userscore, description) + def __init__(self, name, date, category, metascore, platform): #, userscore, description, platform): + super.__init__(name, date, category, metascore) #, userscore, description) self.platform = platform @@ -61,7 +65,9 @@ def valid(self): class Browser(object): def get(self, url): - request = requests.get(url) + s = requests.Session() + s.headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36' + request = s.get(url) response = Response(request.status_code, request.content) return response @@ -74,7 +80,7 @@ def __init__(self): def get(self, url): self.response = self.browser.get(url) - self.soup = bs4.BeautifulSoup(self.response.content) + self.soup = bs4.BeautifulSoup(self.response.content, 'html.parser') return self.extract_data() def extract_data(self): @@ -89,9 +95,7 @@ def extract_data(self): def _extract_name(self): titles = self.soup.select(".product_title") - title = titles[0].text - info = title.split("\n") - name = info[1].strip() + name = self.soup.find('a', {'class':'hover_none'}).text return name def _extract_date(self): @@ -104,13 +108,11 @@ def _extract_category(self): return Category.GAME def _extract_metascore(self): - section = self.soup.select(".metascore_wrap")[0] - score = section.select(".score_value")[0].text.strip() - return int(score) + score = self.soup.find('a', {'class':'metascore_anchor'}).text + return float(score) def _extract_userscore(self): - section = self.soup.select(".userscore_wrap")[0] - score = section.select(".score_value")[0].text.strip() + score = self.soup.find_all('a', {'class':'metascore_anchor'})[1].text return float(score) def _extract_description(self): @@ -119,7 +121,7 @@ def _extract_description(self): description = "" if (collapsed): # There's a collapse/expand button expanded = section.select(".blurb_expanded") - description = unicode(collapsed[0].text + expanded[0].text).strip() + description = str(collapsed[0].text + expanded[0].text).strip() else: - description = unicode(section.text.strip()) - return unicode(description) + description = str(section.text.strip()) + return str(description) diff --git a/pycritic/pycritic.py.bak b/pycritic/pycritic.py.bak new file mode 100644 index 0000000..b13ac97 --- /dev/null +++ b/pycritic/pycritic.py.bak @@ -0,0 +1,126 @@ +from builtins import str +import requests +import bs4 + +# It's "seems" a good idea to use this "enum", for now +class Category(object): + ALL = 0 + MOVIE = 1 + GAME = 2 + ALBUM = 3 + TV = 4 + PERSON = 5 + TRAILER = 6 + COMPANY = 7 + +# Contains info about the query to be made +class Query(object): + # Standard constructor (w/ parameters) + def __init__(self, category, terms): + self.category = category + self.terms = terms + self.base_url = "http://www.metacritic.com/search/" + partial_url = {Category.ALL: self.base_url + "all", + Category.MOVIE: self.base_url + "movie", + Category.GAME: self.base_url + "game", + Category.ALBUM: self.base_url + "album", + Category.TV: self.base_url + "tv", + Category.PERSON: self.base_url + "person", + Category.TRAILER: self.base_url + "trailer", + Category.COMPANY: self.base_url + "company"}[self.category] + self.url = partial_url + "/" + terms + "/results" + + # Returns the URL of the created query + def get_url(self): + return self.url + +# This class represents a generic resource found at Metacritic +class Resource(object): + def __init__(self, name, date, category, metascore, userscore, description): + self.name = name + self.date = date + self.category = category + self.metascore = metascore + self.userscore = userscore + self.description = description + + +class Game(Resource): + def __init__(self, name, date, category, metascore, userscore, description, platform): + super.__init__(name, date, category, metascore, userscore, description) + self.platform = platform + + +class Response(object): + def __init__(self, status, content): + self.status = status + self.content = content + + def valid(self): + return (self.status == 200) + + +class Browser(object): + def get(self, url): + request = requests.get(url) + response = Response(request.status_code, request.content) + return response + + +class Scraper(object): + def __init__(self): + self.browser = Browser() + self.response = "" + self.soup = "" + + def get(self, url): + self.response = self.browser.get(url) + self.soup = bs4.BeautifulSoup(self.response.content) + return self.extract_data() + + def extract_data(self): + name = self._extract_name() + date = self._extract_date() + category = self._extract_category() + metascore = self._extract_metascore() + userscore = self._extract_userscore() + description = self._extract_description() + resource = Resource(name, date, category, metascore, userscore, description) + return resource + + def _extract_name(self): + titles = self.soup.select(".product_title") + title = titles[0].text + info = title.split("\n") + name = info[1].strip() + return name + + def _extract_date(self): + dates = self.soup.select(".release_data") + date = dates[0].select(".data")[0].text.strip() + return date + + def _extract_category(self): + # TODO + return Category.GAME + + def _extract_metascore(self): + section = self.soup.select(".metascore_wrap")[0] + score = section.select(".score_value")[0].text.strip() + return int(score) + + def _extract_userscore(self): + section = self.soup.select(".userscore_wrap")[0] + score = section.select(".score_value")[0].text.strip() + return float(score) + + def _extract_description(self): + section = self.soup.select(".product_summary")[0].select(".data")[0] + collapsed = section.select(".blurb_collapsed") + description = "" + if (collapsed): # There's a collapse/expand button + expanded = section.select(".blurb_expanded") + description = unicode(collapsed[0].text + expanded[0].text).strip() + else: + description = unicode(section.text.strip()) + return unicode(description) diff --git a/test/basic.py b/test/basic.py deleted file mode 100644 index 2d2e2e1..0000000 --- a/test/basic.py +++ /dev/null @@ -1,21 +0,0 @@ -import pycritic - -def print_resource_data(resource): - print "Name: " + resource.name - print "Release date: " + resource.date - print "Metascore: " + str(resource.metascore) - print "Userscore: " + str(resource.userscore) - print "Description: " + resource.description - - -def main(): - scraper = pycritic.Scraper() - alien = scraper.get("http://www.metacritic.com/movie/alien") - print_resource_data(alien) - aliens = scraper.get("http://www.metacritic.com/movie/aliens") - print_resource_data(aliens) - fallout = scraper.get("http://www.metacritic.com/game/pc/fallout-new-vegas") - print_resource_data(fallout) - -if __name__ == "__main__": - main()