From 3791d2e14b1e7b13d0bf3aa405fdfa97ed11dde0 Mon Sep 17 00:00:00 2001 From: Andy Mahoney <32443674+Saious119@users.noreply.github.com> Date: Wed, 3 Aug 2022 10:47:56 -0400 Subject: [PATCH 1/8] game look up mostly working on python3 --- pycritic/basic.py | 25 +++++++ pycritic/pycritic.py | 39 +++++----- pycritic/pycritic.py.bak | 126 ++++++++++++++++++++++++++++++++ test/{basic.py => basic.py.bak} | 0 4 files changed, 172 insertions(+), 18 deletions(-) create mode 100644 pycritic/basic.py create mode 100644 pycritic/pycritic.py.bak rename test/{basic.py => basic.py.bak} (100%) diff --git a/pycritic/basic.py b/pycritic/basic.py new file mode 100644 index 0000000..63ab4b0 --- /dev/null +++ b/pycritic/basic.py @@ -0,0 +1,25 @@ +from __future__ import print_function +from builtins import str +import pycritic + +def print_resource_data(resource): + print("Name: " + resource.name) + print("Release date: " + resource.date) + print("Metascore: " + str(resource.metascore)) + #print("Userscore: " + str(resource.userscore)) + #print("Description: " + resource.description) + + +def main(): + scraper = pycritic.Scraper() + #alien = scraper.get("http://www.metacritic.com/movie/alien") + #print_resource_data(alien) + #aliens = scraper.get("http://www.metacritic.com/movie/aliens") + #print_resource_data(aliens) + fviir = scraper.get("http://www.metacritic.com/game/playstation-4/final-fantasy-vii-remake") + print_resource_data(fviir) + fallout = scraper.get("https://www.metacritic.com/game/pc/fallout-4") + print_resource_data(fallout) + +if __name__ == "__main__": + main() diff --git a/pycritic/pycritic.py b/pycritic/pycritic.py index 5032a29..98746c9 100644 --- a/pycritic/pycritic.py +++ b/pycritic/pycritic.py @@ -1,3 +1,7 @@ +from asyncore import write +from builtins import object +from builtins import str +from distutils.file_util import write_file import requests import bs4 @@ -35,18 +39,18 @@ def get_url(self): # This class represents a generic resource found at Metacritic class Resource(object): - def __init__(self, name, date, category, metascore, userscore, description): + def __init__(self, name, date, category, metascore): #, userscore, description): self.name = name self.date = date self.category = category self.metascore = metascore - self.userscore = userscore - self.description = description + #self.userscore = userscore + #self.description = description class Game(Resource): - def __init__(self, name, date, category, metascore, userscore, description, platform): - super.__init__(name, date, category, metascore, userscore, description) + def __init__(self, name, date, category, metascore, platform): #, userscore, description, platform): + super.__init__(name, date, category, metascore) #, userscore, description) self.platform = platform @@ -61,7 +65,9 @@ def valid(self): class Browser(object): def get(self, url): - request = requests.get(url) + s = requests.Session() + s.headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36' + request = s.get(url) response = Response(request.status_code, request.content) return response @@ -74,7 +80,7 @@ def __init__(self): def get(self, url): self.response = self.browser.get(url) - self.soup = bs4.BeautifulSoup(self.response.content) + self.soup = bs4.BeautifulSoup(self.response.content, 'html.parser') return self.extract_data() def extract_data(self): @@ -82,16 +88,14 @@ def extract_data(self): date = self._extract_date() category = self._extract_category() metascore = self._extract_metascore() - userscore = self._extract_userscore() - description = self._extract_description() - resource = Resource(name, date, category, metascore, userscore, description) + #userscore = self._extract_userscore() + #description = self._extract_description() + resource = Resource(name, date, category, metascore)#, userscore, description) return resource def _extract_name(self): titles = self.soup.select(".product_title") - title = titles[0].text - info = title.split("\n") - name = info[1].strip() + name = self.soup.find('a', {'class':'hover_none'}).text return name def _extract_date(self): @@ -104,8 +108,7 @@ def _extract_category(self): return Category.GAME def _extract_metascore(self): - section = self.soup.select(".metascore_wrap")[0] - score = section.select(".score_value")[0].text.strip() + score = self.soup.find('a', {'class':'metascore_anchor'}).text return int(score) def _extract_userscore(self): @@ -119,7 +122,7 @@ def _extract_description(self): description = "" if (collapsed): # There's a collapse/expand button expanded = section.select(".blurb_expanded") - description = unicode(collapsed[0].text + expanded[0].text).strip() + description = str(collapsed[0].text + expanded[0].text).strip() else: - description = unicode(section.text.strip()) - return unicode(description) + description = str(section.text.strip()) + return str(description) diff --git a/pycritic/pycritic.py.bak b/pycritic/pycritic.py.bak new file mode 100644 index 0000000..b13ac97 --- /dev/null +++ b/pycritic/pycritic.py.bak @@ -0,0 +1,126 @@ +from builtins import str +import requests +import bs4 + +# It's "seems" a good idea to use this "enum", for now +class Category(object): + ALL = 0 + MOVIE = 1 + GAME = 2 + ALBUM = 3 + TV = 4 + PERSON = 5 + TRAILER = 6 + COMPANY = 7 + +# Contains info about the query to be made +class Query(object): + # Standard constructor (w/ parameters) + def __init__(self, category, terms): + self.category = category + self.terms = terms + self.base_url = "http://www.metacritic.com/search/" + partial_url = {Category.ALL: self.base_url + "all", + Category.MOVIE: self.base_url + "movie", + Category.GAME: self.base_url + "game", + Category.ALBUM: self.base_url + "album", + Category.TV: self.base_url + "tv", + Category.PERSON: self.base_url + "person", + Category.TRAILER: self.base_url + "trailer", + Category.COMPANY: self.base_url + "company"}[self.category] + self.url = partial_url + "/" + terms + "/results" + + # Returns the URL of the created query + def get_url(self): + return self.url + +# This class represents a generic resource found at Metacritic +class Resource(object): + def __init__(self, name, date, category, metascore, userscore, description): + self.name = name + self.date = date + self.category = category + self.metascore = metascore + self.userscore = userscore + self.description = description + + +class Game(Resource): + def __init__(self, name, date, category, metascore, userscore, description, platform): + super.__init__(name, date, category, metascore, userscore, description) + self.platform = platform + + +class Response(object): + def __init__(self, status, content): + self.status = status + self.content = content + + def valid(self): + return (self.status == 200) + + +class Browser(object): + def get(self, url): + request = requests.get(url) + response = Response(request.status_code, request.content) + return response + + +class Scraper(object): + def __init__(self): + self.browser = Browser() + self.response = "" + self.soup = "" + + def get(self, url): + self.response = self.browser.get(url) + self.soup = bs4.BeautifulSoup(self.response.content) + return self.extract_data() + + def extract_data(self): + name = self._extract_name() + date = self._extract_date() + category = self._extract_category() + metascore = self._extract_metascore() + userscore = self._extract_userscore() + description = self._extract_description() + resource = Resource(name, date, category, metascore, userscore, description) + return resource + + def _extract_name(self): + titles = self.soup.select(".product_title") + title = titles[0].text + info = title.split("\n") + name = info[1].strip() + return name + + def _extract_date(self): + dates = self.soup.select(".release_data") + date = dates[0].select(".data")[0].text.strip() + return date + + def _extract_category(self): + # TODO + return Category.GAME + + def _extract_metascore(self): + section = self.soup.select(".metascore_wrap")[0] + score = section.select(".score_value")[0].text.strip() + return int(score) + + def _extract_userscore(self): + section = self.soup.select(".userscore_wrap")[0] + score = section.select(".score_value")[0].text.strip() + return float(score) + + def _extract_description(self): + section = self.soup.select(".product_summary")[0].select(".data")[0] + collapsed = section.select(".blurb_collapsed") + description = "" + if (collapsed): # There's a collapse/expand button + expanded = section.select(".blurb_expanded") + description = unicode(collapsed[0].text + expanded[0].text).strip() + else: + description = unicode(section.text.strip()) + return unicode(description) diff --git a/test/basic.py b/test/basic.py.bak similarity index 100% rename from test/basic.py rename to test/basic.py.bak From 8a6eeb8be9c8313ecafdf6ce9acdd4d7ac282d01 Mon Sep 17 00:00:00 2001 From: Andy Mahoney <32443674+Saious119@users.noreply.github.com> Date: Mon, 8 Aug 2022 22:15:09 -0400 Subject: [PATCH 2/8] works for most of the game stuff --- response.html | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 response.html diff --git a/response.html b/response.html new file mode 100644 index 0000000..e69de29 From 7580821fd8e9fcd558936d728c593b95a433b119 Mon Sep 17 00:00:00 2001 From: Andy Mahoney <32443674+Saious119@users.noreply.github.com> Date: Mon, 8 Aug 2022 22:18:11 -0400 Subject: [PATCH 3/8] updated readme for the branch --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 78005d5..7f28b46 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ What is this? ------------- A simple Python module that works as a Metacritic API. It uses the [BeautifulSoup][bs] and the [Requests][requests] library. -Pycritic is under development and hasn't been tested enough (just in Python 2.7.3 for now). +Pycritic is under development and hasn't been tested enough (just in Python 3.10 for now). Comming Soon ™ -------------------- From c2f7d860dac697c681c298d81bbcf78effa65464 Mon Sep 17 00:00:00 2001 From: Andy Mahoney <32443674+Saious119@users.noreply.github.com> Date: Mon, 8 Aug 2022 22:19:09 -0400 Subject: [PATCH 4/8] removed empty debug file --- response.html | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 response.html diff --git a/response.html b/response.html deleted file mode 100644 index e69de29..0000000 From 8dbcaf67574f90c0e3bb0c04ff33eeb0fb2c7491 Mon Sep 17 00:00:00 2001 From: Andy Mahoney <32443674+Saious119@users.noreply.github.com> Date: Mon, 8 Aug 2022 23:02:19 -0400 Subject: [PATCH 5/8] fully works now --- pycritic/basic.py | 8 ++------ pycritic/pycritic.py | 15 +++++++-------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/pycritic/basic.py b/pycritic/basic.py index 63ab4b0..eb2df16 100644 --- a/pycritic/basic.py +++ b/pycritic/basic.py @@ -6,16 +6,12 @@ def print_resource_data(resource): print("Name: " + resource.name) print("Release date: " + resource.date) print("Metascore: " + str(resource.metascore)) - #print("Userscore: " + str(resource.userscore)) - #print("Description: " + resource.description) + print("Userscore: " + str(resource.userscore)) + print("Description: " + resource.description) def main(): scraper = pycritic.Scraper() - #alien = scraper.get("http://www.metacritic.com/movie/alien") - #print_resource_data(alien) - #aliens = scraper.get("http://www.metacritic.com/movie/aliens") - #print_resource_data(aliens) fviir = scraper.get("http://www.metacritic.com/game/playstation-4/final-fantasy-vii-remake") print_resource_data(fviir) fallout = scraper.get("https://www.metacritic.com/game/pc/fallout-4") diff --git a/pycritic/pycritic.py b/pycritic/pycritic.py index 98746c9..fe91592 100644 --- a/pycritic/pycritic.py +++ b/pycritic/pycritic.py @@ -39,13 +39,13 @@ def get_url(self): # This class represents a generic resource found at Metacritic class Resource(object): - def __init__(self, name, date, category, metascore): #, userscore, description): + def __init__(self, name, date, category, metascore, userscore, description): self.name = name self.date = date self.category = category self.metascore = metascore - #self.userscore = userscore - #self.description = description + self.userscore = userscore + self.description = description class Game(Resource): @@ -88,9 +88,9 @@ def extract_data(self): date = self._extract_date() category = self._extract_category() metascore = self._extract_metascore() - #userscore = self._extract_userscore() - #description = self._extract_description() - resource = Resource(name, date, category, metascore)#, userscore, description) + userscore = self._extract_userscore() + description = self._extract_description() + resource = Resource(name, date, category, metascore, userscore, description) return resource def _extract_name(self): @@ -112,8 +112,7 @@ def _extract_metascore(self): return int(score) def _extract_userscore(self): - section = self.soup.select(".userscore_wrap")[0] - score = section.select(".score_value")[0].text.strip() + score = self.soup.find_all('a', {'class':'metascore_anchor'})[1].text return float(score) def _extract_description(self): From d56d937a5e311c386ef4399965e24230576926f8 Mon Sep 17 00:00:00 2001 From: Andy Mahoney <32443674+Saious119@users.noreply.github.com> Date: Mon, 8 Aug 2022 23:06:22 -0400 Subject: [PATCH 6/8] updated for python 3 syntax --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7f28b46..de6ec76 100644 --- a/README.md +++ b/README.md @@ -25,15 +25,15 @@ import pycritic scraper = pycritic.Scraper() resource = scraper.get("http://www.metacritic.com/game/pc/fallout-new-vegas") -print resource.name +print(resource.name) # >> Fallout New Vegas -print resource.date +print(resource.date) # >> Oct 19, 2010 -print resource.metascore +print(resource.metascore) # >> 84 -print resource.userscore +print(resource.userscore) # >> 8.0 -print resource.description +print(resource.description) # >> The latest game in the post-nuclear RPG series is being developed by many members of the Fallout 1 and 2 team at Obsidian Entertainment using the Fallout 3 engine. ``` From 5ba9263638b727e8152522035972a2e83338dfbe Mon Sep 17 00:00:00 2001 From: Andy Mahoney <32443674+Saious119@users.noreply.github.com> Date: Mon, 8 Aug 2022 23:08:59 -0400 Subject: [PATCH 7/8] remove pointless bak and folder --- test/basic.py.bak | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 test/basic.py.bak diff --git a/test/basic.py.bak b/test/basic.py.bak deleted file mode 100644 index 2d2e2e1..0000000 --- a/test/basic.py.bak +++ /dev/null @@ -1,21 +0,0 @@ -import pycritic - -def print_resource_data(resource): - print "Name: " + resource.name - print "Release date: " + resource.date - print "Metascore: " + str(resource.metascore) - print "Userscore: " + str(resource.userscore) - print "Description: " + resource.description - - -def main(): - scraper = pycritic.Scraper() - alien = scraper.get("http://www.metacritic.com/movie/alien") - print_resource_data(alien) - aliens = scraper.get("http://www.metacritic.com/movie/aliens") - print_resource_data(aliens) - fallout = scraper.get("http://www.metacritic.com/game/pc/fallout-new-vegas") - print_resource_data(fallout) - -if __name__ == "__main__": - main() From 1992452fbf9d9bc7cf99f8f70138f2b3b8587f84 Mon Sep 17 00:00:00 2001 From: Andy Mahoney <32443674+Saious119@users.noreply.github.com> Date: Tue, 4 Jul 2023 00:24:46 -0400 Subject: [PATCH 8/8] fixed a bug where metacritic score would fail to parse --- pycritic/pycritic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycritic/pycritic.py b/pycritic/pycritic.py index fe91592..2228d40 100644 --- a/pycritic/pycritic.py +++ b/pycritic/pycritic.py @@ -109,7 +109,7 @@ def _extract_category(self): def _extract_metascore(self): score = self.soup.find('a', {'class':'metascore_anchor'}).text - return int(score) + return float(score) def _extract_userscore(self): score = self.soup.find_all('a', {'class':'metascore_anchor'})[1].text