Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9,134 changes: 0 additions & 9,134 deletions JSONGameData/ado_aja_20190224.json

This file was deleted.

11,568 changes: 0 additions & 11,568 deletions JSONGameData/her_gra_20180921.json

This file was deleted.

8,078 changes: 0 additions & 8,078 deletions JSONGameData/nec_vvv_20100807.json

This file was deleted.

9,478 changes: 0 additions & 9,478 deletions JSONGameData/psv_fey_20101024.json

This file was deleted.

1,686 changes: 0 additions & 1,686 deletions JSONPlayerData/player_1011578.json

This file was deleted.

773 changes: 0 additions & 773 deletions JSONPlayerData/player_1028621.json

This file was deleted.

849 changes: 0 additions & 849 deletions JSONPlayerData/player_1064273.json

This file was deleted.

621 changes: 0 additions & 621 deletions JSONPlayerData/player_1064340.json

This file was deleted.

272 changes: 153 additions & 119 deletions Topic_collection_module.py
Original file line number Diff line number Diff line change
@@ -1,129 +1,163 @@
import sys
from bs4 import BeautifulSoup
import re
import json
from operator import itemgetter

def EventConnect(assists, regulargoals, missedpenalties, penaltygoals, redcards, yellowcards, yellowreds, owngoals):
def CreateSmallEventSummary(event):
    """Condense one match-action record into a small summary dict.

    Args:
        event: Mapping describing a single match action. Reads
            'c_ActionMinute' when present (otherwise 'n_ActionTime'),
            plus 'c_Person' and 'n_HomeOrAway'.

    Returns:
        dict with the keys 'minute' (always a string, e.g. '45+1' or '12'),
        'player' and 'team' ('home' or 'away').

    Raises:
        KeyError: if 'c_Person', 'n_HomeOrAway', or (when 'c_ActionMinute'
            is absent) 'n_ActionTime' is missing from ``event``.
    """
    if 'c_ActionMinute' in event:
        # The display minute carries a trailing apostrophe ("45+1'"); drop it.
        minute = event['c_ActionMinute'].replace("'", "")
    else:
        # Fall back to the raw clock value, which is in milliseconds.
        # Keep the result a string so both branches yield the same type:
        # the minute-based sort key applies string operations to it and
        # would fail on a bare int.
        minute = str(int(event['n_ActionTime']) // 60000)
    #TODO: change this to PersonID
    player = event['c_Person']
    # n_HomeOrAway == -1 marks the away side; any other value counts as home.
    team = 'away' if event['n_HomeOrAway'] == -1 else 'home'
    return {'minute': minute, 'player': player, 'team': team}

def EventSorter(x):
    """Return a numeric sort key for an event's 'minute' value.

    Events in stoppage time are written like '45+1'. The part after the
    '+' is scaled to hundredths of a minute, so '45+2' sorts before '45+10'
    and both sort before '46'. Simply reading '+' as a decimal point would
    give 45.2 and 45.10 and put those two in the wrong order.

    Args:
        x: Event dict with a 'minute' entry: a string such as '12' or
            '45+1', or a plain number.

    Returns:
        float suitable as a ``sorted`` key. Ordering is only guaranteed for
        fewer than 100 added minutes.

    Raises:
        ValueError: if the minute text is not numeric around the '+'.
    """
    # str() lets numeric minutes through unchanged instead of failing on a
    # missing string method.
    base, _, added = str(x['minute']).partition('+')
    return float(base) + (float(added) / 100.0 if added else 0.0)

#Connect the assists with the regular goals
eventlist = []

#First get the goal that was scored
for goal in regulargoals:
goaldict = CreateSmallEventSummary(goal)
goaldict.update({'event': 'regular goal'})
eventlist.append(goaldict.copy())
def EventConnect(soup, assists, regulargoals, missedpenalties, penaltygoals, redcards, yellowcards, yellowreds, owngoals):
def getteam(soup, player):
homelist = soup.find('lineups').find('home').find_all('name')
homelist.extend(soup.find('substitutes').find('home').find_all('name'))
homelist.extend(soup.find('lineups').find('home').find_all('goalcomshownname'))
homelist.extend(soup.find('substitutes').find('home').find_all('goalcomshownname'))

for assist in assists:
assistdict = CreateSmallEventSummary(assist)
#TODO: change this to SubPersonID
assistdict.update({'assist': assist['c_SubPerson']})
assistdict.update({'event': 'regular goal'})
eventlist.append(assistdict.copy())

otherevents = [missedpenalties, penaltygoals, redcards, yellowcards, yellowreds, owngoals]
eventdictlist = ['missed penalty', 'penalty goal', 'red card', 'yellow card', 'twice yellow', 'own goal']
for idx, category in enumerate(otherevents):
for event in category:
eventdict = CreateSmallEventSummary(event)
eventdict.update({'event': eventdictlist[idx]})
eventlist.append(eventdict.copy())
#Sort the list of all events by minutes so you get a chronological succession of events
eventlist = sorted(eventlist, key=EventSorter)
return eventlist
homelistfullname = soup.find('lineups').find('home').find_all('fullname')
homelistfullname.extend(soup.find('substitutes').find('home').find_all('fullname'))
for idx, val in enumerate(homelistfullname):
homelistfullname[idx] = homelistfullname[idx].text
awaylistfullname = soup.find('lineups').find('away').find_all('fullname')
awaylistfullname.extend(soup.find('substitutes').find('away').find_all('fullname'))
for idx, val in enumerate(awaylistfullname):
awaylistfullname[idx] = awaylistfullname[idx].text

def GameCourseEvents(jsondata):
assists = []
regulargoals = []
missedpenalties = []
penaltygoals = []
redcards = []
yellowreds = []
yellowcards = []
owngoals = []
for event in jsondata['MatchActions']:
actionset = -1
actioncode1 = -1
actioncode2 = -1
actioncode3 = -1
if 'n_ActionSet' in event:
actionset = event['n_ActionSet']==1 #goal
if 'n_ActionCode' in event:
actioncode1 = event['n_ActionCode']
if 'n_ActionCode2' in event:
actioncode2 = event['n_ActionCode2']
if 'n_ActionCode3' in event:
actioncode3 = event['n_ActionCode3']
#actionset==1 is a goal. actioncode1 is a sum of the ActionCodes found in the Excel documentation
#we need to check with a 'bitwise or' the result. For example,
#actionset = 68 => 64+4 = own + goal = own goal
#actionset = 76 => 64+8+4 = own + penalty + goal = own goal on penalty shoot
if actionset==1: #goal
if actioncode1 & 64:
owngoals.append(event)
continue
if actioncode1 & 8:
penaltygoals.append(event)
continue
#in nec_vvv_20100807 the assist is not reported as actioncode1
#so let's add multiple checks for this
if actioncode1 & 128 or ('n_ActionReasonID' in event and event['n_ActionReasonID'] == 37) or ('c_ActionReason' in event and event['c_ActionReason'].casefold() == "assist"):
assists.append(event)
continue
if actioncode1 == 4: #the most boring goals are just "goals"
regulargoals.append(event)
for idx, val in enumerate(homelist):
homelist[idx] = homelist[idx].text
if player in homelist:
return {'team': 'home'}
awaylist = soup.find('lineups').find('away').find_all('name')
awaylist.extend(soup.find('substitutes').find('away').find_all('name'))
awaylist.extend(soup.find('lineups').find('away').find_all('goalcomshownname'))
awaylist.extend(soup.find('substitutes').find('away').find_all('goalcomshownname'))
for idx, val in enumerate(awaylist):
awaylist[idx] = awaylist[idx].text
if player in awaylist:
return {'team': 'away'}
#If nothing is found, search for the last name
lastname = player.split()[-1]
lastnamehomelist = []
lastnameawaylist = []
for name in homelist:
try:
lastnamehomelist.append(name.split()[-1])
except IndexError:
continue
for name in awaylist:
try:
lastnameawaylist.append(name.split()[-1])
except IndexError:
continue
if lastname in lastnamehomelist:
return {'team': 'home'}
if lastname in lastnameawaylist:
return {'team': 'away'}
else:
for fullname in homelistfullname:
if lastname in fullname:
return {'team': 'home'}
for fullname in awaylistfullname:
if lastname in fullname:
return {'team': 'away'}
else:
print(lastname)
sys.exit(1)
#Connect the assists with the regular goals
eventlist = []
#First get the goal that was scored
for goal in regulargoals:
minute = goal['minute']
try:
minute = int(minute)
except ValueError:
minute = re.findall(r'\d+', minute)
minute = map(int, minute)
minute = sum(minute)
#Make a dict with the minute and goalscorer
goaldict = {'minute': minute}
goalscorer = goal.text
goaldict.update({'player': goalscorer})
#See if there is a matching assist
for assist in assists:
minute2 = assist['minute']
try:
minute2 = int(minute2)
except ValueError:
minute2 = re.findall(r'\d+', minute2)
minute2 = map(int, minute2)
minute2 = sum(minute2)
if minute2 == minute:
goaldict.update({'assist': assist.text})
try:
goaldict.update(getteam(soup, goalscorer))
except TypeError:
print(goalscorer)
sys.exit(1)
goaldict.update({'event': 'regular goal'})
eventlist.append(goaldict.copy())
otherevents = [missedpenalties, penaltygoals, redcards, yellowcards, yellowreds, owngoals]
eventdictlist = ['missed penalty', 'penalty goal', 'red card', 'yellow card', 'twice yellow', 'own goal']
for idx, category in enumerate(otherevents):
for event in category:
minute = event['minute']
try:
minute = int(minute)
except ValueError:
minute = re.findall(r'\d+', minute)
minute = map(int, minute)
minute = sum(minute)
eventdict = {'minute': minute}
eventplayer = event.text
try:
eventdict.update(getteam(soup, eventplayer))
except TypeError:
print('Player not found: ' + eventplayer)
sys.exit(1)
eventdict.update({'player': eventplayer})
eventdict.update({'event': eventdictlist[idx]})
eventlist.append(eventdict.copy())
#Sort the list of all events by minutes so you get a chronological succession of events
eventlist = sorted(eventlist, key=itemgetter('minute'))
return eventlist


if actionset==10: # Missed penalties. Same as before, could use ActionCodes to get more info
missedpenalties.append(event)

if actionset==3:
if actioncode1 & 2048:
yellowcards.append(event)

if actioncode1 & 4096:
yellowreds.append(event)

if actioncode1 & 8192:
redcards.append(event)
eventdict = EventConnect(assists, regulargoals, missedpenalties, penaltygoals, redcards, yellowcards, yellowreds, owngoals)
return eventdict
def GameCourseEvents(soup):
# Pull the per-category event elements out of the parsed match document
# `soup` and pass them to EventConnect, whose result is returned unchanged.
# Every category is read as the children of a list element under <events>.
# NOTE(review): indentation was lost in this copy of the file; the nesting
# described in the comments below is inferred from statement order only.
# Backup list for own goals found among the regular goals.
owngoals2 = []
assists = soup.find('events').find('assistlist').findChildren()
regulargoals = soup.find('events').find('goallist').findChildren()
# When <events>/<goallist> has no children, fall back to the goal scorers
# listed under <highlights> for the home and the away side.
if len(regulargoals) == 0:
regulargoalshome = soup.find('highlights').find('home').find('goalscorerslist').findChildren()
regulargoalsaway = soup.find('highlights').find('away').find('goalscorerslist').findChildren()
regulargoals = regulargoalshome + regulargoalsaway
#Delete all goals that are own goals (these are appended to a backup owngoals list)
# The index runs from the last element down to 0, so deleting an entry
# never shifts an element that still has to be visited.
# NOTE(review): cannot tell from here whether this loop belongs inside the
# fallback branch above or runs for every document - confirm.
num = len(regulargoals)-1
while num >= 0:
if regulargoals[num]['owngoal'] == 'y':
owngoals2.append(regulargoals[num])
del regulargoals[num]
num -= 1

missedpenalties = soup.find('events').find('missedpenaltylist').findChildren()
penaltygoals = soup.find('events').find('penaltygoallist').findChildren()
redcards = soup.find('events').find('redcardlist').findChildren()
yellowcards = soup.find('events').find('yellowcardlist').findChildren()
yellowreds = soup.find('events').find('yellowredlist').findChildren()
owngoals = soup.find('events').find('owngoallist').findChildren()
# Use the backup own goals only when <owngoallist> itself is empty.
if (len(owngoals) == 0) and (len(owngoals2) != 0):
owngoals = owngoals2
# Despite the name `eventdict`, this is whatever EventConnect returns
# (presumably the sorted list of event dicts - verify against EventConnect).
eventdict = EventConnect(soup, assists, regulargoals, missedpenalties, penaltygoals, redcards, yellowcards, yellowreds, owngoals)
return eventdict

def TopicCollection(file):
# Load the match file at path `file`, collect its events through
# GameCourseEvents and split them into two lists by event type:
#   gamecourselist     - 'regular goal', 'missed penalty', 'penalty goal', 'own goal'
#   gamestatisticslist - 'red card', 'yellow card', 'twice yellow'
# Returns the tuple (gamecourselist, gamestatisticslist).
# NOTE(review): indentation was lost in this copy and the body appears
# twice - first with a JSON loader, then with a BeautifulSoup loader.
# This looks like two versions of the same function merged together;
# confirm which one is meant to remain.
with open(file, 'rb') as f:
jsondata = json.load(f)
eventlist = GameCourseEvents(jsondata)
gamecourselist = []
gamestatisticslist = []
# Players with a second yellow are collected here, but the list is not
# part of the return value below.
twiceyellowlist = []
for eventdict in eventlist:
if (eventdict['event'] == 'regular goal') or (eventdict['event'] == 'missed penalty') or (eventdict['event'] == 'penalty goal') or (eventdict['event'] == 'own goal'):
gamecourselist.append(eventdict)
elif (eventdict['event'] == 'red card') or (eventdict['event'] == 'yellow card') or (eventdict['event'] == 'twice yellow'):
gamestatisticslist.append(eventdict)
if eventdict['event'] == 'twice yellow':
twiceyellowlist.append(eventdict['player'])
return gamecourselist, gamestatisticslist
# Second copy of the body: same classification, but the file is parsed
# with BeautifulSoup using the "lxml" parser and the soup is handed to
# GameCourseEvents.
with open(file, 'rb') as f:
soup = BeautifulSoup(f, "lxml")
eventlist = GameCourseEvents(soup)
gamecourselist = []
gamestatisticslist = []
twiceyellowlist = []
for eventdict in eventlist:
if (eventdict['event'] == 'regular goal') or (eventdict['event'] == 'missed penalty') or (eventdict['event'] == 'penalty goal') or (eventdict['event'] == 'own goal'):
gamecourselist.append(eventdict)
elif (eventdict['event'] == 'red card') or (eventdict['event'] == 'yellow card') or (eventdict['event'] == 'twice yellow'):
gamestatisticslist.append(eventdict)
if eventdict['event'] == 'twice yellow':
twiceyellowlist.append(eventdict['player'])
return gamecourselist, gamestatisticslist

#import pprint
#pp = pprint.PrettyPrinter(indent=4)
#pp.pprint(TopicCollection('./JSONGameData/nec_vvv_20100807.json'))
#print(TopicCollection('C:/Syncmap/Promotie/MASC Newspaper/GoalStats/InfoXMLs/DG_NEC_27112015_goal.xml'))