# Dream Team
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Scrape the per-match T20I batting and bowling tables for every player in
# ``players`` and collect them, one row per match, into ``final_data``.

# Sample URL used in the example (update with the actual player URLs)
STATS_URL = (
    "http://www.cricmetric.com/playerstats.py?"
    "player=KA+Pollard&role=all&format=T20I&groupby=match"
    "&start_date=2021-10-17&end_date=2022-10-17"
)

PLAYER_INPUT_XPATH = '//*[@id="player"]'
SUBMIT_XPATH = '/html/body/div[1]/div[1]/div[2]/div/form/input[3]'
BATTING_TABLE_XPATH = '//*[@id="T20I-Batting"]/div/table'
BOWLING_TABLE_XPATH = '//*[@id="T20I-Bowling"]/div/table'

# Separator between cells in one rendered table row.
# NOTE(review): kept as ',' from the original. The original code deleted a
# column literally named '%', which suggests headers such as "Dot %" are split
# on whitespace -- confirm against the live page and adjust if needed.
CELL_SEPARATOR = ','

# Scraped header -> output column name. Pairing by name (instead of assigning
# a positional list) keeps the selection and the renaming the same length and
# in the same order.
BATTING_COLUMNS = {
    'Match': 'Match',
    'Runs': 'Runs Scored',
    'Balls': 'Balls Played',
    'Out': 'Out',
    'SR': 'Bat SR',
    '50': '50',
    '100': '100',
    '4s': '4s Scored',
    '6s': '6s Scored',
    'Dot': 'Bat Dot%',
}
# NOTE(review): the 'Avg' source header is inferred from the 'Bowl Avg' output
# name; the original selected no average column and therefore labelled 'SR' as
# 'Bowl Avg'. Confirm the header text on the page.
BOWLING_COLUMNS = {
    'Match': 'Match',
    'Overs': 'Overs Bowled',
    'Runs': 'Runs Given',
    'Wickets': 'Wickets Taken',
    'Econ': 'Econ',
    'Avg': 'Bowl Avg',
    'SR': 'Bowl SR',
    '5W': '5W',
    '4s': '4s Given',
    '6s': '6s Given',
}

# Failures that mean "this table is missing or not in the expected shape".
SCRAPE_ERRORS = (WebDriverException, KeyError, IndexError)


def _table_text_to_frame(table_text, separator=CELL_SEPARATOR):
    """Parse the rendered text of an HTML table into a DataFrame.

    Each line of ``table_text`` is one table row and is split on
    ``separator``. The last line is dropped, the first line becomes the
    header, and the remaining lines are returned as data rows.

    Raises:
        IndexError: if no header row is left after dropping the last line.
    """
    lines = pd.DataFrame(table_text.split('\n'))
    cells = lines[0].str.split(separator, expand=True)[0:-1]
    cells.columns = cells.iloc[0]
    return cells[1:]


def _select_and_rename(frame, column_map):
    """Return the ``column_map`` keys of ``frame``, renamed to its values.

    Raises:
        KeyError: if any source column is missing from ``frame``.
    """
    selected = frame[list(column_map)]
    selected.columns = list(column_map.values())
    return selected


# Initialize webdriver
driver = webdriver.Chrome()
final_data = pd.DataFrame()
driver.get(STATS_URL)

# List of players to iterate over (update this list with actual player names
# or IDs)
players = ["player1", "player2", "player3"]  # Example players

player_frames = []
for player_name in players:
    search_box = driver.find_element(By.XPATH, PLAYER_INPUT_XPATH)
    search_box.clear()
    search_box.send_keys(player_name)

    try:
        driver.find_element(By.XPATH, SUBMIT_XPATH).click()
    except WebDriverException:
        # The original retried the identical click once on failure; keep that.
        driver.find_element(By.XPATH, SUBMIT_XPATH).click()
    time.sleep(3)

    try:
        # Batting data. Selecting by name also drops any stray '%' column.
        bat = driver.find_element(By.XPATH, BATTING_TABLE_XPATH).text
        stats = _select_and_rename(_table_text_to_frame(bat), BATTING_COLUMNS)
    except SCRAPE_ERRORS:
        # No usable batting table: skip this player, as the original did.
        continue
    time.sleep(5)

    try:
        # Bowling data
        bowl = driver.find_element(By.XPATH, BOWLING_TABLE_XPATH).text
        stats2 = _select_and_rename(_table_text_to_frame(bowl), BOWLING_COLUMNS)
    except SCRAPE_ERRORS:
        # No usable bowling table: fall back to an empty frame with the same
        # columns (object dtype so the merge key matches the scraped strings).
        stats2 = pd.DataFrame(columns=list(BOWLING_COLUMNS.values()), dtype=object)

    # NOTE(review): the original never stored stats/stats2, so final_data stayed
    # empty. Joining on 'Match' and tagging rows with 'Player' is inferred from
    # later code that filters final_data['Player'] -- confirm the intended join.
    combined = stats.merge(stats2, on='Match', how='left')
    combined.insert(0, 'Player', player_name)
    player_frames.append(combined)

if player_frames:
    final_data = pd.concat(player_frames, ignore_index=True)
final_data
# Linear Regression
# Fitting the model and checking accuracy
# Features are every column except the first and the last; the target is the
# last column.
feature_labels = model1_df.columns[1:-1]
target_label = model1_df.columns[-1]
X = model1_df[feature_labels]
y = model1_df[target_label]
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import linear_model
# Using ridge regression to predict the next match's performance based on the
# same player's performance in past

# Regularisation strengths tried for every model.
ALPHAS = range(0, 101)

# Fewest complete rows needed to form both a train and a test split.
MIN_COMPLETE_ROWS = 2

# Model name -> (feature column positions, target column positions) within
# final_data. Prediction reuses the same feature positions as training, so a
# fitted model never sees a different number of features.
# NOTE(review): 'next_balls' uses the same target slice (22:23) as 'next_runs'
# in the original; kept unchanged, but it looks unintended -- confirm.
MODEL_SPECS = {
    'next_runs': (slice(2, 11), slice(22, 23)),
    'next_balls': (slice(11, 21), slice(22, 23)),
    'next_wkts': (slice(11, 21), slice(25, 26)),
    'next_overs': (slice(11, 21), slice(24, 25)),
}


def _fit_best_ridge(X, y):
    """Fit a Ridge model using the alpha with the best mean train/test score.

    ``X`` and ``y`` are split with ``train_test_split`` (default arguments).
    Every alpha in ``ALPHAS`` is scored on both splits; the alpha with the
    highest average of the two scores is chosen (the smallest alpha on ties)
    and a model with that alpha, fitted on the training split, is returned.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    records = []
    for alpha in ALPHAS:
        candidate = linear_model.Ridge(alpha=alpha).fit(X_train, y_train)
        records.append({
            'Alpha': alpha,
            'Train': candidate.score(X_train, y_train),
            'Test': candidate.score(X_test, y_test),
        })
    scores = pd.DataFrame(records)
    scores['Average'] = scores[['Train', 'Test']].mean(axis=1)
    # idxmax returns the first best row, so this is always a single scalar.
    best_alpha = int(scores.loc[scores['Average'].idxmax(), 'Alpha'])
    return linear_model.Ridge(alpha=best_alpha).fit(X_train, y_train)


models = pd.DataFrame()
for i in players_list:
    player = final_data[final_data['Player'] == i]
    player_new = player.dropna()
    if len(player_new) < MIN_COMPLETE_ROWS:
        # Too few complete rows to split into train and test sets.
        continue

    fitted = {}
    for model_name, (feature_cols, target_cols) in MODEL_SPECS.items():
        X = player_new[player_new.columns[feature_cols]]
        y = player_new[player_new.columns[target_cols]]
        fitted[model_name] = _fit_best_ridge(X, y)

    next_runs = fitted['next_runs']
    next_balls = fitted['next_balls']
    next_wkts = fitted['next_wkts']
    next_overs = fitted['next_overs']

    # Most recent row for this player. It is taken from `player`, not
    # `player_new`, as in the original.
    # NOTE(review): that row may contain NaN, which predict() rejects.
    latest = player.groupby('Player').tail(1)
    next_runs_given = next_runs.predict(
        latest[latest.columns[MODEL_SPECS['next_runs'][0]]])
    next_balls_faced = next_balls.predict(
        latest[latest.columns[MODEL_SPECS['next_balls'][0]]])
    next_wkts_taken = next_wkts.predict(
        latest[latest.columns[MODEL_SPECS['next_wkts'][0]]])
    next_overs_faced = next_overs.predict(
        latest[latest.columns[MODEL_SPECS['next_overs'][0]]])
    # NOTE(review): these predictions are not stored in `models` within the
    # visible code; each iteration overwrites the previous player's values.