0% found this document useful (0 votes)
5 views4 pages

Dream Team

The document outlines a Python script that utilizes Selenium and Pandas to scrape cricket player statistics from a website, processes the data into a DataFrame, and performs linear regression and ridge regression to predict future performance metrics for players. It includes steps for data extraction, cleaning, and merging batting and bowling statistics, followed by model training and evaluation. Finally, it generates predictions for various performance metrics based on historical data.

Uploaded by

995aarvee
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
5 views4 pages

Dream Team

The document outlines a Python script that utilizes Selenium and Pandas to scrape cricket player statistics from a website, processes the data into a DataFrame, and performs linear regression and ridge regression to predict future performance metrics for players. It includes steps for data extraction, cleaning, and merging batting and bowling statistics, followed by model training and evaluation. Finally, it generates predictions for various performance metrics based on historical data.

Uploaded by

995aarvee
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 4

import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Start a Chrome browser session that the scraping loop below drives.
driver = webdriver.Chrome()

# Accumulates one block of per-match rows for every scraped player.
final_data = pd.DataFrame()

# Open the stats page once so the player search form is available.
# Sample URL used in the example (update with the actual player URLs).
# The literal is built from adjacent string pieces so it stays a single,
# properly terminated URL instead of being broken across physical lines.
driver.get(
    "http://www.cricmetric.com/playerstats.py?"
    "player=KA+Pollard&role=all&format=T20I&groupby=match"
    "&start_date=2021-10-17&end_date=2022-10-17"
)

# List of players to iterate over (update this list with actual player names
# or IDs).
players = ["player1", "player2", "player3"]  # Example players

# Source-table label -> name used in `final_data`, in output column order.
# NOTE(review): 'SR', '50' and '100' are inferred from the target names
# 'Bat SR', '50' and '100' (the original selected 7 labels but assigned 10
# names, which cannot work) -- confirm against the live table header.
BATTING_COLUMNS = {
    'Match': 'Match',
    'Runs': 'Runs Scored',
    'Balls': 'Balls Played',
    'Out': 'Out',
    'SR': 'Bat SR',
    '50': '50',
    '100': '100',
    '4s': '4s Scored',
    '6s': '6s Scored',
    'Dot': 'Bat Dot%',
}

# NOTE(review): 'Avg' is inferred from the target name 'Bowl Avg'; the
# original paired 'SR' with 'Bowl Avg' and shifted every later name by one.
# 'Dot%' is no longer selected because no output name was ever given to it.
BOWLING_COLUMNS = {
    'Match': 'Match',
    'Overs': 'Overs Bowled',
    'Runs': 'Runs Given',
    'Wickets': 'Wickets Taken',
    'Econ': 'Econ',
    'Avg': 'Bowl Avg',
    'SR': 'Bowl SR',
    '5W': '5W',
    '4s': '4s Given',
    '6s': '6s Given',
}

SUBMIT_XPATH = '/html/body/div[1]/div[1]/div[2]/div/form/input[3]'


def _parse_stats_table(table_text, column_map):
    """Turn the visible text of a stats table into a renamed DataFrame.

    Args:
        table_text: Text of the table element, one table row per line.
        column_map: Mapping of header label -> output column name; only these
            columns are kept, in the mapping's order.

    Returns:
        DataFrame of string cells with the output column names.

    Raises:
        ValueError: If the text holds no data rows.
        KeyError: If an expected header label is missing.
    """
    # Cells are split on whitespace: the original removed a stray '%' column,
    # which only appears when a "Dot %" header is split that way.
    rows = pd.Series(table_text.split('\n')).str.split(expand=True)
    # The last line is discarded, as in the original `[0:-1]` slice
    # (presumably a totals row -- TODO confirm).
    rows = rows.iloc[:-1]
    if len(rows) < 2:
        raise ValueError('table has no data rows')
    rows.columns = rows.iloc[0]  # first line holds the header labels
    rows = rows.iloc[1:]
    stats = rows[list(column_map)].copy()
    stats.columns = list(column_map.values())
    return stats


player_frames = []

for i in players:
    # Search for the player through the page's form.
    search_box = driver.find_element(By.XPATH, '//*[@id="player"]')
    search_box.clear()
    search_box.send_keys(i)
    try:
        driver.find_element(By.XPATH, SUBMIT_XPATH).click()
    except WebDriverException:
        # Same single retry as the original; a second failure propagates.
        driver.find_element(By.XPATH, SUBMIT_XPATH).click()
    time.sleep(3)  # give the results page time to load

    try:
        # Batting data
        bat = driver.find_element(
            By.XPATH, '//*[@id="T20I-Batting"]/div/table').text
        stats = _parse_stats_table(bat, BATTING_COLUMNS)
    except (WebDriverException, KeyError, ValueError) as error:
        # Without a usable batting table the player is skipped, as before,
        # but the reason is now reported instead of silently swallowed.
        print('Skipping', i, '- no usable batting table:', error)
        continue
    time.sleep(5)

    try:
        # Bowling data
        bowl = driver.find_element(
            By.XPATH, '//*[@id="T20I-Bowling"]/div/table').text
        stats2 = _parse_stats_table(bowl, BOWLING_COLUMNS)
    except (WebDriverException, KeyError, ValueError):
        # No bowling table: fall back to an empty frame with the same schema.
        stats2 = pd.DataFrame(columns=list(BOWLING_COLUMNS.values()))

    # An outer merge keeps matches that appear in only one table; the default
    # inner merge discarded the whole player whenever the bowling frame was
    # empty, which made the zero-filling below pointless.
    overall = pd.merge(stats, stats2, on='Match', how='outer')

    # Scraped cells are text; convert them so the sum below adds numbers
    # instead of concatenating strings. Unparseable cells become 0.
    stat_columns = overall.columns.drop('Match')
    overall[stat_columns] = overall[stat_columns].apply(
        pd.to_numeric, errors='coerce')
    overall = overall.fillna(0)

    overall['overall'] = overall['Runs Scored'] + overall['Wickets Taken']  # Example calculation
    overall = overall.sort_values(by='Match')
    overall.insert(0, 'Player', i)
    player_frames.append(overall)

# DataFrame.append was removed in pandas 2.0; concatenate once at the end.
if player_frames:
    final_data = pd.concat([final_data] + player_frames)

final_data

from sklearn.model_selection import train_test_split


from sklearn import linear_model

# Linear regression on `model1_df`.
# NOTE: `model1_df` is not created in this part of the script; it must
# already be defined before this section runs.

# Every column except the first and the last is a feature; the last column
# is the target.
feature_columns = model1_df.columns[1:-1]
target_column = model1_df.columns[-1]
X = model1_df[feature_columns]
y = model1_df[target_column]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=9999)

points_model = linear_model.LinearRegression()
points_model.fit(X_train, y_train)

# For scikit-learn regressors `score` is the R^2 coefficient of
# determination, which is what the "accuracy" labels below report.
train_score = points_model.score(X_train, y_train)
test_score = points_model.score(X_test, y_test)

print('Training set accuracy:', train_score)
print('Test set accuracy:', test_score)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import linear_model

# Use ridge regression to predict each player's next-match performance from
# that same player's past performances.

# (output column, feature column slice, target column position), all
# positional indexes into `final_data`.
# NOTE(review): these positions are copied from the original code and
# presume a `final_data` layout with at least 26 columns -- verify against
# the frame actually built upstream. 'next_runs_given' and 'next_balls_faced'
# share target column 22 in the original; that looks like a copy-paste slip
# but cannot be confirmed from here, so it is left unchanged.
PREDICTION_SPECS = [
    ('next_runs_given', slice(2, 11), 22),
    ('next_balls_faced', slice(11, 21), 22),
    ('next_wkts_taken', slice(11, 21), 25),
    ('next_overs_faced', slice(11, 21), 24),
]


def _fit_best_ridge(features, target, alphas=range(0, 101), random_state=123):
    """Fit a Ridge model using the alpha with the best mean train/test score.

    Args:
        features: DataFrame of predictor columns.
        target: 1-D Series of values to predict.
        alphas: Candidate regularisation strengths, tried in order.
        random_state: Seed passed to `train_test_split`.

    Returns:
        A Ridge model fitted on the training split with the chosen alpha.

    Raises:
        ValueError: If no alpha yields a finite average score (for example
            when the test split is too small for R^2 to be defined).
    """
    import math

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, random_state=random_state)

    best_alpha = None
    best_average = None
    for alpha in alphas:
        candidate = linear_model.Ridge(alpha=alpha).fit(X_train, y_train)
        average = (candidate.score(X_train, y_train)
                   + candidate.score(X_test, y_test)) / 2
        if math.isnan(average):
            continue
        # Strict comparison keeps the first alpha on ties, so the result is
        # always a single scalar.
        if best_average is None or average > best_average:
            best_alpha, best_average = alpha, average

    if best_alpha is None:
        raise ValueError('no alpha produced a finite train/test score')
    return linear_model.Ridge(alpha=best_alpha).fit(X_train, y_train)


output_columns = [spec[0] for spec in PREDICTION_SPECS]
predicted_players = []
predicted_rows = []

# NOTE(review): `players_list` is not defined in the visible part of the
# script (the scraping section uses `players`) -- confirm it exists.
for i in players_list:
    player = final_data[final_data['Player'] == i]
    player_new = player.dropna()

    # A train/test split needs at least two complete rows.
    if len(player_new) < 2:
        print('Skipping', i, '- not enough complete rows to fit a model')
        continue

    # Most recent row for this player, taken before dropping incomplete rows
    # (as in the original).
    latest = player.tail(1)

    row = {}
    for output_name, feature_cols, target_pos in PREDICTION_SPECS:
        model = _fit_best_ridge(
            player_new.iloc[:, feature_cols], player_new.iloc[:, target_pos])
        # Predict from the same columns the model was trained on. The
        # original fed columns 11:21 to a model fitted on columns 2:11.
        prediction = model.predict(latest.iloc[:, feature_cols])
        row[output_name] = round(float(prediction[0]), 0)

    predicted_players.append(i)
    predicted_rows.append(row)

# One row per player, one column per predicted metric.
models = pd.DataFrame(
    predicted_rows, index=predicted_players, columns=output_columns)

# Display the models DataFrame with predictions
print(models)

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy