#!pip install chess
!pip install plotly
import chess
import chess.pgn
import pandas as pd
import time
from datetime import datetime
from pytz import timezone
import os
import requests
import re
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
Collecting plotly
Downloading plotly-6.0.1-py3-none-any.whl (14.8 MB)
Requirement already satisfied: packaging in c:\users\user\anaconda3\lib\site-packages (from plotly) (21.0)
Collecting narwhals>=1.15.1
Downloading narwhals-1.38.2-py3-none-any.whl (338 kB)
Requirement already satisfied: pyparsing>=2.0.2 in c:\users\user\anaconda3\lib\site-packages (from packaging->plotly) (3.0.4)
Installing collected packages: narwhals, plotly
Successfully installed narwhals-1.38.2 plotly-6.0.1
def download_pgn_file(username, year, month, output_dir, user_agent_email, timeout=30):
    """Download one month of a player's Chess.com games and save them as PGN.

    Args:
        username: Chess.com username whose games are requested.
        year: Four-digit year of the monthly archive.
        month: Month number (1-12) as an int; it is zero-padded in both the
            URL and the output filename.
        output_dir: Existing directory the ``.pgn`` file is written into.
        user_agent_email: Value sent as the ``User-Agent`` request header.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole download loop.

    Returns:
        None. Progress and errors are reported with ``print``; HTTP and
        network errors are caught, file-system errors propagate.
    """
    # The published-data API addresses monthly archives as /YYYY/MM/.
    url = f"https://api.chess.com/pub/player/{username}/games/{year}/{month:02d}/pgn"
    headers = {'User-Agent': user_agent_email}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Check for HTTP request errors
    except requests.HTTPError as http_err:
        # Read the status from the exception itself rather than a local that
        # only exists if requests.get() returned.
        failed = http_err.response
        if failed is not None and failed.status_code == 403:
            print(f"Access forbidden for {url}. Verify the URL and username.")
        else:
            print(f"HTTP error occurred: {http_err}")
        return
    except requests.RequestException as e:
        print(f"Error fetching PGN data: {e}")
        return

    pgn_data = response.text
    if not pgn_data:
        print(f"No PGN data available for {year}-{month:02d}")
        return

    filename = f"{username}_{year}_{month:02d}.pgn"
    # Explicit UTF-8 so the file does not depend on the platform's default
    # encoding (e.g. cp1252 on Windows).
    with open(os.path.join(output_dir, filename), 'w', encoding='utf-8') as file:
        file.write(pgn_data)
    print(f"Saved PGN data for {year}-{month:02d} to {filename}")
# Example usage: pull the twelve most recent monthly archives for one account
username = 'SJB_137'  # Replace with your Chess.com username
output_dir = './chess_games'
user_agent_email = os.getenv("CHESS_COM_EMAIL")  # export your email as an environment variable
# Create the download folder on first run
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
# Walk backwards one month at a time, starting with the current month
current_year = datetime.now().year
current_month = datetime.now().month
for months_back in range(12):
    shifted_month = current_month - months_back
    # A non-positive shifted month means we have wrapped into last year
    year = current_year if shifted_month > 0 else current_year - 1
    month = (shifted_month - 1) % 12 + 1
    download_pgn_file(username, year, month, output_dir, user_agent_email)
    time.sleep(1)  # To avoid rate limiting
Saved PGN data for 2025-05 to SJB_137_2025_05.pgn
Saved PGN data for 2025-04 to SJB_137_2025_04.pgn
Saved PGN data for 2025-03 to SJB_137_2025_03.pgn
Saved PGN data for 2025-02 to SJB_137_2025_02.pgn
Saved PGN data for 2025-01 to SJB_137_2025_01.pgn
Saved PGN data for 2024-12 to SJB_137_2024_12.pgn
Saved PGN data for 2024-11 to SJB_137_2024_11.pgn
Saved PGN data for 2024-10 to SJB_137_2024_10.pgn
Saved PGN data for 2024-09 to SJB_137_2024_09.pgn
Saved PGN data for 2024-08 to SJB_137_2024_08.pgn
Saved PGN data for 2024-07 to SJB_137_2024_07.pgn
Saved PGN data for 2024-06 to SJB_137_2024_06.pgn
def read_pgn_files(directory_path):
    """Parse every ``.pgn`` file in a directory into a flat list of games.

    Args:
        directory_path: Directory to scan (not recursive). Files whose name
            does not end in ``.pgn`` are ignored.

    Returns:
        A list of the objects returned by ``chess.pgn.read_game``, in sorted
        filename order and, within a file, in file order.
    """
    games = []
    # os.listdir() order is arbitrary; sorting makes the result reproducible.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(".pgn"):
            continue
        file_path = os.path.join(directory_path, filename)
        # Decode explicitly as UTF-8 instead of the platform default;
        # errors="replace" keeps a file saved in another encoding readable
        # rather than aborting the whole import on one odd byte.
        with open(file_path, encoding="utf-8", errors="replace") as pgn_file:
            # read_game() returns None at end of file, which ends the iterator.
            games.extend(iter(lambda: chess.pgn.read_game(pgn_file), None))
    return games
def extract_game_data(games):
    """Flatten parsed games into one dict of header fields and moves per game.

    Args:
        games: Iterable of game objects exposing ``headers`` (a mapping),
            ``board()`` and ``mainline_moves()``.

    Returns:
        A list of dicts with the keys Date, White, Black, Result, ECO, ECOUrl,
        WhiteElo, BlackElo, EndTime and Moves. Optional headers that a game
        does not carry are returned as an empty string.
    """
    games_data = []
    for game in games:
        headers = game.headers
        game_data = {
            "Date": headers["Date"],
            "White": headers["White"],
            "Black": headers["Black"],
            "Result": headers["Result"],
            # ECO is optional like the tags below; indexing it directly made
            # a single game without the tag abort the whole extraction.
            "ECO": headers.get("ECO", ""),
            "ECOUrl": headers.get("ECOUrl", ""),
            "WhiteElo": headers.get("WhiteElo", ""),
            "BlackElo": headers.get("BlackElo", ""),
            "EndTime": headers.get("EndTime", ""),
            # Main line rendered as numbered SAN text, e.g. "1. e4 e5 2. Nf3"
            "Moves": game.board().variation_san(game.mainline_moves()),
        }
        games_data.append(game_data)
    return games_data
def main(directory_path):
    """Parse the PGN files in ``directory_path`` and dump them to CSV.

    The table is written to ``chess_games_data.csv`` in the current working
    directory, without the index column.
    """
    rows = extract_game_data(read_pgn_files(directory_path))
    pd.DataFrame(rows).to_csv("chess_games_data.csv", index=False)
# Directory containing the downloaded PGN files
directory_path = "chess_games/"
# Parse every PGN in that directory and write chess_games_data.csv
main(directory_path)
# Reload the table from the CSV that main() just wrote
df = pd.read_csv("chess_games_data.csv")
# Define the function to apply
def update_result(row, username='SJB_137'):
    """Translate a PGN result code into the outcome from one player's side.

    Args:
        row: Mapping (e.g. a DataFrame row) with 'Result' and 'White' keys.
        username: Player whose perspective is used. Defaults to the account
            analysed in this notebook, so ``df.apply(update_result, axis=1)``
            keeps working unchanged.

    Returns:
        'Win', 'Loss' or 'Draw' for the three decisive PGN codes; any other
        value (e.g. '*') is passed through untouched.
    """
    result = row['Result']
    if result == '1/2-1/2':
        return 'Draw'
    if result not in ('1-0', '0-1'):
        return result
    # The player wins when the winning colour is the colour they played.
    white_won = result == '1-0'
    played_white = row['White'] == username
    return 'Win' if white_won == played_white else 'Loss'
# Overwrite 'Result' in place: PGN codes such as '1-0' are replaced by the value
# update_result() returns for each row
df['Result'] = df.apply(update_result, axis=1)
def convert_pacific_to_warsaw(time_str, date=None):
    """Convert a US/Pacific clock time such as '13:05:42 PDT' to Warsaw time.

    Args:
        time_str: 'HH:MM:SS', optionally followed by ' PDT' or ' PST'.
        date: Optional calendar day (``date``/``datetime``) the time belongs
            to. When given, the conversion uses the real offsets of that day.
            When omitted, a fixed reference day is used: a summer day for PDT
            and a winter day otherwise, so daylight saving is applied on both
            sides according to the abbreviation.

    Returns:
        A timezone-aware ``datetime`` in Europe/Warsaw.

    Raises:
        ValueError: If the remaining text is not in '%H:%M:%S' format.
    """
    # Define time zones
    pacific = timezone('US/Pacific')
    warsaw = timezone('Europe/Warsaw')
    # The abbreviation tells us whether daylight saving was in effect
    is_dst = 'PDT' in time_str
    if is_dst:
        time_str = time_str.replace(' PDT', '')
    elif 'PST' in time_str:
        time_str = time_str.replace(' PST', '')
    # Parse the clock time; strptime alone would place it on 1900-01-01
    clock = datetime.strptime(time_str, '%H:%M:%S').time()
    if date is None:
        # A bare time lands in the year 1900, for which the tz database holds
        # historical offsets rather than today's PST/PDT and CET/CEST, and the
        # PDT/PST hint is ignored. Anchor to a modern day that matches the hint.
        date = datetime(2000, 7, 1) if is_dst else datetime(2000, 1, 1)
    naive_pacific = datetime.combine(date, clock)
    # is_dst also resolves the repeated hour when clocks fall back
    time_pacific = pacific.localize(naive_pacific, is_dst=is_dst)
    # Convert to Warsaw time
    return time_pacific.astimezone(warsaw)
# Convert each 'EndTime' string to a timezone-aware Warsaw datetime
# NOTE(review): only the clock time is converted here; the calendar day comes
# from the separate 'Date' column parsed below — confirm both refer to the same
# day once the time has been shifted to Warsaw.
df['time_warsaw'] = df['EndTime'].apply(convert_pacific_to_warsaw)
# Convert the new column to datetime
df['time_warsaw'] = pd.to_datetime(df['time_warsaw'])
# Parse the PGN-style 'YYYY.MM.DD' strings in 'Date' into datetimes
df['Date'] = pd.to_datetime(df['Date'], format='%Y.%m.%d')
# Combine the `date` and `time_warsaw` columns
def combine_date_time(date, time_warsaw):
    """Build a Warsaw-localised datetime from a day and a Warsaw clock time.

    Args:
        date: Object with ``year``/``month``/``day`` giving the calendar day.
        time_warsaw: Datetime-like whose ``timetz()`` supplies hour, minute,
            second and microsecond.

    Returns:
        A ``datetime`` for that day and time, localised to Europe/Warsaw.
    """
    clock = time_warsaw.timetz()
    # Midnight of the requested day, then overwrite the time-of-day fields
    day_start = datetime.combine(date, datetime.min.time())
    naive_stamp = day_start.replace(
        hour=clock.hour,
        minute=clock.minute,
        second=clock.second,
        microsecond=clock.microsecond,
    )
    warsaw = timezone('Europe/Warsaw')
    return warsaw.localize(naive_stamp)
# Merge the 'Date' day with the 'time_warsaw' clock time, row by row
df['datetime_warsaw'] = df.apply(lambda row: combine_date_time(row['Date'], row['time_warsaw']), axis=1)
# Weekday name (e.g. 'Monday') of each combined timestamp
df.loc[:,'day_of_the_week'] = df['datetime_warsaw'].dt.day_name()
# Extract the hour and create the 'time_of_day' column
def get_time_of_day(hour):
    """Bucket an hour of the day into a named part of the day.

    Returns 'morning' for 6-11, 'afternoon' for 12-17, 'evening' for 18-23
    and 'graveyard' for anything else.
    """
    # Half-open [start, end) bands, checked in order
    bands = (
        (6, 12, 'morning'),
        (12, 18, 'afternoon'),
        (18, 24, 'evening'),
    )
    for start, end, label in bands:
        if start <= hour < end:
            return label
    return 'graveyard'
# Hour of the Warsaw timestamp and the part of the day it falls in
df.loc[:, 'hour'] = df['datetime_warsaw'].dt.hour
df.loc[:, 'time_of_day'] = df['hour'].map(get_time_of_day)
# Pick each rating from the side I played: White's columns when I was White,
# otherwise Black's
i_was_white = df['White'] == 'SJB_137'
df['My_Elo'] = df['WhiteElo'].where(i_was_white, df['BlackElo'])
df['Opponent_Elo'] = df['BlackElo'].where(i_was_white, df['WhiteElo'])
# Store both ratings as integers
for elo_column in ('My_Elo', 'Opponent_Elo'):
    df[elo_column] = df[elo_column].astype(int)
# Function to count the total number of moves in a single row
def count_all_moves(moves):
    """Count full moves in a numbered SAN string such as '1. e4 e5 2. Nf3'.

    Args:
        moves: Space-separated move text. Anything that is not a string (for
            example the NaN pandas yields for an empty CSV cell) is treated
            as a game with no moves.

    Returns:
        The number of full moves; a final move played only by White still
        counts as one.
    """
    if not isinstance(moves, str):
        return 0
    # Drop move-number tokens ('1.', '2.', ...): everything before their last
    # character is digits. What remains is one token per half-move.
    half_moves = sum(1 for part in moves.split() if not part[:-1].isdigit())
    # Two half-moves make a full move; round an unfinished last move up.
    return (half_moves + 1) // 2
# Apply the function to the 'Moves' column and store the result in 'number_moves'
df['number_moves'] = df['Moves'].apply(count_all_moves)
# Function to extract moves
def extract_moves(row):
    """Split a game's numbered SAN string into my moves and my opponent's.

    The text is expected as 'N. white black' triples. If Black has no reply to
    White's last move, Black's list is padded with '' so both lists have the
    same length.

    Returns:
        A ``(my_moves, opponent_moves)`` tuple of lists.
    """
    tokens = row['Moves'].split()
    # Within every triple, offset 1 is White's move and offset 2 is Black's
    white_moves = tokens[1::3]
    black_moves = tokens[2::3]
    black_moves.extend([''] * (len(white_moves) - len(black_moves)))
    if row['White'] == 'SJB_137':
        return white_moves, black_moves
    return black_moves, white_moves
# Build two list-valued columns: my moves and my opponent's moves for every game
df[['my_moves', 'opponent_moves']] = df.apply(lambda row: pd.Series(extract_moves(row)), axis=1)
# Longest move list across both columns; it fixes how many per-move columns are created
max_moves = max(df['my_moves'].map(len).max(), df['opponent_moves'].map(len).max())
# One column per move number: my_move_1..my_move_N and opponent_move_1..opponent_move_N
my_move_columns = pd.DataFrame(df['my_moves'].to_list(), index=df.index, columns=[f'my_move_{i+1}' for i in range(max_moves)])
opponent_move_columns = pd.DataFrame(df['opponent_moves'].to_list(), index=df.index, columns=[f'opponent_move_{i+1}' for i in range(max_moves)])
# Concatenate the new move columns with the original DataFrame
df = pd.concat([df, my_move_columns, opponent_move_columns], axis=1)
# Drop the temporary 'my_moves' and 'opponent_moves' list columns
df.drop(columns=['my_moves', 'opponent_moves'], inplace=True)
# The raw ratings are no longer needed once 'My_Elo' and 'Opponent_Elo' exist
df.drop(['WhiteElo', 'BlackElo'], axis=1, inplace=True)
def extract_opening_name(url):
    """Extract a readable opening name from an opening URL.

    The name is the text after 'openings/' up to the first '-<digit>' (where
    the move list starts) or, if the URL carries no move list, up to the end
    of the string. Hyphens are replaced by spaces.

    Args:
        url: URL string. Non-string values (e.g. the NaN pandas yields for an
            empty CSV cell) give None.

    Returns:
        The opening name, or None when no name can be found.
    """
    if not isinstance(url, str):
        return None
    # Lazy group: stop at the first '-<digit>' or, failing that, at the end,
    # so URLs without a trailing move list are no longer dropped.
    match = re.search(r"openings/([^0-9]+?)(?:-\d|$)", url)
    if match is None:
        return None
    return match.group(1).replace('-', ' ')
# Derive a readable opening name from each 'ECOUrl'
df['VariationName'] = df['ECOUrl'].apply(extract_opening_name)
# Save the enriched table; this overwrites the chess_games_data.csv written by main()
df.to_csv("chess_games_data.csv", index=False)
# Mean 'My_Elo' for each distinct 'datetime_warsaw' value
elo_by_timestamp = df.groupby('datetime_warsaw')['My_Elo'].mean()
df_daily = elo_by_timestamp.reset_index()
# Single line-and-marker series of rating against time
elo_trace = go.Scatter(
    x=df_daily['datetime_warsaw'],
    y=df_daily['My_Elo'],
    mode='lines+markers',
    name='My Elo',
)
fig = go.Figure()
fig.add_trace(elo_trace)
# Titles, grid lines on both axes, white template
fig.update_layout(
    title='My Chess.com Elo rating',
    xaxis_title='Date',
    yaxis_title='My Elo',
    xaxis=dict(showgrid=True),
    yaxis=dict(showgrid=True),
    template='plotly_white',
)
# Show plot
fig.show()
# Count games per calendar day (counting the non-null 'My_Elo' entries of each 'Date')
df_daily = df.groupby('Date')['My_Elo'].count().reset_index()
# Create the figure
fig = go.Figure()
# Add trace; the 'My_Elo' column holds the per-day game count here, so the
# series is labelled as a count rather than as a rating
fig.add_trace(go.Scatter(
    x=df_daily['Date'],
    y=df_daily['My_Elo'],
    mode='lines+markers',
    name='Games played'
))
# Titles, grid lines on both axes, white template
fig.update_layout(
    title='Daily # games played on Chess.com',
    xaxis_title='Date',
    yaxis_title='# games played',
    xaxis=dict(showgrid=True),
    yaxis=dict(showgrid=True),
    template='plotly_white'
)
# Show plot
fig.show()
# Games in which I had the white pieces. .copy() makes this an independent
# frame, so columns added to it later do not trigger SettingWithCopyWarning.
white = df[df['White'] == 'SJB_137'].copy()
white.Result.value_counts()
Win 649
Loss 548
Draw 93
Name: Result, dtype: int64
# Number of games played with the white pieces
total_games_white = len(white.index)
# Tally each outcome among those games
white_outcomes = white['Result']
total_wins_white = white_outcomes.eq('Win').sum()
total_losses_white = white_outcomes.eq('Loss').sum()
total_draws_white = white_outcomes.eq('Draw').sum()
# Games in which I had the black pieces. .copy() makes this an independent
# frame, so columns added to it later do not trigger SettingWithCopyWarning.
black = df[df['Black'] == 'SJB_137'].copy()
black.Result.value_counts()
Loss 645
Win 558
Draw 84
Name: Result, dtype: int64
# Number of games played with the black pieces
total_games_black = len(black.index)
# Tally each outcome among those games
black_outcomes = black['Result']
total_wins_black = black_outcomes.eq('Win').sum()
total_losses_black = black_outcomes.eq('Loss').sum()
total_draws_black = black_outcomes.eq('Draw').sum()
# Slice labels and values for the White pie
labels_white = ['Wins', 'Losses', 'Draws']
values_white = [total_wins_white, total_losses_white, total_draws_white]
# Slice labels and values for the Black pie
labels_black = ['Wins', 'Losses', 'Draws']
values_black = [total_wins_black, total_losses_black, total_draws_black]
# Two side-by-side 'domain' cells, which is the subplot type pie charts need
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]],
                    subplot_titles=['White stats', 'Black stats'])
# Pie chart 1
fig.add_trace(go.Pie(labels=labels_white, values=values_white, name='White'), 1, 1)
# Pie chart 2
fig.add_trace(go.Pie(labels=labels_black, values=values_black, name='Black'), 1, 2)
# Only set the figure title. make_subplots stores the subplot titles as layout
# annotations, so passing annotations=[...] here would replace them and the
# 'White stats' / 'Black stats' headings would disappear.
fig.update_layout(
    title_text='Comparison of results playing as Black or White',
)
# Show plot
fig.show()
As expected, playing as White gives me an advantage, which translates into a higher win rate.
https://www.365chess.com/eco.php
# Number of games per opening variation, most frequent first
df.VariationName.value_counts()
Scandinavian Defense Mieses Kotrc Variation 104
Scandinavian Defense 102
Queens Pawn Opening 97
Bishops Opening 87
Queens Pawn Opening Chigorin Variation 65
...
Kings Gambit Declined Keene Defense 1
Ruy Lopez Opening Classical Defense 1
Reti Opening Ross Gambit 1
Portuguese Opening 1
Birds Opening Dutch Variation 1
Name: VariationName, Length: 126, dtype: int64
# Keep the ten most frequent variation names together with their counts
opening_counts = df['VariationName'].value_counts().head(10).reset_index()
opening_counts.columns = ['VariationName', 'Count']
# Sort by 'Count' in ASCENDING order (smallest first)
opening_counts = opening_counts.sort_values(by='Count', ascending=True)
# Horizontal bar chart: one bar per variation, bar length = number of games
fig = px.bar(opening_counts, x='Count', y='VariationName', orientation='h',
title='Top 10 variations based on my Chess.com games',
labels={'VariationName': 'Variation name', 'Count': 'Count'})
fig.show()
# Function to convert chess notation to board coordinates (0-63)
def chess_notation_to_index(move):
    """Map a pawn or knight move in SAN (e.g. 'e4', 'Nf3') to a 0-63 index.

    The index is ``row * 8 + column`` with row 0 = rank 8 and column 0 =
    file a. For a move starting with 'N' the square is read after the piece
    letter; otherwise it is read from the first two characters.
    """
    file_map = {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'g': 6, 'h': 7}
    rank_map = {'1': 7, '2': 6, '3': 5, '4': 4, '5': 3, '6': 2, '7': 1, '8': 0}
    # Skip the leading piece letter of a knight move
    start = 1 if move[0] == 'N' else 0
    file_char = move[start]
    rank_char = move[start + 1]
    return rank_map[rank_char] * 8 + file_map[file_char]
# Board index (0-63) of my first move in every game as White.
# NOTE: using .loc here does not by itself silence SettingWithCopyWarning; pandas
# still warns if `white` was created as a plain filtered slice of df without .copy().
white.loc[:, 'my_move_1_index'] = white['my_move_1'].apply(chess_notation_to_index)
# How often each first move was played
white.my_move_1.value_counts()
c:\Users\User\anaconda3\lib\site-packages\pandas\core\indexing.py:1667: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
e4 788
d4 494
e3 7
Nf3 1
Name: my_move_1, dtype: int64
# Count how often each square was the target of my first move.
# Grid layout: row 0 = rank 8 ... row 7 = rank 1, column 0 = file a ... column 7 = file h
white_heatmap_data = [[0] * 8 for _ in range(8)]
for position in white['my_move_1_index']:
    row, col = divmod(position, 8)
    white_heatmap_data[row][col] += 1
board_files = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
board_ranks = ['8', '7', '6', '5', '4', '3', '2', '1']  # same order as the grid rows
# Create the heatmap
fig = go.Figure(data=go.Heatmap(
    z=white_heatmap_data,
    x=board_files,
    y=board_ranks,
    colorscale='Blues',
    colorbar=dict(title='Frequency')
))
# Write the count into every cell. On a categorical axis a numeric coordinate is
# the category's serial number, so (j, i) addresses column j / row i of the grid.
for i, rank_counts in enumerate(white_heatmap_data):
    for j, count in enumerate(rank_counts):
        fig.add_annotation(
            x=j, y=i,
            text=str(count),
            showarrow=False,
            font=dict(color='black', size=12)
        )
# Force categorical axes: the rank labels are numeric strings, which Plotly
# would otherwise auto-convert to numbers and place on a linear axis, so the
# annotations above would land on the wrong rows. With categories the ticks
# are labelled from x/y directly and no manual ticktext is needed.
# autorange='reversed' puts the first category (rank 8) at the top.
fig.update_layout(
    title='Distribution of my opening moves playing as WHITE on Chess.com',
    xaxis=dict(title='File', type='category'),
    yaxis=dict(title='Rank', type='category', autorange='reversed'),
    width=600,
    height=500,
    autosize=False
)
# Show plot
fig.show()
# Board index (0-63) of my first move in every game as Black.
# NOTE: using .loc here does not by itself silence SettingWithCopyWarning; pandas
# still warns if `black` was created as a plain filtered slice of df without .copy().
black.loc[:, 'my_move_1_index'] = black['my_move_1'].apply(chess_notation_to_index)
# How often each first move was played
black.my_move_1.value_counts()
e5 634
d5 508
e6 61
Nf6 27
Nc6 26
d6 8
f6 5
b6 3
Na6 3
g6 3
c5 2
a5 2
g5 2
b5 1
Nh6 1
h5 1
Name: my_move_1, dtype: int64
# Count how often each square was the target of my first move.
# Grid layout: row 0 = rank 8 ... row 7 = rank 1, column 0 = file a ... column 7 = file h
black_heatmap_data = [[0] * 8 for _ in range(8)]
for position in black['my_move_1_index']:
    row, col = divmod(position, 8)
    black_heatmap_data[row][col] += 1
board_files = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
board_ranks = ['8', '7', '6', '5', '4', '3', '2', '1']  # same order as the grid rows
# Create the heatmap
fig = go.Figure(data=go.Heatmap(
    z=black_heatmap_data,
    x=board_files,
    y=board_ranks,
    colorscale='Blues',
    colorbar=dict(title='Frequency')
))
# Write the count into every cell. On a categorical axis a numeric coordinate is
# the category's serial number, so (j, i) addresses column j / row i of the grid.
for i, rank_counts in enumerate(black_heatmap_data):
    for j, count in enumerate(rank_counts):
        fig.add_annotation(
            x=j, y=i,
            text=str(count),
            showarrow=False,
            font=dict(color='black', size=12)
        )
# Force categorical axes: the rank labels are numeric strings, which Plotly
# would otherwise auto-convert to numbers and place on a linear axis, so the
# annotations above would land on the wrong rows. With categories the ticks
# are labelled from x/y directly and no manual ticktext is needed.
# autorange='reversed' puts the first category (rank 8) at the top.
fig.update_layout(
    title='Distribution of my opening moves playing as BLACK on Chess.com',
    xaxis=dict(title='File', type='category'),
    yaxis=dict(title='Rank', type='category', autorange='reversed'),
    width=600,
    height=500,
    autosize=False
)
# Show plot
fig.show()
# Games per (first move, result) pair, pivoted so each result becomes a column
first_move_tally_white = white.groupby(['my_move_1', 'Result']).size()
move_results_white = first_move_tally_white.unstack(fill_value=0)
# Row total, then wins as a percentage of that total
move_results_white['Total'] = move_results_white.sum(axis=1)
move_results_white['Win Rate'] = move_results_white['Win'].div(move_results_white['Total']).mul(100)
move_results_white
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
Result | Draw | Loss | Win | Total | Win Rate |
---|---|---|---|---|---|
my_move_1 | |||||
Nf3 | 0 | 1 | 0 | 1 | 0.000000 |
d4 | 32 | 207 | 255 | 494 | 51.619433 |
e3 | 0 | 5 | 2 | 7 | 28.571429 |
e4 | 61 | 335 | 392 | 788 | 49.746193 |
# 8x8 grid of win rates, 0 where a square was never my first move.
# Grid layout: row 0 = rank 8 ... row 7 = rank 1, column 0 = file a ... column 7 = file h
white_heatmap_data_winrates = [[0 for _ in range(8)] for _ in range(8)]
# Games per (first-move square, result) pair, one column per result
move_results_white_index = white.groupby(['my_move_1_index', 'Result']).size().unstack(fill_value=0)
# Calculate win rates
move_results_white_index['Total'] = move_results_white_index.sum(axis=1)
move_results_white_index['Win Rate'] = (move_results_white_index['Win'] / move_results_white_index['Total']) * 100
# Fill the heatmap data with win rates
for move, row in move_results_white_index.iterrows():
    row_idx, col_idx = divmod(move, 8)
    white_heatmap_data_winrates[row_idx][col_idx] = row['Win Rate']
board_files = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
board_ranks = ['8', '7', '6', '5', '4', '3', '2', '1']  # same order as the grid rows
# Create the heatmap with percentage win rates
fig = go.Figure(data=go.Heatmap(
    z=white_heatmap_data_winrates,
    x=board_files,
    y=board_ranks,
    colorscale='Blues',
    colorbar=dict(title='Win Rate (%)')
))
# Write the percentage into every cell. On a categorical axis a numeric coordinate
# is the category's serial number, so (j, i) addresses column j / row i of the grid.
for i, rank_rates in enumerate(white_heatmap_data_winrates):
    for j, win_rate in enumerate(rank_rates):
        fig.add_annotation(
            x=j, y=i,
            text=f"{win_rate:.1f}%",
            showarrow=False,
            font=dict(color='black', size=12)
        )
# Force categorical axes: the rank labels are numeric strings, which Plotly
# would otherwise auto-convert to numbers and place on a linear axis, so the
# annotations above would land on the wrong rows. With categories the ticks
# are labelled from x/y directly and no manual ticktext is needed.
# autorange='reversed' puts the first category (rank 8) at the top.
fig.update_layout(
    title='Win rates based on my opening move playing as WHITE',
    xaxis=dict(title='File', type='category'),
    yaxis=dict(title='Rank', type='category', autorange='reversed'),
    width=600,
    height=500,
    autosize=False
)
# Show plot
fig.show()
# Games per (first move, result) pair, pivoted so each result becomes a column
first_move_tally_black = black.groupby(['my_move_1', 'Result']).size()
move_results_black = first_move_tally_black.unstack(fill_value=0)
# Row total, then wins as a percentage of that total
move_results_black['Total'] = move_results_black.sum(axis=1)
move_results_black['Win Rate'] = move_results_black['Win'].div(move_results_black['Total']).mul(100)
move_results_black
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
Result | Draw | Loss | Win | Total | Win Rate |
---|---|---|---|---|---|
my_move_1 | |||||
Na6 | 1 | 2 | 0 | 3 | 0.000000 |
Nc6 | 1 | 18 | 7 | 26 | 26.923077 |
Nf6 | 2 | 18 | 7 | 27 | 25.925926 |
Nh6 | 0 | 0 | 1 | 1 | 100.000000 |
a5 | 0 | 2 | 0 | 2 | 0.000000 |
b5 | 0 | 1 | 0 | 1 | 0.000000 |
b6 | 0 | 2 | 1 | 3 | 33.333333 |
c5 | 0 | 2 | 0 | 2 | 0.000000 |
d5 | 33 | 253 | 222 | 508 | 43.700787 |
d6 | 0 | 2 | 6 | 8 | 75.000000 |
e5 | 45 | 305 | 284 | 634 | 44.794953 |
e6 | 2 | 33 | 26 | 61 | 42.622951 |
f6 | 0 | 3 | 2 | 5 | 40.000000 |
g5 | 0 | 0 | 2 | 2 | 100.000000 |
g6 | 0 | 3 | 0 | 3 | 0.000000 |
h5 | 0 | 1 | 0 | 1 | 0.000000 |
# 8x8 grid of win rates, 0 where a square was never my first move.
# Grid layout: row 0 = rank 8 ... row 7 = rank 1, column 0 = file a ... column 7 = file h
black_heatmap_data_winrates = [[0 for _ in range(8)] for _ in range(8)]
# Games per (first-move square, result) pair, one column per result
move_results_black_index = black.groupby(['my_move_1_index', 'Result']).size().unstack(fill_value=0)
# Calculate win rates
move_results_black_index['Total'] = move_results_black_index.sum(axis=1)
move_results_black_index['Win Rate'] = (move_results_black_index['Win'] / move_results_black_index['Total']) * 100
# Fill the heatmap data with win rates
for move, row in move_results_black_index.iterrows():
    row_idx, col_idx = divmod(move, 8)
    black_heatmap_data_winrates[row_idx][col_idx] = row['Win Rate']
board_files = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
board_ranks = ['8', '7', '6', '5', '4', '3', '2', '1']  # same order as the grid rows
# Create the heatmap with percentage win rates
fig = go.Figure(data=go.Heatmap(
    z=black_heatmap_data_winrates,
    x=board_files,
    y=board_ranks,
    colorscale='Blues',
    colorbar=dict(title='Win Rate (%)')
))
# Write the percentage into every cell. On a categorical axis a numeric coordinate
# is the category's serial number, so (j, i) addresses column j / row i of the grid.
for i, rank_rates in enumerate(black_heatmap_data_winrates):
    for j, win_rate in enumerate(rank_rates):
        fig.add_annotation(
            x=j, y=i,
            text=f"{win_rate:.1f}%",
            showarrow=False,
            font=dict(color='black', size=12)
        )
# Force categorical axes: the rank labels are numeric strings, which Plotly
# would otherwise auto-convert to numbers and place on a linear axis, so the
# annotations above would land on the wrong rows. With categories the ticks
# are labelled from x/y directly and no manual ticktext is needed.
# autorange='reversed' puts the first category (rank 8) at the top.
fig.update_layout(
    title='Win rates based on my opening move playing as BLACK',
    xaxis=dict(title='File', type='category'),
    yaxis=dict(title='Rank', type='category', autorange='reversed'),
    width=600,
    height=500,
    autosize=False
)
# Show plot
fig.show()
# Games per (weekday, result) pair, pivoted so each result becomes a column
weekday_tally = df.groupby(['day_of_the_week', 'Result']).size()
day_of_the_week_results = weekday_tally.unstack(fill_value=0)
# Row total, then wins as a percentage of that total
day_of_the_week_results['Total'] = day_of_the_week_results.sum(axis=1)
day_of_the_week_results['Win Rate'] = day_of_the_week_results['Win'].div(day_of_the_week_results['Total']).mul(100)
day_of_the_week_results
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
Result | Draw | Loss | Win | Total | Win Rate |
---|---|---|---|---|---|
day_of_the_week | |||||
Friday | 25 | 154 | 162 | 341 | 47.507331 |
Monday | 33 | 174 | 215 | 422 | 50.947867 |
Saturday | 14 | 94 | 91 | 199 | 45.728643 |
Sunday | 16 | 135 | 109 | 260 | 41.923077 |
Thursday | 25 | 224 | 183 | 432 | 42.361111 |
Tuesday | 34 | 215 | 226 | 475 | 47.578947 |
Wednesday | 30 | 197 | 221 | 448 | 49.330357 |
# Reset index to use day_of_the_week as a column for plotting
day_of_the_week_results = day_of_the_week_results.reset_index()
# Calendar order for the bars, passed to px.bar via category_orders below
days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
# Horizontal bar chart: one bar per weekday, bar length = win rate in percent
fig = px.bar(day_of_the_week_results, x='Win Rate', y='day_of_the_week', orientation='h',
title='Win rate by day of the week',
labels={'day_of_the_week': 'Day', 'Win Rate': 'Win rate (%)'},
category_orders={'day_of_the_week': days_order})
fig.show()
So, it looks like Mondays and Wednesdays are the best days for me to play. What time of day though?
# Games per (time of day, result) pair, pivoted so each result becomes a column
time_of_day_tally = df.groupby(['time_of_day', 'Result']).size()
time_of_day_results = time_of_day_tally.unstack(fill_value=0)
# Row total, then wins as a percentage of that total
time_of_day_results['Total'] = time_of_day_results.sum(axis=1)
time_of_day_results['Win Rate'] = time_of_day_results['Win'].div(time_of_day_results['Total']).mul(100)
time_of_day_results
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
Result | Draw | Loss | Win | Total | Win Rate |
---|---|---|---|---|---|
time_of_day | |||||
afternoon | 10 | 58 | 75 | 143 | 52.447552 |
evening | 78 | 477 | 476 | 1031 | 46.168768 |
graveyard | 60 | 492 | 490 | 1042 | 47.024952 |
morning | 29 | 166 | 166 | 361 | 45.983380 |
# Reset index to use time_of_day as a column for plotting
time_of_day_results = time_of_day_results.reset_index()
# Bar order, passed to px.bar via category_orders below
time_of_day_order = ['morning', 'afternoon', 'evening', 'graveyard']
# Horizontal bar chart: one bar per time of day, bar length = win rate in percent
fig = px.bar(time_of_day_results, x='Win Rate', y='time_of_day', orientation='h',
title='Win rate by time of day',
labels={'time_of_day': 'Time of day', 'Win Rate': 'Win rate (%)'},
category_orders={'time_of_day': time_of_day_order})
fig.show()
It seems like afternoons are best. So Monday and Wednesday afternoons might be a sensible time to play.
# Games per (game length, result) pair, pivoted so each result becomes a column
game_length_tally = df.groupby(['number_moves', 'Result']).size()
number_moves_results = game_length_tally.unstack(fill_value=0)
# Row total, then each outcome as a percentage of that total
number_moves_results['Total'] = number_moves_results.sum(axis=1)
for outcome in ('Win', 'Loss', 'Draw'):
    number_moves_results[f'{outcome} Rate'] = (number_moves_results[outcome] / number_moves_results['Total']) * 100
number_moves_results
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
Result | Draw | Loss | Win | Total | Win Rate | Loss Rate | Draw Rate |
---|---|---|---|---|---|---|---|
number_moves | |||||||
1 | 0 | 0 | 2 | 2 | 100.0 | 0.0 | 0.0 |
2 | 0 | 0 | 1 | 1 | 100.0 | 0.0 | 0.0 |
3 | 0 | 0 | 3 | 3 | 100.0 | 0.0 | 0.0 |
4 | 0 | 0 | 25 | 25 | 100.0 | 0.0 | 0.0 |
5 | 0 | 3 | 9 | 12 | 75.0 | 25.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... |
100 | 1 | 0 | 0 | 1 | 0.0 | 0.0 | 100.0 |
104 | 0 | 0 | 1 | 1 | 100.0 | 0.0 | 0.0 |
115 | 1 | 0 | 0 | 1 | 0.0 | 0.0 | 100.0 |
116 | 1 | 0 | 0 | 1 | 0.0 | 0.0 | 100.0 |
137 | 1 | 0 | 0 | 1 | 0.0 | 0.0 | 100.0 |
98 rows × 7 columns
# Reset index to use number_moves as a column for plotting
number_moves_results = number_moves_results.reset_index()
# Line chart of the 'Win Rate' column (percent) against game length in moves
fig = px.line(number_moves_results, x='number_moves', y='Win Rate',
title='My win rate vs number of moves on Chess.com',
labels={'number_moves': 'Number of moves', 'Win Rate': 'Win rate (%)'})
fig.show()
From the above graphic I can see that my win rates generally tend to fall as the length of the game increases. That doesn't necessarily mean I tend to lose as the game length increases - the longer the game, the more likely the result will be a draw, I suspect. Let's plot draw rate against number of moves to see if this is the case.
# Line chart of the 'Draw Rate' column (percent) against game length in moves.
# The labels mapping must be keyed by the plotted column ('Draw Rate'); keyed by
# 'Win Rate' it had no effect and the y axis showed the raw column name.
fig = px.line(number_moves_results, x='number_moves', y='Draw Rate',
              title='My draw rate vs number of moves on Chess.com',
              labels={'number_moves': 'Number of moves', 'Draw Rate': 'Draw rate (%)'})
fig.show()