Tuesday, 6 February 2024

Naive skill estimation

Naive Skill Estimation. As simple as it gets.



500 different seeds lead to this distribution:




Mean: 0.5333787274909965
Std: 0.15858765760754445




...................................

from dataclasses import dataclass
import random
import trueskill
from scipy.stats import spearmanr


# set seed 
random.seed(0)

def run(n_games):

    @dataclass
    class Player:
        name: str
        skill: int
        estimated_skill: trueskill.Rating = None

    n_players = 100
    skill_n = 100
    players = [Player(f"Player {i}", random.randint(1, skill_n)) for i in range(n_players)]
    ground_truth = [player for player in players]
    ground_truth.sort(key=lambda x: x.skill, reverse=True)


    players.sort(key=lambda x: x.skill, reverse=True)

    random.shuffle(players)


    @dataclass 
    class GameScore:
        player: Player
        score: float


    @dataclass 
    class Game:
        name: str
        players: list[Player]
        scores: list[GameScore]

    games:list[Game] = []

    for i in range(n_games):
        n_players_in_this_game = random.randint(2, n_players)
        game_players = random.sample(players, n_players_in_this_game)

        sigma = random.randint(1, int(skill_n * 0.2))

        scores = []
        for player in game_players:
            in_game_skill = player.skill + random.gauss(0, sigma)
            scores.append(GameScore(player, in_game_skill))

        scores.sort(key=lambda x: x.score, reverse=True)

        games.append(Game(f'Game {i}', game_players, scores))



    def get_score(ground_truth: list[Player], players: list[Player]) -> float:
        expected = [player.name for player in ground_truth]
        actual = [player.name for player in players]
        return spearmanr(expected, actual).correlation


    for player in players:
        player.estimated_skill = 0


    for game in games:

        for i, score in enumerate(game.scores):
            score.player.estimated_skill += len(game.scores) - i


    players.sort(key=lambda x: x.estimated_skill, reverse=True)


    return get_score(ground_truth, players)


from tqdm import tqdm
x = list(range(1, 2000, 5))
y = [run(n) for n in tqdm(x)]

import matplotlib.pyplot as plt
plt.plot(x, y)
plt.show()

Historgram created via:

from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

results = []
for i in tqdm(range(500)):
    random.seed(i)
    results.append(run(1000))

results = np.array(results)

# histogram with 50 bins
plt.hist(results, bins=30)
plt.show()

print(f"Mean: {results.mean()}")
print(f"Std: {results.std()}")

No comments:

Post a Comment

Parse Wikipedia dump

""" This module processes Wikipedia dump files by extracting individual articles and parsing them into a structured format, ...