Implementation of Prisoner’s Dilemma with Memory

Implementation of Prisoner’s Dilemma with Memory

import numpy as np
import pandas as pd
# Demo: build a 10-round history in which player 0 moves at random
# (a rounded uniform draw, so 0 or 1) and player 1 always plays 0.
rows = []
for i in range(10):
    coin = int(np.round(np.random.rand()))
    rows.append([coin, 0])
history = np.array(rows, dtype=int).reshape(-1, 2)
history
array([[0, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 0],
       [1, 0],
       [1, 0],
       [0, 0],
       [0, 0]])
def pick_fight(pl1,pl2,N=10):
    """Play N rounds between two strategies and return the move history.

    Each strategy is called as ``strategy(player_index, history_so_far)``
    and must return its move for the round as an integer
    (0 = stay silent, 1 = defect).

    Args:
        pl1: strategy playing as player 0 (column 0 of the history).
        pl2: strategy playing as player 1 (column 1 of the history).
        N: number of rounds to play.

    Returns:
        Integer array of shape (N, 2); row i holds the moves
        (pl1, pl2) of round i.
    """
    # Allocate the full history once instead of re-stacking (and thereby
    # re-copying) the whole array on every round.
    history = np.empty((N,2),int)
    for i in range(N):
        # Both players only see the rounds already played.
        played = history[:i]
        a = pl1(0,played)
        b = pl2(1,played)
        # Moves are written only after both players have chosen, so neither
        # can react to the other's move within the same round.
        history[i] = (a,b)
    return history

def score_history(history):
    """Return the mean per-round score of both players.

    Payoffs per round (A = column 0, B = column 1; 0 = silent, 1 = defect):
        both silent           -> (2, 2)
        A silent,  B defects  -> (0, 3)
        A defects, B silent   -> (3, 0)
        both defect           -> (1, 1)

    Args:
        history: array-like of shape (rounds, 2) holding the moves.
            It must contain at least one round, since the totals are
            divided by the number of rounds.

    Returns:
        Array ``[mean score of A, mean score of B]``.
    """
    # Use the argument itself; the previous version read a global named
    # ``h`` and therefore ignored whatever was passed in.
    history = np.asarray(history)
    a_moves = history[:,0]
    b_moves = history[:,1]

    s00 = np.sum((a_moves == 0) & (b_moves == 0)) # Both silent (2,2)
    s01 = np.sum((a_moves == 0) & (b_moves == 1)) # A silent,  B defects (0,3)
    s10 = np.sum((a_moves == 1) & (b_moves == 0)) # A defects, B silent (3,0)
    s11 = np.sum((a_moves == 1) & (b_moves == 1)) # Both defect (1,1)

    return np.array([s00*2+s10*3+s11*1,s00*2+s01*3+s11*1])/history.shape[0]
def pl_tit4tat(pl_no=0,history=None):
    """Tit-for-tat: open with silence, then repeat the opponent's last move.

    Args:
        pl_no: this player's column in the history (0 or 1).
        history: array-like of shape (rounds, 2) with the moves so far;
            ``None`` or an empty history means no round has been played.

    Returns:
        0 on the first round, otherwise the opponent's move from the
        most recent round.
    """
    if history is None:
        return 0
    # Accept plain lists as well as arrays (a bare list has no ``.size``).
    history = np.asarray(history)
    if not history.size:
        return 0
    # The opponent occupies the other of the two columns.
    opp_no = (pl_no + 1) % 2
    return history[-1,opp_no]

def pl_rando(pl_no=0,history=[]):
    """Play a random move, ignoring both the player index and the history.

    Returns:
        0 (silent) or 1 (defect), drawn with ``np.random.randint``.
    """
    move = np.random.randint(2)
    return move

def pl_good(pl_no=0,history=[]):
    """Always stay silent: return 0 whatever the player index or history."""
    silent = 0
    return silent

def pl_evil(pl_no=0,history=[]):
    """Always defect: return 1 whatever the player index or history."""
    defect = 1
    return defect

def pl_last10avg(pl_no=0,history=None):
    """Open with silence, then play the opponent's rounded recent average.

    Args:
        pl_no: this player's column in the history (0 or 1).
        history: array-like of shape (rounds, 2) with the moves so far;
            ``None`` or an empty history means no round has been played.

    Returns:
        0 on the first round, otherwise the mean of the opponent's last
        10 moves (fewer if fewer were played) rounded to an int. An exact
        0.5 mean rounds to 0, because ``np.round`` rounds halves to the
        nearest even value.
    """
    if history is None:
        return 0
    # Accept plain lists as well as arrays (a bare list has no ``.size``).
    history = np.asarray(history)
    if not history.size:
        return 0
    # The opponent occupies the other of the two columns.
    opp_no = (pl_no + 1) % 2
    return int(np.round(np.mean(history[-10:,opp_no])))
    
# Round-robin tournament: every strategy meets every strategy (itself
# included), once as 1st player (row) and once as 2nd player (column).
pls = ['pl_rando','pl_tit4tat','pl_good','pl_evil','pl_last10avg']
# Cell [row, col] will hold the tuple (row player's score, col player's score).
chart = pd.DataFrame(columns=pls,index=pls)

no_pl = len(pls)
for i in range(no_pl):
    for j in range(no_pl):
        print("{:s} vs {:s}".format(pls[i],pls[j]))
        # Look the strategy functions up by their names.
        p1 = globals()[pls[i]]
        p2 = globals()[pls[j]]
        h = pick_fight(p1,p2,N=10000)
        (a,b) = score_history(h)
        print("{:.2f}, {:.2f}".format(a,b))
        # The comparison uses the unrounded means, so a pair printed as
        # equal to two decimals can still produce a winner.
        if(a > b):
            print("{:s} (1st player) WINS!!!".format(pls[i]))
        elif(a < b):
            print("{:s} (2nd player) WINS!!!".format(pls[j]))
        else:
            print("IT'S A DRAW!!!")
        print("-"*45)

        # Write the cell in a single indexing step: chained assignment
        # (chart.loc[row][col] = ...) may write to a temporary copy.
        chart.at[pls[i],pls[j]] = (a,b)
print(chart)
for pl in pls:
    # Row-wise: what pl scored as 1st player (first tuple element)
    chart.loc[pl,'Tot'] = sum(chart.at[pl,opp][0] for opp in pls)

for pl in pls:
    # Col-wise: what pl scored as 2nd player (second tuple element).
    # Only the strategy rows are summed, selected by label: slicing off
    # "the last row" positionally dropped a real game for the first column,
    # because the 'ToT' row does not exist until this first assignment.
    chart.loc['ToT',pl] = sum(chart.at[opp,pl][1] for opp in pls)

# Grand total per strategy: score as 1st player + score as 2nd player.
totscore = pd.DataFrame(columns=pls)
for pl in pls:
    #print(pl,chart.loc[pl,'Tot'], chart.loc['ToT',pl])
    totscore.loc['tot',pl] = chart.loc[pl,'Tot'] + chart.loc['ToT',pl]
print(totscore)
pl_rando vs pl_rando
1.50, 1.49
pl_rando (1st player) WINS!!!
---------------------------------------------
pl_rando vs pl_tit4tat
1.50, 1.50
pl_rando (1st player) WINS!!!
---------------------------------------------
pl_rando vs pl_good
2.51, 0.99
pl_rando (1st player) WINS!!!
---------------------------------------------
pl_rando vs pl_evil
0.50, 2.00
pl_evil (2nd player) WINS!!!
---------------------------------------------
pl_rando vs pl_last10avg
1.76, 1.37
pl_rando (1st player) WINS!!!
---------------------------------------------
pl_tit4tat vs pl_rando
1.50, 1.50
pl_rando (2nd player) WINS!!!
---------------------------------------------
pl_tit4tat vs pl_tit4tat
2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_tit4tat vs pl_good
2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_tit4tat vs pl_evil
1.00, 1.00
pl_evil (2nd player) WINS!!!
---------------------------------------------
pl_tit4tat vs pl_last10avg
2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_good vs pl_rando
0.99, 2.51
pl_rando (2nd player) WINS!!!
---------------------------------------------
pl_good vs pl_tit4tat
2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_good vs pl_good
2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_good vs pl_evil
0.00, 3.00
pl_evil (2nd player) WINS!!!
---------------------------------------------
pl_good vs pl_last10avg
2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_evil vs pl_rando
2.00, 0.50
pl_evil (1st player) WINS!!!
---------------------------------------------
pl_evil vs pl_tit4tat
1.00, 1.00
pl_evil (1st player) WINS!!!
---------------------------------------------
pl_evil vs pl_good
3.00, 0.00
pl_evil (1st player) WINS!!!
---------------------------------------------
pl_evil vs pl_evil
1.00, 1.00
IT'S A DRAW!!!
---------------------------------------------
pl_evil vs pl_last10avg
1.00, 1.00
pl_evil (1st player) WINS!!!
---------------------------------------------
pl_last10avg vs pl_rando
1.39, 1.73
pl_rando (2nd player) WINS!!!
---------------------------------------------
pl_last10avg vs pl_tit4tat
2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_last10avg vs pl_good
2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_last10avg vs pl_evil
1.00, 1.00
pl_evil (2nd player) WINS!!!
---------------------------------------------
pl_last10avg vs pl_last10avg
2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
                      pl_rando        pl_tit4tat          pl_good  \
pl_rando      (1.5029, 1.4903)  (1.5018, 1.5015)  (2.5055, 0.989)   
pl_tit4tat    (1.5012, 1.5015)        (2.0, 2.0)       (2.0, 2.0)   
pl_good       (0.9866, 2.5067)        (2.0, 2.0)       (2.0, 2.0)   
pl_evil       (1.9982, 0.5009)  (1.0002, 0.9999)       (3.0, 0.0)   
pl_last10avg  (1.3856, 1.7324)        (2.0, 2.0)       (2.0, 2.0)   

                       pl_evil      pl_last10avg  
pl_rando      (0.4982, 2.0036)    (1.7624, 1.37)  
pl_tit4tat    (0.9999, 1.0002)        (2.0, 2.0)  
pl_good             (0.0, 3.0)        (2.0, 2.0)  
pl_evil             (1.0, 1.0)  (1.0002, 0.9999)  
pl_last10avg  (0.9999, 1.0002)        (2.0, 2.0)  
    pl_rando pl_tit4tat  pl_good  pl_evil pl_last10avg
tot  13.7702    17.0025  13.9756  16.0026      16.7554
# Display the chart, which now carries the row-wise 'Tot' column and the
# col-wise 'ToT' row in addition to the per-game score tuples.
chart
pl_rando pl_tit4tat pl_good pl_evil pl_last10avg Tot
pl_rando (1.5029, 1.4903) (1.5018, 1.5015) (2.5055, 0.989) (0.4982, 2.0036) (1.7624, 1.37) 7.7708
pl_tit4tat (1.5012, 1.5015) (2.0, 2.0) (2.0, 2.0) (0.9999, 1.0002) (2.0, 2.0) 8.5011
pl_good (0.9866, 2.5067) (2.0, 2.0) (2.0, 2.0) (0.0, 3.0) (2.0, 2.0) 6.9866
pl_evil (1.9982, 0.5009) (1.0002, 0.9999) (3.0, 0.0) (1.0, 1.0) (1.0002, 0.9999) 7.9986
pl_last10avg (1.3856, 1.7324) (2.0, 2.0) (2.0, 2.0) (0.9999, 1.0002) (2.0, 2.0) 8.3855
ToT 5.9994 8.5014 6.989 8.004 8.3699 NaN
# Display each strategy's combined total (row-wise 'Tot' + col-wise 'ToT').
totscore
pl_rando pl_tit4tat pl_good pl_evil pl_last10avg
tot 13.7702 17.0025 13.9756 16.0026 16.7554
# pl_evil's games as 1st player: one (pl_evil score, opponent score) tuple
# per opponent.
chart.loc['pl_evil',pls] # row-wise
pl_rando        (1.9982, 0.5009)
pl_tit4tat      (1.0002, 0.9999)
pl_good               (3.0, 0.0)
pl_evil               (1.0, 1.0)
pl_last10avg    (1.0002, 0.9999)
Name: pl_evil, dtype: object
# Summing the tuples concatenates them into one long tuple; taking every
# other element starting at index 0 keeps only pl_evil's own scores.
chart.loc['pl_evil',pls].sum()[0::2]
(1.9982, 1.0002, 3.0, 1.0, 1.0002)
# NOTE: this writes to a new lowercase 'tot' column, which is distinct from
# the existing 'Tot' column.
chart.loc['pl_evil','tot'] = np.sum(chart.loc['pl_evil',pls].sum()[0::2])
# pl_evil's games as 2nd player; the column also contains the numeric
# 'ToT' entry in its last row.
chart['pl_evil'] # col-wise
pl_rando        (0.4982, 2.0036)
pl_tit4tat      (0.9999, 1.0002)
pl_good               (0.0, 3.0)
pl_evil               (1.0, 1.0)
pl_last10avg    (0.9999, 1.0002)
ToT                        8.004
Name: pl_evil, dtype: object
# NOTE: the whole column is summed here, including the numeric 'ToT' entry,
# so the result is an array with that number added to every score instead
# of a tuple of pl_evil's per-game scores.
chart['pl_evil'].sum()[1::2]
array([10.0076,  9.0042, 11.004 ,  9.004 ,  9.0042])
# NOTE: chart['pl_evil'] still includes the numeric 'ToT' entry, so this
# overwrites pl_evil's 'ToT' with an inflated value (shown as 48.024 when
# the chart is displayed afterwards).
chart.loc['ToT','pl_evil'] = np.sum(chart['pl_evil'].sum()[1::2])
# Read a single cell: pl_evil (1st player) against pl_good (2nd player).
chart.loc['pl_evil','pl_good']
(3.0, 0.0)
# Set the lowercase 'tot' cell of pl_evil to 5; the other rows of that
# column stay NaN.
chart.loc['pl_evil','tot'] = 5
chart
pl_rando pl_tit4tat pl_good pl_evil pl_last10avg Tot tot
pl_rando (1.5029, 1.4903) (1.5018, 1.5015) (2.5055, 0.989) (0.4982, 2.0036) (1.7624, 1.37) 7.7708 NaN
pl_tit4tat (1.5012, 1.5015) (2.0, 2.0) (2.0, 2.0) (0.9999, 1.0002) (2.0, 2.0) 8.5011 NaN
pl_good (0.9866, 2.5067) (2.0, 2.0) (2.0, 2.0) (0.0, 3.0) (2.0, 2.0) 6.9866 NaN
pl_evil (1.9982, 0.5009) (1.0002, 0.9999) (3.0, 0.0) (1.0, 1.0) (1.0002, 0.9999) 7.9986 5.0
pl_last10avg (1.3856, 1.7324) (2.0, 2.0) (2.0, 2.0) (0.9999, 1.0002) (2.0, 2.0) 8.3855 NaN
ToT 5.9994 8.5014 6.989 48.024 8.3699 NaN NaN
# Take a copy of the chart to experiment on.
chart2 = chart.copy()
chart2
pl_rando pl_tit4tat pl_good pl_evil pl_last10avg Tot tot
pl_rando (1.5029, 1.4903) (1.5018, 1.5015) (2.5055, 0.989) (0.4982, 2.0036) (1.7624, 1.37) 7.7708 NaN
pl_tit4tat (1.5012, 1.5015) (2.0, 2.0) (2.0, 2.0) (0.9999, 1.0002) (2.0, 2.0) 8.5011 NaN
pl_good (0.9866, 2.5067) (2.0, 2.0) (2.0, 2.0) (0.0, 3.0) (2.0, 2.0) 6.9866 NaN
pl_evil (1.9982, 0.5009) (1.0002, 0.9999) (3.0, 0.0) (1.0, 1.0) (1.0002, 0.9999) 7.9986 5.0
pl_last10avg (1.3856, 1.7324) (2.0, 2.0) (2.0, 2.0) (0.9999, 1.0002) (2.0, 2.0) 8.3855 NaN
ToT 5.9994 8.5014 6.989 48.024 8.3699 NaN NaN
# Assigning a scalar to a row label sets every entry of the 'ToT' row to 0.
chart2.loc['ToT'] = 0
chart2
pl_rando pl_tit4tat pl_good pl_evil pl_last10avg Tot tot
pl_rando (1.5029, 1.4903) (1.5018, 1.5015) (2.5055, 0.989) (0.4982, 2.0036) (1.7624, 1.37) 7.7708 NaN
pl_tit4tat (1.5012, 1.5015) (2.0, 2.0) (2.0, 2.0) (0.9999, 1.0002) (2.0, 2.0) 8.5011 NaN
pl_good (0.9866, 2.5067) (2.0, 2.0) (2.0, 2.0) (0.0, 3.0) (2.0, 2.0) 6.9866 NaN
pl_evil (1.9982, 0.5009) (1.0002, 0.9999) (3.0, 0.0) (1.0, 1.0) (1.0002, 0.9999) 7.9986 5.0
pl_last10avg (1.3856, 1.7324) (2.0, 2.0) (2.0, 2.0) (0.9999, 1.0002) (2.0, 2.0) 8.3855 NaN
ToT 0 0 0 0 0 0.0000 0.0
# Set a single cell of the 'ToT' row by (row label, column label).
chart2.loc['ToT','pl_tit4tat'] = 5
chart2
pl_rando pl_tit4tat pl_good pl_evil pl_last10avg Tot tot
pl_rando (1.5029, 1.4903) (1.5018, 1.5015) (2.5055, 0.989) (0.4982, 2.0036) (1.7624, 1.37) 7.7708 NaN
pl_tit4tat (1.5012, 1.5015) (2.0, 2.0) (2.0, 2.0) (0.9999, 1.0002) (2.0, 2.0) 8.5011 NaN
pl_good (0.9866, 2.5067) (2.0, 2.0) (2.0, 2.0) (0.0, 3.0) (2.0, 2.0) 6.9866 NaN
pl_evil (1.9982, 0.5009) (1.0002, 0.9999) (3.0, 0.0) (1.0, 1.0) (1.0002, 0.9999) 7.9986 5.0
pl_last10avg (1.3856, 1.7324) (2.0, 2.0) (2.0, 2.0) (0.9999, 1.0002) (2.0, 2.0) 8.3855 NaN
ToT 0 5 0 0 0 0.0000 0.0