Implementation of the Prisoner’s Dilemma with Memory

Implementation of the Prisoner’s Dilemma with Memory

import numpy as np
import pandas as pd

# Demo history: ten rounds in which player A (column 0) moves at random
# and player B (column 1) always stays silent (0).
rows = []
for i in range(10):
    # One uniform draw per round, rounded to 0 (silent) or 1 (defect).
    first_move = int(np.round(np.random.rand()))
    rows.append((first_move, 0))
history = np.array(rows, dtype=int).reshape(-1, 2)

history

array([[0, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 0],
       [1, 0],
       [1, 0],
       [0, 0],
       [0, 0]])

def pick_fight(pl1,pl2,N=10):
    """Play N rounds between two strategies and return the move log.

    Parameters
    ----------
    pl1, pl2 : callable
        Strategies called as ``pl(player_index, history)``; ``pl1`` is called
        with index 0 and ``pl2`` with index 1. ``history`` is the (k, 2) array
        of the k rounds played so far (column 0 = pl1, column 1 = pl2).
    N : int
        Number of rounds; values below 1 yield an empty log.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (N, 2), one row per round.
    """
    rounds = max(N, 0)
    # Preallocate the whole log: stacking a new row with np.vstack each round
    # copies the entire history every time, which is quadratic in N.
    # Moves are stored as integers (strategies are expected to return 0 or 1).
    log = np.empty((rounds, 2), int)
    for turn in range(rounds):
        # Both players only see the rounds already played (a view, no copy).
        played = log[:turn]
        # pl1 is always asked first so random strategies draw in a fixed order.
        a = pl1(0, played)
        b = pl2(1, played)
        log[turn] = (a, b)
    return log

def score_history(history):
    """Return the mean per-round payoff of both players.

    Parameters
    ----------
    history : array-like of shape (n, 2)
        One row per round; column 0 holds player A's move and column 1
        player B's move (0 = silent, 1 = defect).

    Returns
    -------
    numpy.ndarray
        ``[mean_score_A, mean_score_B]``. With zero rounds the division is
        0/0, so both entries are NaN.
    """
    # Score the argument that was passed in, not a global of a similar name.
    history = np.asarray(history)
    a_moves = history[:, 0]
    b_moves = history[:, 1]

    s00 = np.sum((a_moves == 0) & (b_moves == 0))  # Both silent (2,2)
    s01 = np.sum((a_moves == 0) & (b_moves == 1))  # A silent,  B defects (0,3)
    s10 = np.sum((a_moves == 1) & (b_moves == 0))  # A defects, B silent (3,0)
    s11 = np.sum((a_moves == 1) & (b_moves == 1))  # Both defect (1,1)

    total_a = s00*2 + s10*3 + s11*1
    total_b = s00*2 + s01*3 + s11*1
    return np.array([total_a, total_b]) / history.shape[0]

def pl_tit4tat(pl_no=0,history=None):
    """Tit-for-tat: open with silence, then copy the opponent's last move.

    Parameters
    ----------
    pl_no : int
        This player's column in ``history`` (0 or 1).
    history : array-like of shape (n, 2), optional
        Moves played so far, one row per round. ``None`` or an empty
        history means no round has been played yet.

    Returns
    -------
    int
        0 (silent) on the first round, otherwise the opponent's most
        recent move.
    """
    if history is None:
        return 0
    # Accept plain lists as well as arrays; a bare list has no ``.size``.
    history = np.asarray(history)
    if history.size == 0:
        return 0
    opp_no = (pl_no + 1) % 2
    return history[-1, opp_no]

def pl_rando(pl_no=0,history=[]):
    """Random strategy: ignore both arguments and flip a fair coin.

    Returns 0 (silent) or 1 (defect), drawn from numpy's global random state.
    """
    coin = np.random.randint(low=0, high=2)  # upper bound is exclusive
    return coin

def pl_good(pl_no=0,history=[]):
    """Unconditional cooperator: stays silent (0) whatever has happened."""
    silent = 0
    return silent

def pl_evil(pl_no=0,history=[]):
    """Unconditional defector: defects (1) whatever has happened."""
    defect = 1
    return defect

def pl_last10avg(pl_no,history=None,window=10):
    """Mirror the opponent's average behaviour over their most recent moves.

    Parameters
    ----------
    pl_no : int
        This player's column in ``history`` (0 or 1).
    history : array-like of shape (n, 2), optional
        Moves played so far, one row per round. ``None`` or an empty
        history means no round has been played yet.
    window : int
        How many of the opponent's latest moves to average (default 10).

    Returns
    -------
    int
        0 (silent) on the first round; otherwise the rounded mean of the
        opponent's last ``window`` moves. numpy rounds exact halves to the
        nearest even value, so a 50/50 record yields 0.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        # history[-0:] would silently select the whole history instead.
        raise ValueError("window must be at least 1")
    if history is None:
        return 0
    # Accept plain lists as well as arrays; a bare list has no ``.size``.
    history = np.asarray(history)
    if history.size == 0:
        return 0
    opp_no = (pl_no + 1) % 2
    recent = history[-window:, opp_no]
    return int(np.round(np.mean(recent)))
    

# Round-robin tournament: every strategy plays every strategy (itself
# included) once as first player and once as second player.
# Explicit name -> function table rather than fishing names out of locals().
players = {
    strategy.__name__: strategy
    for strategy in (pl_rando, pl_tit4tat, pl_good, pl_evil, pl_last10avg)
}
pls = list(players)

# Cell (row, col) holds the (row player's score, column player's score) tuple,
# so the frame has to stay object-typed.
chart = pd.DataFrame(columns=pls,index=pls,dtype=object)

for first in pls:
    for second in pls:
        print("{:s} vs {:s}".format(first,second))
        h = pick_fight(players[first],players[second],N=10000)
        (a,b) = score_history(h)
        print("{:.2f}, {:.2f}".format(a,b))
        if(a > b):
            print("{:s} (1st player) WINS!!!".format(first))
        elif(a < b):
            print("{:s} (2nd player) WINS!!!".format(second))
        else:
            print("IT'S A DRAW!!!")
        print("-"*45)

        # Write the cell in a single indexing step. Chained indexing
        # (chart.loc[row][col] = ...) may assign into a temporary copy and
        # leave the chart untouched.
        chart.at[first,second] = (a,b)
print(chart)
for pl in pls:
    # Row-wise: what pl scored as first player against every opponent
    chart.loc[pl,'Tot'] = sum(chart.at[pl,opp][0] for opp in pls)

for pl in pls:
    # Col-wise: what pl scored as second player against every opponent.
    # Only the player rows are read, so the result does not depend on
    # whether the 'ToT' row has already been appended (a positional [:-1]
    # slice drops a real player from the first column processed).
    chart.loc['ToT',pl] = sum(chart.at[opp,pl][1] for opp in pls)

# Grand total per strategy: score as first player + score as second player.
totscore = pd.DataFrame(columns=pls)
for pl in pls:
    totscore.loc['tot',pl] = chart.loc[pl,'Tot'] + chart.loc['ToT',pl]
print(totscore)

pl_rando vs pl_rando
1.50, 1.49
pl_rando (1st player) WINS!!!
---------------------------------------------
pl_rando vs pl_tit4tat

1.50, 1.50
pl_rando (1st player) WINS!!!
---------------------------------------------
pl_rando vs pl_good
2.51, 0.99
pl_rando (1st player) WINS!!!
---------------------------------------------
pl_rando vs pl_evil
0.50, 2.00
pl_evil (2nd player) WINS!!!
---------------------------------------------
pl_rando vs pl_last10avg

1.76, 1.37
pl_rando (1st player) WINS!!!
---------------------------------------------
pl_tit4tat vs pl_rando
1.50, 1.50
pl_rando (2nd player) WINS!!!
---------------------------------------------
pl_tit4tat vs pl_tit4tat

2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_tit4tat vs pl_good
2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_tit4tat vs pl_evil

1.00, 1.00
pl_evil (2nd player) WINS!!!
---------------------------------------------
pl_tit4tat vs pl_last10avg

2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_good vs pl_rando
0.99, 2.51
pl_rando (2nd player) WINS!!!
---------------------------------------------
pl_good vs pl_tit4tat

2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_good vs pl_good
2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_good vs pl_evil
0.00, 3.00
pl_evil (2nd player) WINS!!!
---------------------------------------------
pl_good vs pl_last10avg

2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_evil vs pl_rando
2.00, 0.50
pl_evil (1st player) WINS!!!
---------------------------------------------
pl_evil vs pl_tit4tat
1.00, 1.00
pl_evil (1st player) WINS!!!
---------------------------------------------
pl_evil vs pl_good

3.00, 0.00
pl_evil (1st player) WINS!!!
---------------------------------------------
pl_evil vs pl_evil
1.00, 1.00
IT'S A DRAW!!!
---------------------------------------------
pl_evil vs pl_last10avg

1.00, 1.00
pl_evil (1st player) WINS!!!
---------------------------------------------
pl_last10avg vs pl_rando

1.39, 1.73
pl_rando (2nd player) WINS!!!
---------------------------------------------
pl_last10avg vs pl_tit4tat

2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_last10avg vs pl_good

2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
pl_last10avg vs pl_evil
1.00, 1.00
pl_evil (2nd player) WINS!!!
---------------------------------------------
pl_last10avg vs pl_last10avg

2.00, 2.00
IT'S A DRAW!!!
---------------------------------------------
                      pl_rando        pl_tit4tat          pl_good  \
pl_rando      (1.5029, 1.4903)  (1.5018, 1.5015)  (2.5055, 0.989)   
pl_tit4tat    (1.5012, 1.5015)        (2.0, 2.0)       (2.0, 2.0)   
pl_good       (0.9866, 2.5067)        (2.0, 2.0)       (2.0, 2.0)   
pl_evil       (1.9982, 0.5009)  (1.0002, 0.9999)       (3.0, 0.0)   
pl_last10avg  (1.3856, 1.7324)        (2.0, 2.0)       (2.0, 2.0)   

                       pl_evil      pl_last10avg  
pl_rando      (0.4982, 2.0036)    (1.7624, 1.37)  
pl_tit4tat    (0.9999, 1.0002)        (2.0, 2.0)  
pl_good             (0.0, 3.0)        (2.0, 2.0)  
pl_evil             (1.0, 1.0)  (1.0002, 0.9999)  
pl_last10avg  (0.9999, 1.0002)        (2.0, 2.0)  
    pl_rando pl_tit4tat  pl_good  pl_evil pl_last10avg
tot  13.7702    17.0025  13.9756  16.0026      16.7554

chart

	pl_rando	pl_tit4tat	pl_good	pl_evil	pl_last10avg	Tot
pl_rando	(1.5029, 1.4903)	(1.5018, 1.5015)	(2.5055, 0.989)	(0.4982, 2.0036)	(1.7624, 1.37)	7.7708
pl_tit4tat	(1.5012, 1.5015)	(2.0, 2.0)	(2.0, 2.0)	(0.9999, 1.0002)	(2.0, 2.0)	8.5011
pl_good	(0.9866, 2.5067)	(2.0, 2.0)	(2.0, 2.0)	(0.0, 3.0)	(2.0, 2.0)	6.9866
pl_evil	(1.9982, 0.5009)	(1.0002, 0.9999)	(3.0, 0.0)	(1.0, 1.0)	(1.0002, 0.9999)	7.9986
pl_last10avg	(1.3856, 1.7324)	(2.0, 2.0)	(2.0, 2.0)	(0.9999, 1.0002)	(2.0, 2.0)	8.3855
ToT	5.9994	8.5014	6.989	8.004	8.3699	NaN

totscore

	pl_rando	pl_tit4tat	pl_good	pl_evil	pl_last10avg
tot	13.7702	17.0025	13.9756	16.0026	16.7554

chart.loc['pl_evil',pls] # row-wise

pl_rando        (1.9982, 0.5009)
pl_tit4tat      (1.0002, 0.9999)
pl_good               (3.0, 0.0)
pl_evil               (1.0, 1.0)
pl_last10avg    (1.0002, 0.9999)
Name: pl_evil, dtype: object

# Summing a Series of (a, b) tuples concatenates them into one flat tuple;
# the [0::2] slice then keeps every first-player score.
chart.loc['pl_evil',pls].sum()[0::2]

(1.9982, 1.0002, 3.0, 1.0, 1.0002)

# Scratch check of the row total for pl_evil: first-player scores pulled out
# of the concatenated tuples, then added up. Note the lowercase 'tot' label,
# which creates a new column separate from 'Tot'.
evil_first_scores = chart.loc['pl_evil',pls].sum()[0::2]
chart.loc['pl_evil','tot'] = np.sum(evil_first_scores)

chart['pl_evil'] # col-wise

pl_rando        (0.4982, 2.0036)
pl_tit4tat      (0.9999, 1.0002)
pl_good               (0.0, 3.0)
pl_evil               (1.0, 1.0)
pl_last10avg    (0.9999, 1.0002)
ToT                        8.004
Name: pl_evil, dtype: object

# NOTE(review): the column now also contains the numeric 'ToT' entry (8.004),
# so the sum is no longer a flat tuple: the result shown below is an array in
# which every second-player score has 8.004 added to it. Restrict to the
# player rows (e.g. chart.loc[pls,'pl_evil']) to get the plain scores.
chart['pl_evil'].sum()[1::2]

array([10.0076,  9.0042, 11.004 ,  9.004 ,  9.0042])

# NOTE(review): this overwrites the 'ToT' entry for pl_evil with the sum of
# the offset array above (48.024 in the table below) instead of the real
# second-player total of 8.004, because the existing 'ToT' row is included in
# the column sum.
chart.loc['ToT','pl_evil'] = np.sum(chart['pl_evil'].sum()[1::2])

chart.loc['pl_evil','pl_good']

(3.0, 0.0)

# Scratch experiment: overwrite a single cell with a placeholder value; the
# other rows of the 'tot' column stay NaN.
chart.loc['pl_evil','tot'] = 5

chart

	pl_rando	pl_tit4tat	pl_good	pl_evil	pl_last10avg	Tot	tot
pl_rando	(1.5029, 1.4903)	(1.5018, 1.5015)	(2.5055, 0.989)	(0.4982, 2.0036)	(1.7624, 1.37)	7.7708	NaN
pl_tit4tat	(1.5012, 1.5015)	(2.0, 2.0)	(2.0, 2.0)	(0.9999, 1.0002)	(2.0, 2.0)	8.5011	NaN
pl_good	(0.9866, 2.5067)	(2.0, 2.0)	(2.0, 2.0)	(0.0, 3.0)	(2.0, 2.0)	6.9866	NaN
pl_evil	(1.9982, 0.5009)	(1.0002, 0.9999)	(3.0, 0.0)	(1.0, 1.0)	(1.0002, 0.9999)	7.9986	5.0
pl_last10avg	(1.3856, 1.7324)	(2.0, 2.0)	(2.0, 2.0)	(0.9999, 1.0002)	(2.0, 2.0)	8.3855	NaN
ToT	5.9994	8.5014	6.989	48.024	8.3699	NaN	NaN

# Experiment on a copy so the assignments that follow go to chart2 rather
# than chart.
chart2 = chart.copy()

chart2

	pl_rando	pl_tit4tat	pl_good	pl_evil	pl_last10avg	Tot	tot
pl_rando	(1.5029, 1.4903)	(1.5018, 1.5015)	(2.5055, 0.989)	(0.4982, 2.0036)	(1.7624, 1.37)	7.7708	NaN
pl_tit4tat	(1.5012, 1.5015)	(2.0, 2.0)	(2.0, 2.0)	(0.9999, 1.0002)	(2.0, 2.0)	8.5011	NaN
pl_good	(0.9866, 2.5067)	(2.0, 2.0)	(2.0, 2.0)	(0.0, 3.0)	(2.0, 2.0)	6.9866	NaN
pl_evil	(1.9982, 0.5009)	(1.0002, 0.9999)	(3.0, 0.0)	(1.0, 1.0)	(1.0002, 0.9999)	7.9986	5.0
pl_last10avg	(1.3856, 1.7324)	(2.0, 2.0)	(2.0, 2.0)	(0.9999, 1.0002)	(2.0, 2.0)	8.3855	NaN
ToT	5.9994	8.5014	6.989	48.024	8.3699	NaN	NaN

# Assigning a scalar to a row label fills every column of that row.
chart2.loc['ToT'] = 0

chart2

	pl_rando	pl_tit4tat	pl_good	pl_evil	pl_last10avg	Tot	tot
pl_rando	(1.5029, 1.4903)	(1.5018, 1.5015)	(2.5055, 0.989)	(0.4982, 2.0036)	(1.7624, 1.37)	7.7708	NaN
pl_tit4tat	(1.5012, 1.5015)	(2.0, 2.0)	(2.0, 2.0)	(0.9999, 1.0002)	(2.0, 2.0)	8.5011	NaN
pl_good	(0.9866, 2.5067)	(2.0, 2.0)	(2.0, 2.0)	(0.0, 3.0)	(2.0, 2.0)	6.9866	NaN
pl_evil	(1.9982, 0.5009)	(1.0002, 0.9999)	(3.0, 0.0)	(1.0, 1.0)	(1.0002, 0.9999)	7.9986	5.0
pl_last10avg	(1.3856, 1.7324)	(2.0, 2.0)	(2.0, 2.0)	(0.9999, 1.0002)	(2.0, 2.0)	8.3855	NaN
ToT	0	0	0	0	0	0.0000	0.0

# Row label + column label addresses exactly one cell; the rest of the row
# is left as it was.
chart2.loc['ToT','pl_tit4tat'] = 5

chart2

	pl_rando	pl_tit4tat	pl_good	pl_evil	pl_last10avg	Tot	tot
pl_rando	(1.5029, 1.4903)	(1.5018, 1.5015)	(2.5055, 0.989)	(0.4982, 2.0036)	(1.7624, 1.37)	7.7708	NaN
pl_tit4tat	(1.5012, 1.5015)	(2.0, 2.0)	(2.0, 2.0)	(0.9999, 1.0002)	(2.0, 2.0)	8.5011	NaN
pl_good	(0.9866, 2.5067)	(2.0, 2.0)	(2.0, 2.0)	(0.0, 3.0)	(2.0, 2.0)	6.9866	NaN
pl_evil	(1.9982, 0.5009)	(1.0002, 0.9999)	(3.0, 0.0)	(1.0, 1.0)	(1.0002, 0.9999)	7.9986	5.0
pl_last10avg	(1.3856, 1.7324)	(2.0, 2.0)	(2.0, 2.0)	(0.9999, 1.0002)	(2.0, 2.0)	8.3855	NaN
ToT	0	5	0	0	0	0.0000	0.0