diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml new file mode 100644 index 0000000..acb4196 --- /dev/null +++ b/.idea/codeStyles/Project.xml @@ -0,0 +1,9 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml new file mode 100644 index 0000000..a55e7a1 --- /dev/null +++ b/.idea/codeStyles/codeStyleConfig.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/ChessGame_tf2.py b/ChessGame_tf2.py index 279a08f..4c3a7dd 100755 --- a/ChessGame_tf2.py +++ b/ChessGame_tf2.py @@ -182,9 +182,9 @@ def change_player(self): def perform_AI(self): print ('...AI is calculating...') - START_TIME = time.clock() + START_TIME = time.perf_counter() move, win_rate = self.cchess_engine.select_move(self.ai_function) - time_used = time.clock() - START_TIME + time_used = time.perf_counter() - START_TIME print ('...Use %fs...' % time_used) if self.current_player == "w": self.time_red.append(time_used) diff --git a/Mastering_Chess_and_Shogi_by_Self-Play_with_a_General_Reinforcement_Learning_Algorithm.ipynb b/Mastering_Chess_and_Shogi_by_Self-Play_with_a_General_Reinforcement_Learning_Algorithm.ipynb index 8d7c777..00ccd44 100644 --- a/Mastering_Chess_and_Shogi_by_Self-Play_with_a_General_Reinforcement_Learning_Algorithm.ipynb +++ b/Mastering_Chess_and_Shogi_by_Self-Play_with_a_General_Reinforcement_Learning_Algorithm.ipynb @@ -579,7 +579,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/Mastering_the_Game_of_Go_without_Human_Knowledge.ipynb b/Mastering_the_Game_of_Go_without_Human_Knowledge.ipynb index 6626403..5df5189 100644 --- a/Mastering_the_Game_of_Go_without_Human_Knowledge.ipynb +++ b/Mastering_the_Game_of_Go_without_Human_Knowledge.ipynb @@ -837,7 +837,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/cchess-zero.ipynb b/cchess-zero.ipynb index 229469e..8ea1a9c 100644 --- a/cchess-zero.ipynb +++ b/cchess-zero.ipynb @@ -82,7 +82,7 @@ "我们是要抛弃人类棋谱的,学会如何下棋完全是通过自对弈来完成。\n", "\n", "过程是这样,首先生成棋谱,然后将棋谱作为输入训练神经网络,训练好的神经网络用来预测落子和胜率。如下图:\n", - "![a1](assets\\a1.png\")" + "" ] }, { diff --git a/chessman/__pycache__/Bing.cpython-35.pyc b/chessman/__pycache__/Bing.cpython-35.pyc index 2d8028c..8747aa4 100644 Binary files a/chessman/__pycache__/Bing.cpython-35.pyc and b/chessman/__pycache__/Bing.cpython-35.pyc differ diff --git a/chessman/__pycache__/Che.cpython-35.pyc b/chessman/__pycache__/Che.cpython-35.pyc index 441c38d..988e295 100644 Binary files a/chessman/__pycache__/Che.cpython-35.pyc and b/chessman/__pycache__/Che.cpython-35.pyc differ diff --git a/chessman/__pycache__/Ma.cpython-35.pyc b/chessman/__pycache__/Ma.cpython-35.pyc index 78db3bc..ed92bae 100644 Binary files a/chessman/__pycache__/Ma.cpython-35.pyc and b/chessman/__pycache__/Ma.cpython-35.pyc differ diff --git a/chessman/__pycache__/Pao.cpython-35.pyc b/chessman/__pycache__/Pao.cpython-35.pyc index 2fd0631..364e38b 100644 Binary files a/chessman/__pycache__/Pao.cpython-35.pyc and b/chessman/__pycache__/Pao.cpython-35.pyc differ diff --git a/chessman/__pycache__/Shi.cpython-35.pyc b/chessman/__pycache__/Shi.cpython-35.pyc index 16d4355..55c01cf 100644 Binary files a/chessman/__pycache__/Shi.cpython-35.pyc and b/chessman/__pycache__/Shi.cpython-35.pyc differ diff --git a/chessman/__pycache__/Shuai.cpython-35.pyc b/chessman/__pycache__/Shuai.cpython-35.pyc index e9e3ead..b7b9587 100644 Binary files a/chessman/__pycache__/Shuai.cpython-35.pyc and b/chessman/__pycache__/Shuai.cpython-35.pyc differ diff --git a/chessman/__pycache__/Xiang.cpython-35.pyc b/chessman/__pycache__/Xiang.cpython-35.pyc index 9f88bab..85199bd 100644 Binary files a/chessman/__pycache__/Xiang.cpython-35.pyc and b/chessman/__pycache__/Xiang.cpython-35.pyc differ diff --git a/chessman/__pycache__/__init__.cpython-35.pyc b/chessman/__pycache__/__init__.cpython-35.pyc old mode 100755 new mode 100644 index d2aac0b..f37b022 Binary files a/chessman/__pycache__/__init__.cpython-35.pyc and b/chessman/__pycache__/__init__.cpython-35.pyc differ diff --git a/main.py b/main.py index 5f09bbb..a10da15 100755 --- a/main.py +++ b/main.py @@ -6,6 +6,11 @@ asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) import tensorflow as tf + +# from tensorflow.python.framework import ops +# ops.reset_default_graph() + + import numpy as np import os import sys @@ -20,13 +25,13 @@ from threading import Lock from concurrent.futures import ThreadPoolExecutor -def flipped_uci_labels(param): +def flipped_uci_labels(param):#快速翻动标签 def repl(x): return "".join([(str(9 - int(a)) if a.isdigit() else a) for a in x]) - +# Python isdigit() 方法检测字符串是否只由数字组成。 return [repl(x) for x in param] -# 创建所有合法走子UCI,size 2086 +# 创建所有合法走子UCI,size 2086 UCCI中国象棋通用引擎协议 def create_uci_labels(): labels_array = [] letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'] @@ -51,6 +56,7 @@ def create_uci_labels(): [(l1, t) for t in range(10)] + \ [(l1 + a, n1 + b) for (a, b) in [(-2, -1), (-1, -2), (-2, 1), (1, -2), (2, -1), (-1, 2), (2, 1), (1, 2)]] # 马走日 + #z画一个坐标系可以看出来,这些点是代表马🐎在当前位置可以走的8个位置 for (l2, n2) in destinations: if (l1, n1) != (l2, n2) and l2 in range(9) and n2 in range(10): move = letters[l1] + numbers[n1] + letters[l2] + numbers[n2] @@ -64,11 +70,11 @@ def create_uci_labels(): return labels_array -def create_position_labels(): +def create_position_labels():#创建一个位置/安置标签 labels_array = [] - letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'] + letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']#横坐标 letters.reverse() - numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']#纵坐标 for l1 in range(9): for n1 in range(10): @@ -77,7 +83,7 @@ def create_position_labels(): # labels_array.reverse() return labels_array -def create_position_labels_reverse(): +def create_position_labels_reverse(): #创建一个新的位置标签 labels_array = [] letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'] letters.reverse() @@ -90,7 +96,7 @@ def create_position_labels_reverse(): labels_array.reverse() return labels_array -class leaf_node(object): +class leaf_node(object): #目标叶子节点 def __init__(self, in_parent, in_prior_p, in_state): self.P = in_prior_p self.Q = 0 @@ -120,6 +126,8 @@ def get_Q_plus_U(self, c_puct): this node's prior adjusted for its visit count, u c_puct -- a number in (0, inf) controlling the relative impact of values, Q, and prior probability, P, on this node's score. + 计算并返回该节点的值:叶计算、Q和 该节点已根据其访问计数 + (u c_puck——(0,inf)中的一个数字,控制值Q和的相对影响 在这个节点的scor上,先验概率P """ # self._u = c_puct * self._P * np.sqrt(self._parent._n_visits) / (1 + self._n_visits) self.U = c_puct * self.P * np.sqrt(self.parent.N) / ( 1 + self.N) @@ -1333,7 +1341,9 @@ def get_action(self, state, temperature = 1e-3): # for i in range(self.playout_counts): # state_sim = copy.deepcopy(state) # self.mcts.do_simulation(state_sim, self.game_borad.current_player, self.game_borad.restrict_round) - + # 取得当前局面下所有子节点的合法走子和相应的访问量。 + # 这个所有子节点可能并不会覆盖所有合法的走子,这个是由树搜索的质量决定的,加大模拟次数会搜索更多不同的走法, + # 就是加大思考的深度,考虑更多的局面,避免出现有些特别重要的棋步却没有考虑到的情况。 self.mcts.main(state, self.game_borad.current_player, self.game_borad.restrict_round, self.playout_counts) actions_visits = [(act, nod.N) for act, nod in self.mcts.root.child.items()] diff --git a/policy_value_network.py b/policy_value_network.py index bb6c5f9..719823f 100755 --- a/policy_value_network.py +++ b/policy_value_network.py @@ -1,7 +1,7 @@ #coding:utf-8 import tensorflow as tf +from tensorflow.python.framework import ops import numpy as np - import os @@ -12,7 +12,7 @@ def __init__(self, res_block_nums = 7): self.is_logging = True """reset TF Graph""" - tf.reset_default_graph() + ops.reset_default_graph() """Creat a new graph for the network""" # g = tf.Graph()