chengstone · aohongchang · Feb 3, 2021 · Jul 8, 2021
diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml
diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/ChessGame_tf2.py b/ChessGame_tf2.py
@@ -182,9 +182,9 @@ def change_player(self):
 
     def perform_AI(self):
         print ('...AI is calculating...')
-        START_TIME = time.clock()
+        START_TIME = time.perf_counter()
         move, win_rate = self.cchess_engine.select_move(self.ai_function)
-        time_used = time.clock() - START_TIME
+        time_used = time.perf_counter() - START_TIME
         print ('...Use %fs...' % time_used)
         if self.current_player == "w":
             self.time_red.append(time_used)

diff --git a/Mastering_Chess_and_Shogi_by_Self-Play_with_a_General_Reinforcement_Learning_Algorithm.ipynb b/Mastering_Chess_and_Shogi_by_Self-Play_with_a_General_Reinforcement_Learning_Algorithm.ipynb
@@ -579,7 +579,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.7"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,

diff --git a/Mastering_the_Game_of_Go_without_Human_Knowledge.ipynb b/Mastering_the_Game_of_Go_without_Human_Knowledge.ipynb
@@ -837,7 +837,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.7"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,

diff --git a/cchess-zero.ipynb b/cchess-zero.ipynb
@@ -82,7 +82,7 @@
     "我们是要抛弃人类棋谱的，学会如何下棋完全是通过自对弈来完成。\n",
     "\n",
     "过程是这样，首先生成棋谱，然后将棋谱作为输入训练神经网络，训练好的神经网络用来预测落子和胜率。如下图：\n",
-    "![a1](assets\\a1.png\")"
+    "<img src='assets/a1.png'/>"
    ]
   },
   {

diff --git a/chessman/__pycache__/Bing.cpython-35.pyc b/chessman/__pycache__/Bing.cpython-35.pyc
diff --git a/chessman/__pycache__/Che.cpython-35.pyc b/chessman/__pycache__/Che.cpython-35.pyc
diff --git a/chessman/__pycache__/Ma.cpython-35.pyc b/chessman/__pycache__/Ma.cpython-35.pyc
diff --git a/chessman/__pycache__/Pao.cpython-35.pyc b/chessman/__pycache__/Pao.cpython-35.pyc
diff --git a/chessman/__pycache__/Shi.cpython-35.pyc b/chessman/__pycache__/Shi.cpython-35.pyc
diff --git a/chessman/__pycache__/Shuai.cpython-35.pyc b/chessman/__pycache__/Shuai.cpython-35.pyc
diff --git a/chessman/__pycache__/Xiang.cpython-35.pyc b/chessman/__pycache__/Xiang.cpython-35.pyc
diff --git a/chessman/__pycache__/__init__.cpython-35.pyc b/chessman/__pycache__/__init__.cpython-35.pyc
diff --git a/main.py b/main.py
@@ -6,6 +6,11 @@
 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
 
 import tensorflow as tf
+
+# from tensorflow.python.framework import ops
+# ops.reset_default_graph()
+
+
 import numpy as np
 import os
 import sys
@@ -20,13 +25,13 @@
 from threading import Lock
 from concurrent.futures import ThreadPoolExecutor
 
-def flipped_uci_labels(param):
+def flipped_uci_labels(param):#快速翻动标签
     def repl(x):
         return "".join([(str(9 - int(a)) if a.isdigit() else a) for a in x])
-
+# Python isdigit() 方法检测字符串是否只由数字组成。
     return [repl(x) for x in param]
 
-# 创建所有合法走子UCI，size 2086
+# 创建所有合法走子UCI，size 2086      UCCI中国象棋通用引擎协议
 def create_uci_labels():
     labels_array = []
     letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']
@@ -51,6 +56,7 @@ def create_uci_labels():
                            [(l1, t) for t in range(10)] + \
                            [(l1 + a, n1 + b) for (a, b) in
                             [(-2, -1), (-1, -2), (-2, 1), (1, -2), (2, -1), (-1, 2), (2, 1), (1, 2)]]  # 马走日
+                        # 画一个坐标系可以看出来，这些点代表马🐎在当前位置可以走到的8个位置
             for (l2, n2) in destinations:
                 if (l1, n1) != (l2, n2) and l2 in range(9) and n2 in range(10):
                     move = letters[l1] + numbers[n1] + letters[l2] + numbers[n2]
@@ -64,11 +70,11 @@ def create_uci_labels():
 
     return labels_array
 
-def create_position_labels():
+def create_position_labels():#创建一个位置/安置标签
     labels_array = []
-    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']
+    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']#横坐标
     letters.reverse()
-    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
+    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']#纵坐标
 
     for l1 in range(9):
         for n1 in range(10):
@@ -77,7 +83,7 @@ def create_position_labels():
 #     labels_array.reverse()
     return labels_array
 
-def create_position_labels_reverse():
+def create_position_labels_reverse(): #创建一个新的位置标签
     labels_array = []
     letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']
     letters.reverse()
@@ -90,7 +96,7 @@ def create_position_labels_reverse():
     labels_array.reverse()
     return labels_array
 
-class leaf_node(object):
+class leaf_node(object): #目标叶子节点
     def __init__(self, in_parent, in_prior_p, in_state):
         self.P = in_prior_p
         self.Q = 0
@@ -120,6 +126,8 @@ def get_Q_plus_U(self, c_puct):
         this node's prior adjusted for its visit count, u
         c_puct -- a number in (0, inf) controlling the relative impact of values, Q, and
             prior probability, P, on this node's score.
+            计算并返回该节点的值：由叶节点评估值 Q 与该节点按访问次数调整后的先验 u 组合而成。
+            c_puct——(0, inf) 中的一个数，控制价值 Q 与先验概率 P 对该节点得分的相对影响。
         """
         # self._u = c_puct * self._P * np.sqrt(self._parent._n_visits) / (1 + self._n_visits)
         self.U = c_puct * self.P * np.sqrt(self.parent.N) / ( 1 + self.N)
@@ -1333,7 +1341,9 @@ def get_action(self, state, temperature = 1e-3):
         # for i in range(self.playout_counts):
         #     state_sim = copy.deepcopy(state)
         #     self.mcts.do_simulation(state_sim, self.game_borad.current_player, self.game_borad.restrict_round)
-
+        # 取得当前局面下所有子节点的合法走子和相应的访问量。
+        # 这个所有子节点可能并不会覆盖所有合法的走子，这个是由树搜索的质量决定的，加大模拟次数会搜索更多不同的走法，
+        # 就是加大思考的深度，考虑更多的局面，避免出现有些特别重要的棋步却没有考虑到的情况。
         self.mcts.main(state, self.game_borad.current_player, self.game_borad.restrict_round, self.playout_counts)
 
         actions_visits = [(act, nod.N) for act, nod in self.mcts.root.child.items()]

diff --git a/policy_value_network.py b/policy_value_network.py
@@ -1,7 +1,7 @@
 #coding:utf-8
 import tensorflow as tf
+from tensorflow.python.framework import ops
 import numpy as np
-
 import os
 
 
@@ -12,7 +12,7 @@ def __init__(self, res_block_nums = 7):
         self.is_logging = True
 
         """reset TF Graph"""
-        tf.reset_default_graph()
+        ops.reset_default_graph()
         """Creat a new graph for the network"""
         # g = tf.Graph()