emuro · emuro · Jul 18, 2019 · Jul 18, 2019 · Jul 18, 2019 · Jul 18, 2019
diff --git a/.github/deploy_docs.sh b/.github/deploy_docs.sh
@@ -72,15 +72,15 @@ DEST_SLUG=biopython/docs
 # Biopython was installed to run Sphinx and build the docs,
 # can use this:
 DEST_DIR=`python -c "import Bio; v=Bio.__version__; print('dev' if 'dev' in v else v)"`
-SOURCE_DIR=${TRAVIS_BUILD_DIR:-$PWD}/Doc/api/_build/html
+SOURCE_DIR=${TRAVIS_BUILD_DIR:-$PWD}/Doc/_build/html
 WORKING_DIR=/tmp/deploy_biopython_docs
 
 if [ -z "$DEST_DIR" ]; then
    echo "ERROR: Failed to get Biopython version, is it not installed?"
    python -c "import Bio; print(Bio.__version__)"
    false
 fi
-DEST_DIR=$DEST_DIR/api
+DEST_DIR=$DEST_DIR/
 echo "Aiming to deploy $SOURCE_DIR to $DEST_SLUG branch gh-pages as $DEST_DIR"
 
 # On TravisCI, must create the variable using '\ ' and '\n', so

diff --git a/.github/ref_fix.py b/.github/ref_fix.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+import re
+import sys
+
+# re_link = re.compile(r"`\[(.+)\] <#(.+)>`__")
+re_link = re.compile(r"`\[([A-Za-z0-9_:\-.]+)\] <#([A-Za-z0-9_:\-.]+)>`__")
+
+assert re_link.findall(r"Chapter \ `[chapter:quick_start] <#chapter:quick_start>`__ before\n")
+assert re_link.findall(r"(see Section `[sec:appendix-handles] <#sec:appendix-handles>`__):")
+assert re_link.findall(r"Section `[sec:Bio.SeqIO-and-StringIO] <#sec:Bio.SeqIO-and-StringIO>`__):")
+assert re_link.findall(r"Figure `[fig:three_track_cl2] <#fig:three_track_cl2>`__.")
+assert list(re_link.finditer(r"in Chapter \ `[chapter:seq_annot] <#chapter:seq_annot>`__. This aims to"))
+assert len(list(re_link.finditer(r"functions (`[eq:OP] <#eq:OP>`__) and (`[eq:NOP] <#eq:NOP>`__)."))) == 2
+
+re_section = re.compile(r"`[0-9.]+ <#([A-Za-z0-9_:\-.]+)>`__")
+
+assert re_section.findall(r"the label’s color (used in Section `1.1.9 <#sec:gd_nice_example>`__).")
+
+def fix_line(line):  # Return line with `[label] <#label>`__ and `N.N <#label>`__ links rewritten as :ref:`label`.
+    # e.g. Chapter \ `[chapter:quick_start] <#chapter:quick_start>`__
+    line = line.replace("\xa0\\ ", " ")  # collapse "\xa0" followed by backslash-space into one plain space
+    for match in re_link.finditer(line):  # first pass: links whose text is the bracketed label itself
+        old = match.group()
+        ref = match.group(1)
+        assert ref == match.group(2), old  # the bracketed label must equal the anchor target
+        new = r":ref:`%s`" % ref
+        line = line.replace(old, new)
+        print("%s -> %s" % (old, new))  # report each rewrite on stdout
+    for match in re_section.finditer(line):  # second pass: numbered links such as `1.1.9 <#sec:gd_nice_example>`__
+        old = match.group()
+        ref = match.group(1)
+        assert old.endswith("<#%s>`__" % ref), old  # sanity check that the captured label is the anchor target
+        new = r":ref:`%s`" % ref
+        line = line.replace(old, new)
+        print("%s -> %s" % (old, new))  # report each rewrite on stdout
+    return line
+
+def fix_file(filename):  # Rewrite the named file in place, passing every line through fix_line.
+    with open(filename) as handle:
+        lines = list(handle)  # read everything first; the same file is reopened for writing below
+    with open(filename, "w") as handle:
+        for line in lines:
+            handle.write(fix_line(line))
+
+for f in sys.argv[1:]:
+    sys.stderr.write("Fixing %s\n" % f)
+    fix_file(f)
diff --git a/.github/requirements-rtd.txt b/.github/requirements-rtd.txt
@@ -0,0 +1,3 @@
+# dot to mean current dir, i.e. Biopython itself
+.
+numpydoc
diff --git a/.github/rst_chunk.py b/.github/rst_chunk.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+import sys
+
+if len(sys.argv) == 2:  # an optional single argument overrides the label prefix
+    prefix = sys.argv[1]
+else:
+    prefix = "chapter"
+marker = ".. _%s" % prefix  # start of a label line for the chosen prefix
+
+handle = sys.stdout  # lines read before the first matching label are echoed to stdout
+for line in sys.stdin:
+    if line.rstrip().startswith(marker) and line.rstrip()[-1] == ":":  # label line: starts with marker, ends with ":"
+        if handle != sys.stdout:
+            handle.close()  # finish the previous chunk file before starting a new one
+        filename = "%s_%s.rst" % (prefix, line.rstrip()[len(marker) + 1:-1])  # drop marker, one separator char and the trailing ":"
+        sys.stderr.write("Starting %s\n" % filename)
+        handle = open(filename, "w")
+    handle.write(line)  # the label line itself is written into the new chunk
+if handle != sys.stdout:
+    handle.close()
diff --git a/.travis-tox.ini b/.travis-tox.ini
@@ -166,5 +166,5 @@ deps =
     sphinx_rtd_theme
 commands =
     bash -c \'python setup.py install > /dev/null\'
-    bash -c \'mkdir -p Doc/api/_templates Doc/api/_static Doc/api/_build\'
-    make -C Doc/api/ html
+    bash -c \'mkdir -p Doc/_templates Doc/_static Doc/_build\'
+    make -C Doc/ html
diff --git a/Doc/Makefile b/Doc/Makefile
@@ -1,51 +1,20 @@
-subdirs :=
-
-
-all:  Tutorial.html Tutorial.txt pdf $(subdirs)
-pdf:  Tutorial.pdf biopdb_faq.pdf
-
-Tutorial.pdf: Tutorial.tex Tutorial/chapter_*.tex
-	pdflatex --shell-escape Tutorial.tex
-	pdflatex --shell-escape Tutorial.tex
-	pdflatex --shell-escape Tutorial.tex
-
-biopdb_faq.pdf: biopdb_faq.tex
-	pdflatex biopdb_faq.tex
-	pdflatex biopdb_faq.tex
-	pdflatex biopdb_faq.tex
-
-Tutorial.html: Tutorial.tex Tutorial/chapter_*.tex
-	hevea -fix Tutorial.tex
-
-Tutorial.txt: Tutorial.tex Tutorial/chapter_*.tex
-	hevea -fix -text Tutorial.tex
-
-clean-subdirs: $(subdirs)
-	( for f in $^ ; do $(MAKE) clean -C $$f ; done )
-
-clean: clean-subdirs
-	rm -f Tutorial.aux
-	rm -f Tutorial.toc
-	rm -f Tutorial.log
-	rm -f Tutorial.out
-	rm -f Tutorial.haux
-	rm -f Tutorial.htoc
-	rm -f biopdb_faq.aux
-	rm -f biopdb_faq.log
-	rm -f biopdb_faq.out
-	rm -f Tutorial/*.aux
-
-distclean-subdirs: $(subdirs)
-	( for f in $^ ; do $(MAKE) distclean -C $$f ; done )
-
-distclean: clean distclean-subdirs
-	rm -f biopdb_faq.pdf
-	rm -f Tutorial.pdf
-	rm -f Tutorial.html
-	rm -f Tutorial.txt
-	rm -f *_motif.gif #output from hacha
-
-.PHONY: $(subdirs)
-$(subdirs):
-	$(MAKE) -C $@
-
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+SPHINXPROJ    = Bio
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/Doc/Tutorial.tex b/Doc/Tutorial.tex
@@ -45,57 +45,18 @@
 \documentclass{report}
 \usepackage{url}
 \usepackage{fullpage}
-\usepackage{hevea}
 \usepackage{graphicx}
-
-% For syntax coloring of python, pycon, bash etc in pdflatex:
 \usepackage{minted}
-% Minted fails on hevea, https://github.com/gpoore/minted/issues/234
-% silently fall back on verbatim - ignore the language argument:
-%HEVEA \newenvironment{minted}[1]{\verbatim}{\endverbatim}
-
-% make everything have section numbers
-\setcounter{secnumdepth}{4}
-
-% Make links between references
 \usepackage{hyperref}
-\newif\ifpdf
-\ifx\pdfoutput\undefined
-  \pdffalse
-\else
-  \pdfoutput=1
-  \pdftrue
-\fi
-\ifpdf
-  \hypersetup{colorlinks=true, hyperindex=true, citecolor=red, urlcolor=blue}
-\fi
 
 \begin{document}
 
-\begin{htmlonly}
 \title{Biopython Tutorial and Cookbook}
-\end{htmlonly}
-\begin{latexonly}
-\title{
-%Hack to get the logo on the PDF front page:
-\includegraphics[width=\textwidth]{images/biopython_logo.pdf}\\
-%Hack to get some white space using a blank line:
-~\\
-Biopython Tutorial and Cookbook}
-\end{latexonly}
 
 \author{Jeff Chang, Brad Chapman, Iddo Friedberg, Thomas Hamelryck, \\
 Michiel de Hoon, Peter Cock, Tiago Antao, Eric Talevich, Bartek Wilczy\'{n}ski}
 \date{Last Update -- 22 July 2019 (Biopython 1.75.dev0)}
 
-%Hack to get the logo at the start of the HTML front page:
-%(hopefully this isn't going to be too wide for most people)
-\begin{rawhtml}
-<P ALIGN="center">
-<IMG ALIGN="center" SRC="images/biopython_logo.svg" TITLE="Biopython Logo" ALT="[Biopython Logo]" width="450" height="300" />
-</p>
-\end{rawhtml}
-
 \maketitle
 \tableofcontents
 
@@ -118,6 +79,7 @@
 \include{Tutorial/chapter_graphics}
 \include{Tutorial/chapter_kegg}
 \include{Tutorial/chapter_phenotype}
+%\include{Tutorial/chapter_codonalign}
 \include{Tutorial/chapter_cookbook}
 \include{Tutorial/chapter_testing}
 \include{Tutorial/chapter_advanced}

diff --git a/Doc/doc.tex → Doc/Tutorial/chapter_codonalign.tex b/Doc/doc.tex → Doc/Tutorial/chapter_codonalign.tex
diff --git a/Doc/Tutorial/chapter_introduction.tex b/Doc/Tutorial/chapter_introduction.tex
@@ -118,21 +118,13 @@ \section{Frequently Asked Questions (FAQ)}
   licensed under your choice of the \emph{Biopython License Agreement} or
   the \emph{BSD 3-Clause License}.
 
-\begin{latexonly}
   \includegraphics[width=6cm]{images/biopython_logo.pdf}\\
-\end{latexonly}
-\begin{rawhtml}
-  <IMG ALIGN="center" SRC="images/biopython_logo.svg" TITLE="Biopython Logo (2017 onwards)" ALT="[New Biopython Logo]" width="300" height="200" />
-  <IMG ALIGN="center" SRC="images/biopython_logo_old.jpg" TITLE="Old Biopython Logo (2003-2017)" ALT="[Old Biopython Logo]" width="512" height="144" />
-\end{rawhtml}
 
   Prior to this, the Biopython logo was two yellow snakes forming a double
   helix around the word ``BIOPYTHON'', designed by Henrik Vestergaard and
   Thomas Hamelryck in 2003 as part of an open competition.
 
-\begin{latexonly}
   \includegraphics[width=7cm]{images/biopython_logo_old.jpg}\\
-\end{latexonly}
 
   \item \emph{Do you have a change-log listing what's new in each release?} \\
   See the file \verb|NEWS.rst| included with the source code (originally called

diff --git a/Doc/Tutorial/chapter_learning.tex b/Doc/Tutorial/chapter_learning.tex
@@ -7,6 +7,7 @@ \section{The Logistic Regression Model}
 \label{sec:LogisticRegression}
 
 \subsection{Background and Purpose}
+\label{sec:LogisticRegressionBackground}
 
 Logistic regression is a supervised learning approach that attempts to distinguish $K$ classes from each other using a weighted sum of some predictor variables $x_i$. The logistic regression model is used to calculate the weights $\beta_i$ of the predictor variables. In Biopython, the logistic regression model is currently implemented for two classes only ($K = 2$); the number of predictor variables has no predefined limit.
 
@@ -34,10 +35,10 @@ \subsection{Background and Purpose}
 
 In the logistic regression model, the probability of belonging to a class depends on the score via the logistic function. For the two classes OP and NOP, we can write this as
 \begin{eqnarray}
-\Pr(\mathrm{OP}|x_1, x_2) & = & \frac{\exp(\beta_0 + \beta_1 x_1 + \beta_2 x_2)}{1+\exp(\beta_0 + \beta_1 x_1 + \beta_2 x_2)} \label{eq:OP} \\
-\Pr(\mathrm{NOP}|x_1, x_2) & = & \frac{1}{1+\exp(\beta_0 + \beta_1 x_1 + \beta_2 x_2)} \label{eq:NOP}
+\Pr(\mathrm{OP}|x_1, x_2) & = & \frac{\exp(\beta_0 + \beta_1 x_1 + \beta_2 x_2)}{1+\exp(\beta_0 + \beta_1 x_1 + \beta_2 x_2)} \\
+\Pr(\mathrm{NOP}|x_1, x_2) & = & \frac{1}{1+\exp(\beta_0 + \beta_1 x_1 + \beta_2 x_2)}
 \end{eqnarray}
-Using a set of gene pairs for which it is known whether they belong to the same operon (class OP) or to different operons (class NOP), we can calculate the weights $\beta_0$, $\beta_1$, $\beta_2$ by maximizing the log-likelihood corresponding to the probability functions (\ref{eq:OP}) and (\ref{eq:NOP}).
+Using a set of gene pairs for which it is known whether they belong to the same operon (class OP) or to different operons (class NOP), we can calculate the weights $\beta_0$, $\beta_1$, $\beta_2$ by maximizing the log-likelihood corresponding to these probability functions.
 
 \subsection{Training the logistic regression model}
 \label{sec:LogisticRegressionTraining}
@@ -208,7 +209,8 @@ \subsection{Using the logistic regression model for classification}
 \end{minted}
 (which, by the way, agrees with the biological literature).
 
-To find out how confident we can be in these predictions, we can call the \verb+calculate+ function to obtain the probabilities (equations (\ref{eq:OP}) and \ref{eq:NOP}) for class OP and NOP. For \textit{yxcE}, \textit{yxcD} we find
+To find out how confident we can be in these predictions, we can call the \verb+calculate+ function to obtain the probabilities for class OP and NOP (using the equations introduced in Section~\ref{sec:LogisticRegressionBackground}).
+For \textit{yxcE}, \textit{yxcD} we find
 \begin{minted}{pycon}
 >>> q, p = LogisticRegression.calculate(model, [6, -173.143442352])
 >>> print("class OP: probability =", p, "class NOP: probability =", q)
@@ -270,9 +272,9 @@ \subsection{Using the logistic regression model for classification}
 
 \subsection{Logistic Regression, Linear Discriminant Analysis, and Support Vector Machines}
 
-The logistic regression model is similar to linear discriminant analysis. In linear discriminant analysis, the class probabilities also follow equations (\ref{eq:OP}) and (\ref{eq:NOP}). However, instead of estimating the coefficients $\beta$ directly, we first fit a normal distribution to the predictor variables $x$. The coefficients $\beta$ are then calculated from the means and covariances of the normal distribution. If the distribution of $x$ is indeed normal, then we expect linear discriminant analysis to perform better than the logistic regression model. The logistic regression model, on the other hand, is more robust to deviations from normality.
+The logistic regression model is similar to linear discriminant analysis. In linear discriminant analysis, the class probabilities also follow the OP and NOP equations introduced in Section~\ref{sec:LogisticRegressionBackground}. However, instead of estimating the coefficients $\beta$ directly, we first fit a normal distribution to the predictor variables $x$. The coefficients $\beta$ are then calculated from the means and covariances of the normal distribution. If the distribution of $x$ is indeed normal, then we expect linear discriminant analysis to perform better than the logistic regression model. The logistic regression model, on the other hand, is more robust to deviations from normality.
 
-Another similar approach is a support vector machine with a linear kernel. Such an SVM also uses a linear combination of the predictors, but estimates the coefficients $\beta$ from the predictor variables $x$ near the boundary region between the classes. If the logistic regression model (equations (\ref{eq:OP}) and (\ref{eq:NOP})) is a good description for $x$ away from the boundary region, we expect the logistic regression model to perform better than an SVM with a linear kernel, as it relies on more data. If not, an SVM with a linear kernel may perform better.
+Another similar approach is a support vector machine with a linear kernel. Such an SVM also uses a linear combination of the predictors, but estimates the coefficients $\beta$ from the predictor variables $x$ near the boundary region between the classes. If the logistic regression model (OP and NOP equations in Section~\ref{sec:LogisticRegressionBackground}) is a good description for $x$ away from the boundary region, we expect the logistic regression model to perform better than an SVM with a linear kernel, as it relies on more data. If not, an SVM with a linear kernel may perform better.
 
 Trevor Hastie, Robert Tibshirani, and Jerome Friedman: \textit{The Elements of Statistical Learning. Data Mining, Inference, and Prediction}. Springer Series in Statistics, 2001. Chapter 4.4.
 

diff --git a/Doc/api/Makefile b/Doc/api/Makefile