From f16d238053f90ed540577711e1468f9359bec137 Mon Sep 17 00:00:00 2001 From: capyBearista Date: Sat, 25 Oct 2025 02:14:51 -0400 Subject: [PATCH 1/3] Fix typos and grammatical errors (repeated wording) across several files. - fix grammatical error in README.md: "The eight courses are in the curriculum are:" -> "The eight courses in the curriculum are:" - fix misspelling in ai_foundations/visualizations/plots.py: "correclty" -> "correctly" - remove word repetition "and and" in course_2/gdm_lab_2_1_preprocess_data.ipynb - remove word repetition "to to" in course_2/gdm_lab_2_2_tokenize_texts_into_characters_and_words.ipynb - fix misspelling "occurence" -> "occurrence" in course_3/gdm_lab_3_1_distinguish_between_signal_and_noise.ipynb - remove word repetition "at at" in course_3/gdm_lab_3_6_mitigate_overfitting.ipynb - fix "fo" -> "of" in course_3/gdm_lab_3_6_mitigate_overfitting.ipynb - remove word repetition "a a" in course_4/gdm_lab_4_5_reflection_on_trainable_parameters.ipynb --- README.md | 2 +- ai_foundations/visualizations/plots.py | 2 +- course_2/gdm_lab_2_1_preprocess_data.ipynb | 2 +- ...gdm_lab_2_2_tokenize_texts_into_characters_and_words.ipynb | 2 +- .../gdm_lab_3_1_distinguish_between_signal_and_noise.ipynb | 2 +- course_3/gdm_lab_3_6_mitigate_overfitting.ipynb | 4 ++-- course_4/gdm_lab_4_5_reflection_on_trainable_parameters.ipynb | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index b2855b4..6ec711f 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ initially through a private beta program. ## The AI Research Foundations Courses -The eight courses are in the curriculum are: +The eight courses in the curriculum are: 1. Build Your Own Small Language Model 2. 
Represent Your Language Data diff --git a/ai_foundations/visualizations/plots.py b/ai_foundations/visualizations/plots.py index b7961bd..def40ed 100644 --- a/ai_foundations/visualizations/plots.py +++ b/ai_foundations/visualizations/plots.py @@ -290,7 +290,7 @@ def plot_data_and_decision_boundary( if sum(classification_errors) == 0: print( - "\n\n✅ Well done! Your decision boundary correclty separates" + "\n\n✅ Well done! Your decision boundary correctly separates" " all data points." ) else: diff --git a/course_2/gdm_lab_2_1_preprocess_data.ipynb b/course_2/gdm_lab_2_1_preprocess_data.ipynb index c128ab3..cae4043 100644 --- a/course_2/gdm_lab_2_1_preprocess_data.ipynb +++ b/course_2/gdm_lab_2_1_preprocess_data.ipynb @@ -350,7 +350,7 @@ "id": "Qr23mRhp5fit" }, "source": [ - "Test your function. Make sure that `&lt;` is replaced with `<`, and `&gt;` is replaced with `>` and and `&amp;` is replaced with `&`:" + "Test your function. Make sure that `&lt;` is replaced with `<`, and `&gt;` is replaced with `>` and `&amp;` is replaced with `&`:" ] }, { diff --git a/course_2/gdm_lab_2_2_tokenize_texts_into_characters_and_words.ipynb b/course_2/gdm_lab_2_2_tokenize_texts_into_characters_and_words.ipynb index d0ecc7e..60cdf4e 100644 --- a/course_2/gdm_lab_2_2_tokenize_texts_into_characters_and_words.ipynb +++ b/course_2/gdm_lab_2_2_tokenize_texts_into_characters_and_words.ipynb @@ -429,7 +429,7 @@ "id": "2zqyYr9pSpNx" }, "source": [ - "As a first step, take a look again at the first paragraph in the Africa Galore dataset to to remind yourself what the data looks like.\n" + "As a first step, take a look again at the first paragraph in the Africa Galore dataset to remind yourself what the data looks like.\n" ] }, { diff --git a/course_3/gdm_lab_3_1_distinguish_between_signal_and_noise.ipynb b/course_3/gdm_lab_3_1_distinguish_between_signal_and_noise.ipynb index d6f0bc5..0d949d3 100644 --- a/course_3/gdm_lab_3_1_distinguish_between_signal_and_noise.ipynb +++ 
b/course_3/gdm_lab_3_1_distinguish_between_signal_and_noise.ipynb @@ -76,7 +76,7 @@ "source": [ "### Tasks\n", "\n", - "You will work with three small language models that have all been trained on a noisy version of the Africa Galore dataset. In this dataset, one of the paragraphs includes a spelling mistake. The phrase \"a vibrant symbol of\" is misspelled as \"a vibrant symbol fo\". Furthermore, this is the only occurence of the phrase \"a vibrant symbol\". All other paragraphs that include the word symbol do not include the adjective \"vibrant\".\n", + "You will work with three small language models that have all been trained on a noisy version of the Africa Galore dataset. In this dataset, one of the paragraphs includes a spelling mistake. The phrase \"a vibrant symbol of\" is misspelled as \"a vibrant symbol fo\". Furthermore, this is the only occurrence of the phrase \"a vibrant symbol\". All other paragraphs that include the word symbol do not include the adjective \"vibrant\".\n", "\n", "**In this lab, you will**:\n", "* Compare the continuations to different prompts for models that have been trained for 10, 400, and 1,000 epochs.\n", diff --git a/course_3/gdm_lab_3_6_mitigate_overfitting.ipynb b/course_3/gdm_lab_3_6_mitigate_overfitting.ipynb index a7f738d..8bc21cf 100644 --- a/course_3/gdm_lab_3_6_mitigate_overfitting.ipynb +++ b/course_3/gdm_lab_3_6_mitigate_overfitting.ipynb @@ -257,7 +257,7 @@ "source": [ "## Tune hyperparameters\n", "\n", - "In the following cells, you will change one hyperparameter at at time. For each set of hyperparameters, you will train and evaluate a model. You will also inspect the loss curves and accuracy curves for each training run.\n", + "In the following cells, you will change one hyperparameter at a time. For each set of hyperparameters, you will train and evaluate a model. 
You will also inspect the loss curves and accuracy curves for each training run.\n", "\n", "Run the following cell to define a function that performs the training and visualizations for a given set of hyperparameters. In this function, you will see all components required for training a model, such as the loss function and the optimizer. For now, ignore these details. You will learn more about each of these components in later articles and labs." ] @@ -756,7 +756,7 @@ "\n", "You probably noticed that for both the dropout rate and weight decay strength, the model's performance initially improved compared to the baseline when you set them to a smaller value. However, performance worsened significantly when you set either of these values too high. This is a very common pattern and usually you have to try several values until you find the one that works best for your model and dataset.\n", "\n", - "In this lab, you also modified one hyperparameter at a time. In practice, you often want to combine overfitting methods, for example dropout and early stopping. If you want to experiment further, add more cells to this lab and try out additional combinations fo hyperparameters." + "In this lab, you also modified one hyperparameter at a time. In practice, you often want to combine overfitting methods, for example dropout and early stopping. If you want to experiment further, add more cells to this lab and try out additional combinations of hyperparameters." 
] }, { diff --git a/course_4/gdm_lab_4_5_reflection_on_trainable_parameters.ipynb b/course_4/gdm_lab_4_5_reflection_on_trainable_parameters.ipynb index d562076..899c63b 100644 --- a/course_4/gdm_lab_4_5_reflection_on_trainable_parameters.ipynb +++ b/course_4/gdm_lab_4_5_reflection_on_trainable_parameters.ipynb @@ -1283,7 +1283,7 @@ "source": [ "## Optional: Training the model\n", "\n", - "As a last optional exercise, if you would like to see this model in action, you can run the following hidden cell to load the Africa Galore dataset, tokenize and pad the data, and train the model. This will take about one minute to run on a Colab instance with a a GPU or 10 minutes on a Colab instance with a CPU.\n", + "As a last optional exercise, if you would like to see this model in action, you can run the following hidden cell to load the Africa Galore dataset, tokenize and pad the data, and train the model. This will take about one minute to run on a Colab instance with a GPU or 10 minutes on a Colab instance with a CPU.\n", "\n", "You can then sample continuations to a prompt from the model in the cell after the training loop.\n", "\n" From e18f7759a8ac99a1d5353cd917ebdd68cf13c189 Mon Sep 17 00:00:00 2001 From: Imad Saddik <79410781+ImadSaddik@users.noreply.github.com> Date: Thu, 30 Oct 2025 22:05:13 +0100 Subject: [PATCH 2/3] Fixed a typo in the notebook --- ..._compare_n_gram_models_and_transformer_language_models.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/course_1/gdm_lab_1_3_compare_n_gram_models_and_transformer_language_models.ipynb b/course_1/gdm_lab_1_3_compare_n_gram_models_and_transformer_language_models.ipynb index a7038f8..7b41a06 100644 --- a/course_1/gdm_lab_1_3_compare_n_gram_models_and_transformer_language_models.ipynb +++ b/course_1/gdm_lab_1_3_compare_n_gram_models_and_transformer_language_models.ipynb @@ -626,7 +626,7 @@ "\n", "You have now directly compared the generations of a trigram model and a transformer model and have 
observed many differences. These comparisons highlighted contrasts in terms of fluency, coherence and relevance between the two models. While the n-gram model often generated word salads or failed to generate a continuation at all, the transformer model generally generated quite reasonable responses (though sometimes they may have not been entirely perfect either).\n", "\n", - "Note that this comparison was stacked against the n-gram model. That is because the difference between the trigram model and the Gemma-1B model, which were both trained the Africa Galore dataset, is not only one of implementation. The Gemma-1B model has also been trained on a very large dataset. In comparison, the trigram model has only been trained on the paragraphs in the Africa Galore dataset. That being said, even if you had trained the n-gram model on as much data as the Gemma-1B model, the transformer model would have still performed much better.\n", + "Note that this comparison was stacked against the n-gram model. That is because the difference between the trigram model and the Gemma-1B model, which were both trained on the Africa Galore dataset, is not only one of implementation. The Gemma-1B model has also been trained on a very large dataset. In comparison, the trigram model has only been trained on the paragraphs in the Africa Galore dataset. That being said, even if you had trained the n-gram model on as much data as the Gemma-1B model, the transformer model would have still performed much better.\n", "\n", "There are two primary reasons for this:\n", "- Transformers have much larger context windows and can therefore consider the information of tokens that are further away from the token to be generated. N-gram models, on the other hand, only have a context window of $n-1$. 
So in the case of the trigram model, the model only considered the last two tokens for making predictions.\n", From 25b775c246b862c0aaad3a5490fc42bc9518d535 Mon Sep 17 00:00:00 2001 From: boiled-darvari <165815977+boiled-darvari@users.noreply.github.com> Date: Mon, 10 Nov 2025 13:58:14 -0500 Subject: [PATCH 3/3] Add retry logic for loading Gemma-1B model Transient Gemma-1B Loading Error in Colab GPU Runtime --- ...odels_and_transformer_language_models.ipynb | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/course_1/gdm_lab_1_3_compare_n_gram_models_and_transformer_language_models.ipynb b/course_1/gdm_lab_1_3_compare_n_gram_models_and_transformer_language_models.ipynb index 7b41a06..dd212eb 100644 --- a/course_1/gdm_lab_1_3_compare_n_gram_models_and_transformer_language_models.ipynb +++ b/course_1/gdm_lab_1_3_compare_n_gram_models_and_transformer_language_models.ipynb @@ -237,8 +237,22 @@ "print(\"Loaded trigram model.\\n\")\n", "\n", "print(\"Loading Gemma-1B model...\")\n", - "gemma_model = generation.load_gemma()\n", - "print(\"Loaded Gemma-1B model.\")" + "import time\n", + "\n", + "# Retry loading the Gemma model with exception handling\n", + "max_load_retries = 3\n", + "for i in range(max_load_retries):\n", + " try:\n", + " gemma_model = generation.load_gemma()\n", + " print(\"Loaded Gemma-1B model.\")\n", + " break # Exit the loop if loading is successful\n", + " except Exception as e:\n", + " print(f\"Attempt {i+1}/{max_load_retries}: Error loading Gemma model: {e}\")\n", + " if i < max_load_retries - 1:\n", + " print(\"Retrying in 10 seconds...\")\n", + " time.sleep(10)\n", + " else:\n", + " print(\"Failed to load Gemma model after multiple retries.\")" ] }, {