diff --git a/AdvancedModel.py b/AdvancedModel.py
index e530173..75e3f71 100644
--- a/AdvancedModel.py
+++ b/AdvancedModel.py
@@ -6,16 +6,16 @@
 from torch.utils.data.dataloader import DataLoader
 from torchtext.vocab import Vocab
 from torch.utils.data.dataset import Dataset, TensorDataset
-from collections import Counter
+from collections import Counter, OrderedDict
 from chu_liu_edmonds import decode_mst
 import matplotlib.pyplot as plt
 from datetime import datetime
 from tqdm import tqdm
 from timeit import default_timer as timer
 import csv
+from BasicModel import run_basic_model
 
 torch.manual_seed(0)
-
 UNKNOWN_TOKEN = "<unk>"
 PAD_TOKEN = "<pad>"
 ROOT_TOKEN = "<root>"
@@ -55,23 +55,30 @@ def get_vocabs(list_of_paths):
          A POS and words indexes dictionaries.
     """
 
-    words_dict = {PAD_TOKEN, ROOT_TOKEN, UNKNOWN_TOKEN}
-    pos_dict = {PAD_TOKEN, ROOT_TOKEN, UNKNOWN_TOKEN}
+    words_dict = OrderedDict([(PAD_TOKEN, 1), (ROOT_TOKEN, 1), (UNKNOWN_TOKEN, 1)])
+    pos_dict = OrderedDict([(PAD_TOKEN, 1), (ROOT_TOKEN, 1), (UNKNOWN_TOKEN, 1)])
+
     for file_path in list_of_paths:
         with open(file_path) as f:
             for line in f:
                 split_line = line.split('\t')
                 if len(split_line) == 1:  # the end of a sentence denotes by \n line.
                     continue
-                word, pos_tag, head = split_line[1], split_line[3], int(split_line[6])
-                words_dict.add(word)
-                pos_dict.add(pos_tag)
+                word, pos_tag = split_line[1], split_line[3]
+                if word in words_dict:
+                    words_dict[word] = words_dict[word] + 1
+                else:
+                    words_dict[word] = 1
+                if pos_tag in pos_dict:
+                    pos_dict[pos_tag] = pos_dict[pos_tag] + 1
+                else:
+                    pos_dict[pos_tag] = 1
 
     return words_dict, pos_dict
 
 
 class DataReader:
-    """ Read the data from the requested file and hold it's components. """
+    """ Reads the data from the requested file and holds its components. """
 
     def __init__(self, word_dict, pos_dict, file_path, competition=False):
         """
@@ -288,7 +295,8 @@ def __init__(self, word_emb_dim, pos_emb_dim, hidden_dim, word_vocab_size, tag_v
         self.relu = nn.ReLU()
 
     def word_tag_dropout(self, words, postags, p_drop):
-        # can't work with batches
+        """ Word/tag dropout based on DEEP BIAFFINE ATTENTION FOR NEURAL DEPENDENCY PARSING
+              - Timothy Dozat, Christopher D. Manning """
         p_matrix_word = torch.rand(size=words.shape, device=words.device)
         p_matrix_pos = torch.rand(size=words.shape, device=words.device)
         w_dropout_mask = (p_matrix_word > p_drop).long()
@@ -361,7 +369,7 @@ def get_acc(edge_scores, headers_idx_tensors, batch_size, max_length, sentence_l
     return acc
 
 
-def evaluate(model, words_dict, pos_dict, batch_size):
+def evaluate(model, words_dict, pos_dict, batch_size, path_test):
     """
     Evaluate our model on a validation set.
     Args:
@@ -375,13 +383,13 @@ def evaluate(model, words_dict, pos_dict, batch_size):
     print("Evaluating Started")
 
     model.eval()
-    path_test = "Data/test.labeled"
-    test = DependencyDataset(words_dict, pos_dict, path_test, padding=True)
-    test_data_loader = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
-    num_of_sentences = len(test)
-    acc = 0
-    num_of_words = 0
     with torch.no_grad():
+        test = DependencyDataset(words_dict, pos_dict, path_test, padding=True)
+        test_data_loader = DataLoader(test, batch_size=batch_size, shuffle=False, num_workers=0)
+        num_of_sentences = len(test)
+        acc = 0
+        num_of_words = 0
+
         for batch_idx, input_data in enumerate(test_data_loader):
             words_idx_tensor, pos_idx_tensor, headers_idx_tensor, sentence_length = input_data
             headers_idx_tensors = [headers[:sentence_length[i]] for i, headers in enumerate(headers_idx_tensor)]
@@ -392,9 +400,9 @@ def evaluate(model, words_dict, pos_dict, batch_size):
             acc += get_acc(batched_scores, headers_idx_tensors, batch_size, max_length, sentence_length)
             num_of_words += (max_length - 1)
 
-    acc = acc/num_of_words
-    print("Evaluating Ended")
-    return acc, _loss
+        acc = acc/num_of_words
+        print("Evaluating Ended")
+        return acc, _loss
 
 
 def print_plots(train_acc_list, train_loss_list, test_acc_list, test_loss_list, _time=''):
@@ -545,30 +553,40 @@ def train(self):
             train_acc_list.append(float(acc))
             train_loss_list.append(float(printable_loss))
             # Runs a validation phase.
-            test_acc, test_loss = evaluate(encoder, words_dict, pos_dict, self.batch_size)
+            test_acc, test_loss = evaluate(encoder, words_dict, pos_dict, self.batch_size, self.path_test)
             test_acc_list.append(test_acc)
             test_loss_list.append(test_loss)
+            time_id = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
+            with open(r"{}_advanced_model_full_{}.pkl".format(epoch, time_id), "wb") as output_file:
+                torch.save(encoder.state_dict(), output_file)
             print("Epoch {} Completed,\tLoss {}\tAccuracy: {}\t Test Accuracy: {}".format(epoch + 1, train_loss_list[-1],
                                                                                           train_acc_list[-1], test_acc))
 
+            # Saves our model results.
+            with open('parser_results_info.csv', 'a') as f:
+                writer = csv.writer(f)
+                writer.writerow([epoch + 1, train_loss_list[-1], train_acc_list[-1], test_acc])
+
         # Saves our learned model and plot some graphs.
         time_id = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
         print_plots(train_acc_list, train_loss_list, test_acc_list, test_loss_list, time_id)
         end_time = timer()
-        torch.save(encoder, 'encoder{}.pth'.format(time_id))
+        with open(r"advanced_model_full_{}.pkl".format(time_id), "wb") as output_file:
+            torch.save(encoder.state_dict(), output_file)
         print("the training took: {} sec ".format(round(end_time - start_time, 2)))
         return test_acc_list, time_id
 
 
-if __name__ == '__main__':
+def get_hyper_parameters():
+    """Returns the hyper parameters of the model."""
+    path_train = "Data/combined.labeled"
+    path_test = "Data/val.labeled"
+    return (100, 100, 100, 500, 1, 30, 0.002, path_train, path_test, 0.3, 0.3, 0.3)
 
-    path_train = "Data/train.labeled"
-    path_test = "Data/test.labeled"
 
-    hyper_parameters_list = [(100, 100, 100, 500, 1, 30, 0.002, path_train, path_test, 0.3, 0.3, 0.3),
-                             (80, 100, 100, 400, 1, 30, 0.002, path_train, path_test, 0.5, 0.3, 0.1),
-                             (80, 100, 100, 400, 1, 40, 0.005, path_train, path_test, 0.3, 0.3, 0.3),
-                             (60, 100, 200, 400, 1, 20, 0.002, path_train, path_test, 0.3, 0.3, 0.3)]
+if __name__ == '__main__':
+
+    hyper_parameters_list = [get_hyper_parameters()]
 
     for hyper_parameters in hyper_parameters_list:
         EPOCHS, WORD_EMBEDDING_DIM, POS_EMBEDDING_DIM, HIDDEN_DIM, BATCH_SIZE, BATCH_ACCUMULATE, LEARNING_RATE, path_train, path_test, WORD_TAG_DROPOUT, EMBEDDING_DROPOUT, LSTM_DROPOUT = hyper_parameters
@@ -581,8 +599,12 @@ def train(self):
         epoch_max = np.argmax(test_acc_list)
 
         # Saves our model hyper parameters settings.
-        with open('parser_results_info.csv', 'a') as f:
+        with open('parser_settings.csv', 'a') as f:
             writer = csv.writer(f)
             writer.writerow([time_id, max_test_acc, epoch_max, EPOCHS, WORD_EMBEDDING_DIM, POS_EMBEDDING_DIM, HIDDEN_DIM,
                              BATCH_SIZE, BATCH_ACCUMULATE, LEARNING_RATE, WORD_TAG_DROPOUT, EMBEDDING_DROPOUT, LSTM_DROPOUT])
 
+    print("Finished training the model, based on the following hyper parameters mix: {}".format(hyper_parameters))
+
+    # print("Runs the basic model training:")
+    # run_basic_model()
diff --git a/BasicModel.py b/BasicModel.py
index a033573..bc91bfe 100644
--- a/BasicModel.py
+++ b/BasicModel.py
@@ -4,22 +4,24 @@
 import torch.nn.functional as F
 import torch.optim as optim
 from torch.utils.data.dataloader import DataLoader
-from collections import defaultdict
 from torchtext.vocab import Vocab
 from torch.utils.data.dataset import Dataset, TensorDataset
-from collections import Counter
+from collections import Counter, OrderedDict
 from chu_liu_edmonds import decode_mst
 import matplotlib.pyplot as plt
-import time
+from datetime import datetime
 from tqdm import tqdm
+from timeit import default_timer as timer
+import csv
+
+
+torch.manual_seed(0)
 
 UNKNOWN_TOKEN = "<unk>"
 PAD_TOKEN = "<pad>"
 ROOT_TOKEN = "<root>"
 SPECIAL_TOKENS = [ROOT_TOKEN, PAD_TOKEN, UNKNOWN_TOKEN]
 
-torch.manual_seed(1)
-
 
 def OpTyNLLLOSS(true_headers, score_matrix, max_len):
     """
@@ -45,6 +47,7 @@ def OpTyNLLLOSS(true_headers, score_matrix, max_len):
     return -1*_loss
 
 
+
 def get_vocabs(list_of_paths):
     """
     Creates a POS-tags and words vocabulary dictionaries
@@ -54,30 +57,38 @@ def get_vocabs(list_of_paths):
          A POS and words indexes dictionaries.
     """
 
-    words_dict = {PAD_TOKEN, ROOT_TOKEN, UNKNOWN_TOKEN}
-    pos_dict = {PAD_TOKEN, ROOT_TOKEN, UNKNOWN_TOKEN}
+    words_dict = OrderedDict([(PAD_TOKEN, 1), (ROOT_TOKEN, 1), (UNKNOWN_TOKEN, 1)])
+    pos_dict = OrderedDict([(PAD_TOKEN, 1), (ROOT_TOKEN, 1), (UNKNOWN_TOKEN, 1)])
+
     for file_path in list_of_paths:
         with open(file_path) as f:
             for line in f:
                 split_line = line.split('\t')
                 if len(split_line) == 1:  # the end of a sentence denotes by \n line.
                     continue
-                word, pos_tag, head = split_line[1], split_line[3], int(split_line[6])
-                words_dict.add(word)
-                pos_dict.add(pos_tag)
+                word, pos_tag = split_line[1], split_line[3]
+                if word in words_dict:
+                    words_dict[word] = words_dict[word] + 1
+                else:
+                    words_dict[word] = 1
+                if pos_tag in pos_dict:
+                    pos_dict[pos_tag] = pos_dict[pos_tag] + 1
+                else:
+                    pos_dict[pos_tag] = 1
 
     return words_dict, pos_dict
 
 
 class DataReader:
-    """ Read the data from the requested file and hold it's components. """
+    """ Reads the data from the requested file and holds its components. """
 
-    def __init__(self, word_dict, pos_dict, file_path):
+    def __init__(self, word_dict, pos_dict, file_path, competition=False):
         """
         Args:
             file_path (str): holds the path to the requested file.
             words_dict, tags_dict: a dictionary - keys:words\tags, items: counts of appearances.
         """
+        self.competition = competition
         self.file_path = file_path
         self.words_dict = word_dict
         self.pos_dict = pos_dict
@@ -99,13 +110,14 @@ def __readData__(self):
                     cur_sentence_pos = [ROOT_TOKEN]
                     cur_sentence_headers = [-1]
                     continue
-                word, pos_tag, head = split_line[1], split_line[3], int(split_line[6])
+                if not self.competition:
+                    word, pos_tag, head = split_line[1], split_line[3], int(split_line[6])
+                else:
+                    word, pos_tag, head = split_line[1], split_line[3], -2
                 cur_sentence_word.append(word)
                 cur_sentence_pos.append(pos_tag)
                 cur_sentence_headers.append(head)
 
-
-
     def get_num_sentences(self):
         """returns num of sentences in data."""
         return len(self.sentences)
@@ -116,18 +128,23 @@ class DependencyDataset(Dataset):
     Holds version of our data as a PyTorch's Dataset object.
     """
 
-    def __init__(self, word_dict, pos_dict, file_path, padding=False, word_embeddings=None):
+    def __init__(self, word_dict, pos_dict, file_path, padding=False, word_embeddings=None, competition=False):
+
         """
         Args:
-            file_path (str): The path of the requested file.
-            padding (bool): Gets true if padding is required.
-            word_embeddings: A set of words mapping.
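+            word_dict: The words vocabulary, passed on to the DataReader.
+            pos_dict: The POS tags vocabulary, passed on to the DataReader.
+            file_path (str): The path of the requested file.
+            padding (bool): True if padding is required.
+            word_embeddings (tuple): Optional (word-to-index mapping, index-to-word mapping, word vectors) triple.
+            competition (bool): True if the file has no gold headers.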
         """
 
         super().__init__()
         self.file_path = file_path
-        self.data_reader = DataReader(word_dict, pos_dict, self.file_path)
+        self.data_reader = DataReader(word_dict, pos_dict, self.file_path, competition)
         self.vocab_size = len(self.data_reader.words_dict)
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
         if word_embeddings:
             self.words_idx_mappings, self.idx_words_mappings, self.words_vectors = word_embeddings
@@ -180,11 +197,11 @@ def convert_sentences_to_dataset(self, padding):
             for word, pos_tag, header in zip(sentence[0], sentence[1], sentence[2]):
 
                 headers_idx_list.append(header)
-                if word in words_dict:
+                if word in self.data_reader.words_dict:
                     words_idx_list.append(self.words_idx_mappings.get(word))
                 else:
                     words_idx_list.append(self.unknown_idx)
-                if pos_tag in pos_dict:
+                if pos_tag in self.data_reader.pos_dict:
                     pos_idx_list.append(self.pos_idx_mappings.get(pos_tag))
                 else:
                     pos_idx_list.append(self.unknown_idx)
@@ -198,15 +215,15 @@ def convert_sentences_to_dataset(self, padding):
                 sentence_pos_idx_list.append(pos_idx_list)
                 sentence_headers_idx_list.append(headers_idx_list)
             else:
-                sentence_words_idx_list.append(torch.tensor(words_idx_list, dtype=torch.long, requires_grad=False))
-                sentence_pos_idx_list.append(torch.tensor(pos_idx_list, dtype=torch.long, requires_grad=False))
-                sentence_headers_idx_list.append(torch.tensor(headers_idx_list, dtype=torch.long, requires_grad=False))
+                sentence_words_idx_list.append(torch.tensor(words_idx_list, dtype=torch.long, requires_grad=False).to(self.device))
+                sentence_pos_idx_list.append(torch.tensor(pos_idx_list, dtype=torch.long, requires_grad=False).to(self.device))
+                sentence_headers_idx_list.append(torch.tensor(headers_idx_list, dtype=torch.long, requires_grad=False).to(self.device))
 
         if padding:
-            all_sentence_words_idx = torch.tensor(sentence_words_idx_list, dtype=torch.long, requires_grad=False)
-            all_sentence_tags_idx = torch.tensor(sentence_pos_idx_list, dtype=torch.long, requires_grad=False)
-            all_sentence_labels_idx = torch.tensor(sentence_headers_idx_list, dtype=torch.long, requires_grad=False)
-            all_sentence_len = torch.tensor(sentence_len_list, dtype=torch.long, requires_grad=False)
+            all_sentence_words_idx = torch.tensor(sentence_words_idx_list, dtype=torch.long, requires_grad=False).to(self.device, non_blocking=True)
+            all_sentence_tags_idx = torch.tensor(sentence_pos_idx_list, dtype=torch.long, requires_grad=False).to(self.device, non_blocking=True)
+            all_sentence_labels_idx = torch.tensor(sentence_headers_idx_list, dtype=torch.long, requires_grad=False).to(self.device, non_blocking=True)
+            all_sentence_len = torch.tensor(sentence_len_list, dtype=torch.long, requires_grad=False).to(self.device, non_blocking=True)
             return TensorDataset(all_sentence_words_idx, all_sentence_tags_idx, all_sentence_labels_idx,
                                  all_sentence_len)
         else:
@@ -216,52 +233,54 @@ def convert_sentences_to_dataset(self, padding):
                                                                          sentence_len_list))}
 
 
-class DependencyParser(nn.Module):
-    def __init__(self, word_emb_dim, pos_emb_dim, hidden_dim, word_vocab_size, tag_vocab_size):
-        super(DependencyParser, self).__init__()
-        torch.manual_seed(1)
-
-        self.emb_dim = word_emb_dim + pos_emb_dim
-        torch.manual_seed(1)
+class LSTMEncoder(nn.Module):
+    """
+    Our model encoder, based on LSTM and Contrast.
+    """
+    def __init__(self, batch_size, words_dict, word_to_idx, idx_to_word,  word_emb_dim, pos_emb_dim, hidden_dim, word_vocab_size, tag_vocab_size):
+        """
+        Args:
+            word_emb_dim: The dimension of the word embedding.
+            pos_emb_dim: The dimension of the POS tag embedding.
+            hidden_dim: The dimension of the LSTM's hidden size
+            word_vocab_size: The number of words in our vocabulary.
+            tag_vocab_size: The number of tags in our vocabulary
+        """
+        super(LSTMEncoder, self).__init__()
 
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        torch.manual_seed(1)
-
-        self.word_embedding = nn.Embedding(word_vocab_size, word_emb_dim)
+        self.idx_to_word = idx_to_word
+        self.word_to_idx = word_to_idx
+        self.batch_size = batch_size
 
-        torch.manual_seed(1)
+        self.words_dict = words_dict
+        self.hidden_dim = hidden_dim
+        self.emb_dim = word_emb_dim + pos_emb_dim
 
+        self.word_embedding = nn.Embedding(word_vocab_size, word_emb_dim)
         self.tag_embedding = nn.Embedding(tag_vocab_size, pos_emb_dim)
-        torch.manual_seed(1)
         self.encoder = nn.LSTM(input_size=self.emb_dim, hidden_size=hidden_dim, num_layers=2, bidirectional=True,
                                batch_first=True)
-        torch.manual_seed(1)
+
         self.mlp = nn.Sequential(
-            nn.Linear(self.emb_dim * 4, 100),
+            nn.Linear(self.hidden_dim * 4, 100),
             nn.Tanh(),
             nn.Linear(100, 1)
         )
 
-        torch.manual_seed(1)
-        self.fc1 = nn.Linear(self.emb_dim * 4, 100)
-        torch.manual_seed(1)
-        self.tanh = nn.Tanh()
-        torch.manual_seed(1)
-        self.fc2 = nn.Linear(100, 1)
-
-    def forward(self, words_idx_tensor, pos_idx_tensor, max_length, lengths):
-        torch.manual_seed(1)
-        words_embedded = self.word_embedding(words_idx_tensor[:, :max_length].to(self.device))
+    def forward(self, words_idx_tensor, pos_idx_tensor, max_length, _evaluate=False):
 
-        torch.manual_seed(1)
-
-        tags_embedded = self.tag_embedding(pos_idx_tensor[:, :max_length].to(self.device))
-        torch.manual_seed(1)
+        # Goldberg and Kiperwasser dropout:
+        if not _evaluate:
+            mask = torch.rand((words_idx_tensor.shape[0], 250), dtype=torch.float).to(self.device)
+            drop_prob = torch.tensor([[0.25/(0.25+self.words_dict[self.idx_to_word[word_idx]])
+                                       for word_idx in words_idx_tensor[i]] for i in range(self.batch_size)]).to(self.device)
+            words_idx_tensor = words_idx_tensor.where(mask > drop_prob,
+                                                      torch.tensor(self.word_to_idx[UNKNOWN_TOKEN]).to(self.device))
 
+        words_embedded = self.word_embedding(words_idx_tensor[:, :max_length].to(self.device, non_blocking=True))
+        tags_embedded = self.tag_embedding(pos_idx_tensor[:, :max_length].to(self.device, non_blocking=True))
         embeds = torch.cat([words_embedded, tags_embedded], 2)
-
-        torch.manual_seed(1)
-
         lstm_out, _ = self.encoder(embeds)
 
         features = []
@@ -272,13 +291,9 @@ def forward(self, words_idx_tensor, pos_idx_tensor, max_length, lengths):
                  lstm_out[i].repeat(max_length, 1, 1)], -1).unsqueeze(1))
 
         features = torch.cat(features, 1)
-        torch.manual_seed(1)
-        # features = self.mlp(features)
-        edge_scores = self.fc1(features)
-        torch.manual_seed(1)
-        edge_scores = self.tanh(edge_scores)
-        torch.manual_seed(1)
-        edge_scores = self.fc2(edge_scores)
+
+        edge_scores = self.mlp(features)
+
         return edge_scores
 
 
@@ -286,12 +301,13 @@ def get_acc(edge_scores, headers_idx_tensors, batch_size, max_length, sentence_l
     """
     Uses Chu Liu Edmonds algorithm to infer a parse tree and calculates the current batch accuracy.
     Args:
-        edge_scores:
-        headers_idx_tensors:
-        batch_size:
-        max_length:
-        sentence_length:
+        edge_scores: Edge scores matrix, produced by our chosen model.
+        headers_idx_tensors: The gold headers to compare to.
+        batch_size: The number of sentences in a batch.
+        max_length: The maximum length of a sentence in the batch.
+        sentence_length: List of the lengths of all the sentences.
     Returns:
+        The summed accuracy of the current batch.
     """
     acc = 0
     trees = []
@@ -302,132 +318,235 @@ def get_acc(edge_scores, headers_idx_tensors, batch_size, max_length, sentence_l
             has_labels=False)[0])
 
     for i in range(batch_size):
-        acc += torch.mean(torch.tensor(headers_idx_tensors[i].tolist() == trees[i], dtype=torch.float, requires_grad=False))
+        acc += torch.sum(torch.tensor(headers_idx_tensors[i][1:].tolist() == trees[i][1:], dtype=torch.float, requires_grad=False))
     return acc
 
-
-def evaluate(model, words_dict, pos_dict, batch_size):
+def evaluate(model, path_test, words_dict, pos_dict, batch_size):
     """
+    Evaluate our model on a validation set.
     Args:
-        words_dict:
-        pos_dict:
-        batch_size:
+        model: Our trained model.
+        words_dict: The word vocabulary the model trained with.
+        pos_dict: The POS tag vocabulary the model trained with.
+        batch_size: The number of sentences in a batch.
     Returns:
+        The given model's loss and accuracy gained on the validation set.
     """
     print("Evaluating Started")
-    path_test = "Data/test.labeled"
-    test = DependencyDataset(words_dict, pos_dict, path_test, padding=True)
-    test_data_loader = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
 
+    model.eval()
+    test = DependencyDataset(words_dict, pos_dict, path_test, padding=True)
+    test_data_loader = DataLoader(test, batch_size=batch_size, shuffle=False, num_workers=0)
+    num_of_sentences = len(test)
     acc = 0
+    num_of_words = 0
     with torch.no_grad():
         for batch_idx, input_data in enumerate(test_data_loader):
             words_idx_tensor, pos_idx_tensor, headers_idx_tensor, sentence_length = input_data
             headers_idx_tensors = [headers[:sentence_length[i]] for i, headers in enumerate(headers_idx_tensor)]
             max_length = max(sentence_length)
-            batched_scores = model(words_idx_tensor, pos_idx_tensor, max_length, sentence_length)
+            batched_scores = model(words_idx_tensor, pos_idx_tensor, max_length, _evaluate=True)
 
+            _loss = OpTyNLLLOSS(headers_idx_tensors, batched_scores, max_length).requires_grad_(False).item()
             acc += get_acc(batched_scores, headers_idx_tensors, batch_size, max_length, sentence_length)
+            num_of_words += sentence_length.sum()-batch_size
 
-        acc = acc / len(test)
+    acc = acc/num_of_words
     print("Evaluating Ended")
-    return acc
+    return acc, _loss
 
 
-def print_plots(accuracy_list, loss_list):
+def print_plots(train_acc_list, train_loss_list, test_acc_list, test_loss_list, _time=''):
     """
     Prints two plot that describes our processes of learning through an NLLL loss function and the accuracy measure.
     Args:
-        accuracy_list:
-        loss_list:
+        train_acc_list: Contains the accuracy measure tracking through the training phase.
+        train_loss_list: Contains the loss measure tracking through the training phase.
+        test_acc_list: Contains the accuracy measure tracking through the evaluation phase.
+        test_loss_list: Contains the loss measure tracking through the evaluation phase.
+        _time: The time id to recognize the plot output.
     Returns:
+        Saves the plot in a jpeg file.
     """
-    plt.plot(accuracy_list, c="red", label="Accuracy")
-    plt.xlabel("Epochs")
-    plt.ylabel("Value")
-    plt.legend()
-    plt.show()
-
-    plt.plot(loss_list, c="blue", label="Loss")
-    plt.xlabel("Epochs")
-    plt.ylabel("Value")
-    plt.legend()
-    plt.show()
 
-if __name__ == '__main__':
+    # sns.set_style("whitegrid")
 
-    start_time = time.time()
+    fig, ax = plt.subplots(2, 1, figsize=(10, 10))
+    x_train = [a for a in range(len(train_loss_list))]
+    x_test = [a for a in range(len(test_loss_list))]
 
-    # hyper_parameters
-    EPOCHS = 200
-    WORD_EMBEDDING_DIM = 100
-    POS_EMBEDDING_DIM = 25
-    HIDDEN_DIM = 125
-    BATCH_SIZE = 10
-    LEARNING_RATE = 0.007
+    ax[0].plot(x_train, train_loss_list, label='Loss Train')
+    ax[0].plot(x_test, test_loss_list, label='Loss Test')
 
-    path_train = "Data/train.labeled"
-    path_test = "Data/test.labeled"
-    paths_list = [path_train]
+    ax[0].legend()
+    ax[0].set_title('Loss Convergence')
+    ax[0].set_xlabel('Num of Epochs')
+    ax[0].set_ylabel('Loss')
 
-    words_dict, pos_dict = get_vocabs(paths_list)  # Gets all known vocabularies.
+    ax[1].plot(x_train, train_acc_list, label='Train UAS')
+    ax[1].plot(x_test, test_acc_list, label='Test UAS')
+    ax[1].legend()
+    ax[1].set_title('UAS')
+    ax[1].set_xlabel('Num of Epochs')
+    ax[1].set_ylabel('UAS')
+    fig.savefig('plots_{}.png'.format(_time))
 
-    # Preparing the dataset
-    train = DependencyDataset(words_dict, pos_dict, path_train, padding=True)
-    train_data_loader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
 
+class DependencyParser:
+    """
+    A dependency parser model object, includes all the hyper parameters mix and train phase of the selected model.
+    """
+    def __init__(self, epochs, word_embedding_dim, pos_embedding_dim, hidden_dim, batch_size, batch_accumulate,
+                 learning_rate, path_train, path_test, word_tag_dropout):
+        """
+        Args:
+            epochs: The number of epochs the model is trained on.
+            word_embedding_dim: The dimension of the word embedding.
+            pos_embedding_dim: The dimension of the POS tag embedding.
+            hidden_dim: The LSTM's hidden size dimension.
+            batch_size: The batch size - the number of sentences we get out of the data loader.
+            batch_accumulate: The accumulated batch size - the practical batch size, i.e. the number of sentences we learn on in parallel.
+            learning_rate: The learning rate of our optimizer.
+            path_train: The path to the train file.
+            path_test: The path to the test file.
+            word_tag_dropout: The probability of dropping a complete word/tag and replacing it with its matched word/tag.
+        """
+        self.epochs = epochs
+        self.word_embedding_dim = word_embedding_dim
+        self.pos_embedding_dim = pos_embedding_dim
+        self.hidden_dim = hidden_dim
+        self.batch_size = batch_size
+        self.batch_accumulate = batch_accumulate
+        self.learning_rate = learning_rate
+        self.path_train = path_train
+        self.path_test = path_test
+        self.word_tag_dropout = word_tag_dropout
+
+        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+    def train(self):
+        """ Runs the training phase of our model.
+            Returns an accuracy tracking on a validation set and a unique ID to identify this run.
+        """
 
-    word_vocab_size = len(words_dict)
-    pos_vocab_size = len(pos_dict)
+        start_time = timer()
+        torch.cuda.empty_cache()
 
-    OpTyParser = DependencyParser(WORD_EMBEDDING_DIM, POS_EMBEDDING_DIM, HIDDEN_DIM, word_vocab_size, pos_vocab_size)
-    use_cuda = torch.cuda.is_available()
-    device = torch.device("cuda:0" if use_cuda else "cpu")
+        paths_list = [self.path_train]
 
-    if use_cuda:
-        OpTyParser.cuda()
+        # Prepares the dataset.
+        words_dict, pos_dict = get_vocabs(paths_list)  # Gets all known vocabularies.
+        train = DependencyDataset(words_dict, pos_dict, self.path_train, padding=True)
+        train_data_loader = DataLoader(train, batch_size=self.batch_size, shuffle=True, num_workers=0)
+        word_vocab_size = len(words_dict)
+        pos_vocab_size = len(pos_dict)
+        word_to_idx, idx_to_word, _= train.get_words_embeddings()
 
-    optimizer = optim.Adam(OpTyParser.parameters(), lr=LEARNING_RATE)
+        # Initialize an instance of our encoder with the chosen hyper parameters.
+        encoder = LSTMEncoder(self.batch_size, words_dict, word_to_idx, idx_to_word, self.word_embedding_dim, self.pos_embedding_dim,
+                              self.hidden_dim, word_vocab_size, pos_vocab_size)
 
-    # Training start
-    print("Training Started")
-    accuracy_list = []
-    loss_list = []
-    for epoch in range(EPOCHS):
-        acc = 0  # to keep track of accuracy
-        printable_loss = 0  # To keep track of the loss value
-        i = 0
-        for input_data in tqdm(train_data_loader):
-            i += 1
+        if torch.cuda.is_available():
+            encoder.cuda()
 
-            words_idx_tensor, pos_idx_tensor, headers_idx_tensor, sentence_length = input_data
-            headers_idx_tensors = [headers[:sentence_length[i]] for i, headers in enumerate(headers_idx_tensor)]
-            max_length = max(sentence_length)
+        # Initialize the chosen optimizer.
+        optimizer = optim.Adam(encoder.parameters(), lr=self.learning_rate)
+
+        # Training start
+        print("Training Started")
+
+        # To keep track of the loss and accuracy values.
+        train_acc_list = []
+        train_loss_list = []
+        test_acc_list = []
+        test_loss_list = []
+
+        for epoch in range(self.epochs):
+
+            acc = 0
+            printable_loss = 0
+            num_of_words = 0
+
+            for input_data in tqdm(train_data_loader):
+
+                encoder.train()
+
+                words_idx_tensor, pos_idx_tensor, headers_idx_tensor, sentence_length = input_data
+
+                # In case we use batches (>1 sentences) we need to cut the padding out of the gold headers.
+                headers_idx_tensors = [headers[:sentence_length[i]] for i, headers in enumerate(headers_idx_tensor)]
+                max_length = max(sentence_length)
+
+                # Feeding our model with the current batch.
+                batched_weights = encoder(words_idx_tensor, pos_idx_tensor, max_length)
 
-            batched_weights = OpTyParser(words_idx_tensor, pos_idx_tensor, max_length, sentence_length)
-
-            loss = OpTyNLLLOSS(headers_idx_tensors, batched_weights, max_length)
-            loss.backward()
-            optimizer.step()
-            OpTyParser.zero_grad()
-
-            printable_loss += loss.item()
-
-            acc += get_acc(batched_weights, headers_idx_tensors, BATCH_SIZE, max_length, sentence_length)
-
-        printable_loss = printable_loss / len(train)
-        acc = acc/len(train)
-        accuracy_list.append(float(acc))
-        loss_list.append(float(printable_loss))
-        test_acc = evaluate(OpTyParser, words_dict, pos_dict, BATCH_SIZE)
-        e_interval = i
-        print("Epoch {} Completed,\tLoss {}\tAccuracy: {}\t Test Accuracy: {}".format(epoch + 1,
-                                                                                      np.mean(loss_list[-e_interval:]),
-                                                                                      np.mean(
-                                                                                          accuracy_list[-e_interval:]),
-                                                                                      test_acc))
-
-    print_plots(accuracy_list, loss_list)
-    end_time = time.time()
-    torch.save(OpTyParser.state_dict(), 'OpTyParser{}.pkl '.format(start_time))
-    print("the training took: ", end_time - start_time)
\ No newline at end of file
+                loss = OpTyNLLLOSS(headers_idx_tensors, batched_weights, max_length)
+                loss.backward()
+
+                optimizer.step()
+                encoder.zero_grad()
+
+                printable_loss += loss.item()
+
+                acc += get_acc(batched_weights, headers_idx_tensors, self.batch_size, max_length, sentence_length)
+                num_of_words += sentence_length.sum() - self.batch_size  # We don't count the root as we don't count it in the accuracy.
+
+            # Adds up the new tracking measures.
+            printable_loss = printable_loss / len(train)
+            acc = acc / num_of_words
+            train_acc_list.append(float(acc))
+            train_loss_list.append(float(printable_loss))
+            # Runs a validation phase.
+            test_acc, test_loss = evaluate(encoder, self.path_test, words_dict, pos_dict, self.batch_size)
+            test_acc_list.append(test_acc)
+            test_loss_list.append(test_loss)
+
+            time_id = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
+            with open(r"{}_basic_model_{}.pkl".format(epoch, time_id), "wb") as output_file:
+                torch.save(encoder.state_dict(), output_file)
+
+            print("Epoch {} Completed,\tLoss {}\tAccuracy: {}\t Test Accuracy: {}".format(epoch + 1, train_loss_list[-1],
+                                                                                          train_acc_list[-1], test_acc))
+
+        # Saves our learned model and plot some graphs.
+        time_id = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
+        print_plots(train_acc_list, train_loss_list, test_acc_list, test_loss_list, time_id)
+        end_time = timer()
+        with open(r"basic_model_{}.pkl".format(time_id), "wb") as output_file:
+            torch.save(encoder.state_dict(), output_file)
+        print("the training took: {} sec ".format(round(end_time - start_time, 2)))
+        return test_acc_list, time_id
+
+def get_hyper_parameters():
+    """Returns the hyper parameters of the model as a single positional tuple (callers unpack it by position)."""
+    path_train = "Data/combined.labeled"
+    path_test = "Data/val.labeled"
+    return (30, 100, 25, 125, 10, 1, 0.001, path_train, path_test, 0)  # epochs, word emb dim, POS emb dim, hidden dim, batch size, batch accumulate, learning rate, train path, test path, word-tag dropout
+
+
+def run_basic_model():
+    """Trains a parser for every hyper parameters mix and appends each run's summary row to 'parser_results_info.csv'."""
+    torch.manual_seed(0)
+    hyper_parameters_list = [get_hyper_parameters()]
+
+    for hyper_parameters in hyper_parameters_list:
+        EPOCHS, WORD_EMBEDDING_DIM, POS_EMBEDDING_DIM, HIDDEN_DIM, BATCH_SIZE, BATCH_ACCUMULATE, LEARNING_RATE, path_train, path_test, WORD_TAG_DROPOUT = hyper_parameters
+
+        parser = DependencyParser(EPOCHS, WORD_EMBEDDING_DIM, POS_EMBEDDING_DIM, HIDDEN_DIM, BATCH_SIZE, BATCH_ACCUMULATE,
+                                  LEARNING_RATE, path_train, path_test, WORD_TAG_DROPOUT)
+
+        test_acc_list, time_id = parser.train()
+        max_test_acc = round(max(test_acc_list).item(), 3)  # .item() unwraps the best accuracy into a plain Python number.
+        epoch_max = np.argmax(test_acc_list)  # Zero-based position of the best accuracy inside test_acc_list.
+
+        # Saves our model hyper parameters settings in a csv file (newline='' is required by the csv module to avoid blank rows).
+        with open('parser_results_info.csv', 'a', newline='') as f:
+            writer = csv.writer(f)
+            writer.writerow([time_id, max_test_acc, epoch_max, EPOCHS, WORD_EMBEDDING_DIM, POS_EMBEDDING_DIM, HIDDEN_DIM,
+                             BATCH_SIZE, BATCH_ACCUMULATE, LEARNING_RATE, WORD_TAG_DROPOUT])
+
+        print("Finished training the model, based on the following hyper parameters mix: {}".format(hyper_parameters))
+
+
+if __name__ == '__main__':  # Start a training run only when this file is executed as a script, not when it is imported.
+    run_basic_model()
diff --git a/generate_comp_tagged.py b/generate_comp_tagged.py
index 823573d..2484a2f 100644
--- a/generate_comp_tagged.py
+++ b/generate_comp_tagged.py
@@ -1,39 +1,56 @@
 import AdvancedModel
+import BasicModel
 from chu_liu_edmonds import decode_mst
 import torch
 from tqdm import tqdm
 import numpy as np
-from AdvancedModel import LSTMEncoder
+
 torch.manual_seed(0)
 
 
-def generate_comp_tagged_file(model_path, target_path):
+def generate_comp_tagged_file(model_path, target_path, path_comp, model_module):
     """
     Generates a tagged version of the competition file.
     Args:
+
         model_path (str): The path to the chosen model's pth file.
         target_path (str): The path for the tagged version's file.
-
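+        path_comp (str): The path to the untagged competition file.
+        model_module (module): The model's module (BasicModel or AdvancedModel); its get_vocabs, DependencyDataset, DataLoader, LSTMEncoder and get_hyper_parameters are used.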
     """
-    torch.manual_seed(0)
-    words_dict, pos_dict = AdvancedModel.get_vocabs(trained_on_path_list)
-    comp = AdvancedModel.DependencyDataset(words_dict, pos_dict, path_comp, padding=True, competition=True)
-    comp_data_loader = AdvancedModel.DataLoader(comp, batch_size=1, shuffle=False, num_workers=0)
 
     with torch.no_grad():
-        model = torch.load(model_path)
+        words_dict, pos_dict = model_module.get_vocabs(trained_on_path_list)
+        word_vocab_size, pos_vocab_size = len(words_dict), len(pos_dict)
+        comp = model_module.DependencyDataset(words_dict, pos_dict, path_comp, padding=True, competition=True)
+        comp_data_loader = model_module.DataLoader(comp, batch_size=1, shuffle=False, num_workers=0)
+        word_to_idx, idx_to_word, _ = comp.get_words_embeddings()
+
+        if model_module == BasicModel:
+            _, word_embedding_dim, pos_embedding_dim, hidden_dim, batch_size, _, _, _, _, word_tag_dropout \
+                                                                = model_module.get_hyper_parameters()
+
+            model = model_module.LSTMEncoder(batch_size, words_dict, word_to_idx, idx_to_word, word_embedding_dim,
+                                             pos_embedding_dim, hidden_dim, word_vocab_size, pos_vocab_size)
+        else:
+            _, word_embedding_dim, pos_embedding_dim, hidden_dim, _, _, _, _, _, word_tag_dropout, embedding_dropout,\
+                                                                    lstm_dropout = model_module.get_hyper_parameters()
+
+            model = model_module.LSTMEncoder(word_embedding_dim, pos_embedding_dim, hidden_dim, word_vocab_size,
+                                             pos_vocab_size, word_tag_dropout, embedding_dropout, lstm_dropout)
+
+        model.load_state_dict(torch.load(model_path))
+        model = model.eval()
 
         if torch.cuda.is_available():
             model.cuda()
 
-        model = model.eval()
-
         trees = []
 
         for input_data in tqdm(comp_data_loader):
             words_idx_tensor, pos_idx_tensor, headers_idx_tensor, sentence_length = input_data
             max_length = max(sentence_length)
-            score_matrix = model(words_idx_tensor, pos_idx_tensor, max_length)
+            score_matrix = model(words_idx_tensor, pos_idx_tensor, max_length, _evaluate=True)
 
             trees.append(decode_mst(np.array(score_matrix[:, 0].detach().cpu()).reshape((max_length, max_length))
                                     [:sentence_length[0], :sentence_length[0]], sentence_length[0], has_labels=False)[0])
@@ -41,9 +58,6 @@ def generate_comp_tagged_file(model_path, target_path):
         current_tree_idx = 0
         current_word_idx = 1
 
-        # with open(target_path, 'w') as f_labeled:
-        #     pass
-
         with open(path_comp, 'r') as f_unlabeled:
             with open(target_path, 'w') as f_labeled:
 
@@ -63,22 +77,20 @@ def generate_comp_tagged_file(model_path, target_path):
 
 
 if __name__ == '__main__':
-
     print("Starting to evaluate")
     torch.cuda.empty_cache()
 
     path_train = "Data/train.labeled"
     path_test = "Data/test.labeled"
+    path_combined = "Data/combined.labeled"
     path_comp = "Data/comp.unlabeled"
-    trained_on_path_list = [path_train, path_test]
+    trained_on_path_list = [path_combined]  # Read as a module-level global by generate_comp_tagged_file() to rebuild the vocabularies.
     path_comp_m1_labeled = 'comp_m1_203933551.labeled'
     path_comp_m2_labeled = 'comp_m2_203933551.labeled'
-    basic_model_path = 'encoder06_26_2020_14_55_17.pth'
-    advanced_model_path = 'encoder06_26_2020_14_55_17.pth'
+    basic_model_path = 'basic_model_full.pkl'  # Saved state dict of the basic model (restored through load_state_dict).
+    advanced_model_path = 'advanced_model_full.pkl'  # Saved state dict of the advanced model (restored through load_state_dict).
 
-    generate_comp_tagged_file(basic_model_path, path_comp_m1_labeled)
-    generate_comp_tagged_file(advanced_model_path, path_comp_m2_labeled)
+    generate_comp_tagged_file(basic_model_path, path_comp_m1_labeled, path_comp, BasicModel)  # m1: tagged by the basic model.
+    generate_comp_tagged_file(advanced_model_path, path_comp_m2_labeled, path_comp, AdvancedModel)  # m2: tagged by the advanced model.
 
     print("Evaluate end")
-
-