From d54d7ece95087c1a08e3fcc99ba5d4a0c1c62924 Mon Sep 17 00:00:00 2001 From: Tim Schmeier Date: Mon, 10 Jul 2017 17:26:04 +0000 Subject: [PATCH 1/3] added build/save to help preserve memory --- all2vec/__init__.py | 52 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/all2vec/__init__.py b/all2vec/__init__.py index 00bf739..f34b2f6 100644 --- a/all2vec/__init__.py +++ b/all2vec/__init__.py @@ -232,7 +232,7 @@ def get_similar_threshold(self, entity_type, entity_id, match_type, def get_entity_types(self): """Helper for getting entity types object.""" return [{ - 'num_entities': etype._ann_obj.get_n_items(), + 'num_entities': etype._nitems, 'entity_type_id': etype._entity_type_id, 'entity_type': etype._entity_type, 'metric': etype._metric, @@ -263,6 +263,56 @@ def save(self, folder): files.append(info_file) return files + def build_and_save(self, folder, verbose=False): + """Build and save all entities, preserves memory by deleting + each index after build/save. + """ + + if self._is_built: + return + + if not os.path.exists(folder): + os.makedirs(folder) + files = [] + + for annoy_object in self._annoy_objects.values(): + logging.info("Starting build for entity {} - {}...".format( + annoy_object._entity_type_id, + annoy_object._entity_type, + )) + annoy_object.build(verbose) + logging.info("Done build for entity {} - {}".format( + annoy_object._entity_type_id, + annoy_object._entity_type, + )) + annoy_filepath = os.path.join(folder, "{}.ann".format( + annoy_object._entity_type, + )) + annoy_object._ann_obj.save(annoy_filepath) + files.append(annoy_filepath) + logging.info("Done saving for entity {} - {}".format( + annoy_object._entity_type_id, + annoy_object._entity_type, + )) + + #Release memory + del annoy_object._ann_obj + + self._is_built = True + + pickle_filepath = os.path.join(folder, 'object.pickle') + with open(pickle_filepath, 'wb') as handle: + dill.dump(self, handle) + files.append(pickle_filepath) + + enttypes = self.get_entity_types() + + info_file = os.path.join(folder, 'entity_info.json') + with open(info_file, 'w') as handle: + json.dump(enttypes, handle) + files.append(info_file) + return files + def load_entities(self, entities, file_getter): """Load underlying entities.""" for k in entities: From f2bbec8bc3e37e55ed38e6e082bb09638552e6f4 Mon Sep 17 00:00:00 2001 From: Tim Schmeier Date: Mon, 10 Jul 2017 17:33:23 +0000 Subject: [PATCH 2/3] got pepped --- all2vec/__init__.py | 73 ++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 38 deletions(-) diff --git a/all2vec/__init__.py b/all2vec/__init__.py index f34b2f6..a9fe41d 100644 --- a/all2vec/__init__.py +++ b/all2vec/__init__.py @@ -264,54 +264,51 @@ def save(self, folder): return files def build_and_save(self, folder, verbose=False): - """Build and save all entities, preserves memory by deleting - each index after build/save. - """ - - if self._is_built: - return - - if not os.path.exists(folder): - os.makedirs(folder) - files = [] - - for annoy_object in self._annoy_objects.values(): - logging.info("Starting build for entity {} - {}...".format( - annoy_object._entity_type_id, - annoy_object._entity_type, - )) - annoy_object.build(verbose) - logging.info("Done build for entity {} - {}".format( - annoy_object._entity_type_id, - annoy_object._entity_type, - )) - annoy_filepath = os.path.join(folder, "{}.ann".format( - annoy_object._entity_type, - )) - annoy_object._ann_obj.save(annoy_filepath) - files.append(annoy_filepath) + """Preserves memory by deleting index after build and save.""" + if self._is_built: + return + + if not os.path.exists(folder): + os.makedirs(folder) + files = [] + + for annoy_object in self._annoy_objects.values(): + logging.info("Starting build for entity {} - {}...".format( + annoy_object._entity_type_id, + annoy_object._entity_type, + )) + annoy_object.build(verbose) + logging.info("Done build for entity {} - {}".format( + annoy_object._entity_type_id, + annoy_object._entity_type, + )) + annoy_filepath = os.path.join(folder, "{}.ann".format( + annoy_object._entity_type, + )) + annoy_object._ann_obj.save(annoy_filepath) + files.append(annoy_filepath) logging.info("Done saving for entity {} - {}".format( annoy_object._entity_type_id, annoy_object._entity_type, )) #Release memory - del annoy_object._ann_obj + del annoy_object._ann_obj - self._is_built = True + self._is_built = True - pickle_filepath = os.path.join(folder, 'object.pickle') - with open(pickle_filepath, 'wb') as handle: - dill.dump(self, handle) - files.append(pickle_filepath) + pickle_filepath = os.path.join(folder, 'object.pickle') + with open(pickle_filepath, 'wb') as handle: + dill.dump(self, handle) + files.append(pickle_filepath) - enttypes = self.get_entity_types() + enttypes = self.get_entity_types() - info_file = os.path.join(folder, 'entity_info.json') - with open(info_file, 'w') as handle: - json.dump(enttypes, handle) - files.append(info_file) - return files + info_file = os.path.join(folder, 'entity_info.json') + with open(info_file, 'w') as handle: + json.dump(enttypes, handle) + files.append(info_file) + return files def load_entities(self, entities, file_getter): """Load underlying entities.""" From d474d6c1335974b959026912dab5e5c0098fa43e Mon Sep 17 00:00:00 2001 From: Tim Schmeier Date: Mon, 10 Jul 2017 17:46:43 +0000 Subject: [PATCH 3/3] my docs were not "imperative" enough --- all2vec/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/all2vec/__init__.py b/all2vec/__init__.py index a9fe41d..28019a7 100644 --- a/all2vec/__init__.py +++ b/all2vec/__init__.py @@ -264,7 +264,7 @@ def save(self, folder): return files def build_and_save(self, folder, verbose=False): - """Preserves memory by deleting index after build and save.""" + """Preserve memory by deleting index after build and save.""" if self._is_built: return @@ -292,7 +292,7 @@ def build_and_save(self, folder, verbose=False): annoy_object._entity_type, )) - #Release memory + # Release memory del annoy_object._ann_obj self._is_built = True