Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 48 additions & 1 deletion all2vec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def get_similar_threshold(self, entity_type, entity_id, match_type,
def get_entity_types(self):
"""Helper for getting entity types object."""
return [{
'num_entities': etype._ann_obj.get_n_items(),
'num_entities': etype._nitems,
'entity_type_id': etype._entity_type_id,
'entity_type': etype._entity_type,
'metric': etype._metric,
Expand Down Expand Up @@ -263,6 +263,53 @@ def save(self, folder):
files.append(info_file)
return files

def build_and_save(self, folder, verbose=False):
    """Build every index, write it to ``folder``, and free it right away.

    Each entry of ``self._annoy_objects`` is built, saved as
    ``<entity_type>.ann`` inside ``folder`` and then has its ``_ann_obj``
    attribute deleted before the next entry is processed, so the built
    indexes are not all held at the same time. Afterwards this object is
    dumped with ``dill`` to ``object.pickle`` and the result of
    ``get_entity_types()`` is written to ``entity_info.json``.

    Args:
        folder: Output directory; created if it does not exist yet.
        verbose: Forwarded unchanged to each entry's ``build`` call.

    Returns:
        A list of the file paths written (one ``.ann`` file per entry,
        then the pickle, then the JSON info file), or ``None`` when
        ``self._is_built`` is already set, in which case nothing is done.
    """
    if self._is_built:
        return

    written = []
    if not os.path.exists(folder):
        os.makedirs(folder)

    for entry in self._annoy_objects.values():
        starting_msg = "Starting build for entity {} - {}...".format(
            entry._entity_type_id, entry._entity_type)
        logging.info(starting_msg)
        entry.build(verbose)
        built_msg = "Done build for entity {} - {}".format(
            entry._entity_type_id, entry._entity_type)
        logging.info(built_msg)

        index_name = "{}.ann".format(entry._entity_type)
        index_path = os.path.join(folder, index_name)
        entry._ann_obj.save(index_path)
        written.append(index_path)
        saved_msg = "Done saving for entity {} - {}".format(
            entry._entity_type_id, entry._entity_type)
        logging.info(saved_msg)

        # The index is on disk now; drop the in-memory copy before moving
        # on to the next entity type.
        del entry._ann_obj

    self._is_built = True

    # The pickle is written after the ``_ann_obj`` attributes were removed,
    # so it captures this object without them.
    pickle_path = os.path.join(folder, 'object.pickle')
    with open(pickle_path, 'wb') as pickle_out:
        dill.dump(self, pickle_out)
    written.append(pickle_path)

    # Collect the summary before opening the file, so a failure here does
    # not leave an empty JSON file behind.
    entity_summary = self.get_entity_types()
    info_path = os.path.join(folder, 'entity_info.json')
    with open(info_path, 'w') as info_out:
        json.dump(entity_summary, info_out)
    written.append(info_path)

    return written

def load_entities(self, entities, file_getter):
"""Load underlying entities."""
for k in entities:
Expand Down