class OutlinesRAGBaselineADMComponent(OutlinesBaselineADMComponent):
    """
    RAG-enabled flavour of OutlinesBaselineADMComponent.

    `run` takes an additional `rag_context` value (None when absent) and
    hands it, through call_with_coerced_args, to the scenario description,
    system prompt and prompt templates.  Per the original author's note,
    templates that do not declare a `rag_context` parameter are expected
    to have it dropped by call_with_coerced_args.
    """

    def run(self,
            scenario_state,
            choices,
            rag_context=None):
        """
        Select one of `choices` for `scenario_state`.

        Returns the 3-tuple `(top_choice, top_choice_justification, dialog)`.
        When `self.enable_caching` is set, that tuple is loaded from / saved
        to a `ub.Cacher` keyed on the component's cache repr, the scenario
        state, the choices and `rag_context`.
        """
        if self.enable_caching:
            # elapsed_time is zeroed on a copy so it does not influence
            # the cache key
            state_for_key = copy.deepcopy(scenario_state)
            if hasattr(scenario_state, 'elapsed_time'):
                state_for_key.elapsed_time = 0

            key_parts = (
                self.cache_repr(),
                repr(state_for_key),
                repr(choices),
                repr(rag_context))

            cacher = ub.Cacher('outlines_rag_baseline_adm_component',
                               '\n'.join(key_parts),
                               verbose=0)
            log.debug(f'cacher.fpath={cacher.fpath}')

            cached_output = cacher.tryload()
            if cached_output is not None:
                log.info("Cache hit for `outlines_rag_baseline_adm_component`"
                         " returning cached output")
                return cached_output

            log.info("Cache miss for `outlines_rag_baseline_adm_component` ..")

        scenario_description = call_with_coerced_args(
            self.scenario_description_template,
            {'scenario_state': scenario_state,
             'rag_context': rag_context})

        # A fixed system prompt wins over a templated one; with neither
        # configured the dialog carries no system message.
        dialog = []
        if self.system_prompt is not None:
            dialog.append(DialogElement(role='system',
                                        content=self.system_prompt,
                                        tags=['regression']))
        elif self.system_prompt_template is not None:
            templated_system_prompt = call_with_coerced_args(
                self.system_prompt_template,
                {'rag_context': rag_context})
            dialog.append(DialogElement(role='system',
                                        content=templated_system_prompt))

        user_prompt = call_with_coerced_args(
            self.prompt_template,
            {'scenario_state': scenario_state,
             'scenario_description': scenario_description,
             'choices': choices,
             'rag_context': rag_context})
        dialog.append(DialogElement(role='user', content=user_prompt))

        output_schema = call_with_coerced_args(
            self.output_schema_template,
            {'choices': choices})

        engine = self.structured_inference_engine
        dialog_prompt = engine.dialog_to_prompt(dialog)

        log.info("[bold]*RAG TAGGING DIALOG PROMPT*[/bold]",
                 extra={"markup": True})
        log.info(dialog_prompt)

        # The same prompt is sampled num_samples times
        responses = engine.run_inference(
            [dialog_prompt] * self.num_samples, output_schema)

        sampled_choices = [r['action_choice'] for r in responses]
        votes = self.vote_calculator_fn(choices, sampled_choices)

        log.explain("[bold]*VOTES*[/bold]",
                    extra={"markup": True})
        log.explain(votes, extra={"highlighter": JSON_HIGHLIGHTER})

        top_choice = max(votes.items(), key=lambda kv: kv[1])[0]

        # Justification comes from the first response that picked the
        # winning choice; empty string when no response did.
        top_choice_justification = next(
            (r['detailed_reasoning'] for r in responses
             if r['action_choice'] == top_choice),
            "")

        outputs = (top_choice, top_choice_justification, dialog)

        if self.enable_caching:
            cacher.save(outputs)

        return outputs
class PromptBasedRAGAlignedADMComponent(PromptBasedAlignedADMComponent):
    """
    Variant of PromptBasedAlignedADMComponent that accepts rag_context from
    the pipeline's working_output and passes it to the
    scenario_description_template, prompt_template and
    system_prompt_template calls.  Per the original author's note, templates
    that don't declare rag_context are expected to have it dropped by
    call_with_coerced_args.
    """

    def run(self,
            scenario_state,
            choices,
            alignment_target,
            positive_icl_dialog_elements=None,
            negative_icl_dialog_elements=None,
            rag_context=None):
        """
        Choose an action aligned to the single KDMA in `alignment_target`.

        Parameters:
            scenario_state: state handed to the description/prompt templates.
            choices: candidate choices handed to the prompt and schema
                templates and to the vote calculator.
            alignment_target: object exposing `kdma_values`; exactly one
                entry is required.
            positive_icl_dialog_elements: optional dialog elements inserted
                before the user prompt of the positive dialog.  None (the
                default) means no examples.
            negative_icl_dialog_elements: same, for the negative dialog.
            rag_context: retrieved context offered to the templates.

        Returns:
            `(top_choice, top_choice_justification, positive_dialog)`.

        Raises:
            RuntimeError: if `alignment_target` does not hold exactly one
                KDMA value.
        """
        kdma_values = alignment_target.kdma_values
        if len(kdma_values) != 1:
            raise RuntimeError("This ADM assumes a single KDMA target, aborting!")
        kdma_value = kdma_values[0]
        if isinstance(kdma_value, KDMAValue):
            kdma_value = kdma_value.to_dict()

        kdma = kdma_value['kdma']
        value = kdma_value['value']
        # The negative dialog targets the complementary value
        negative_value = 1 - value

        scenario_description = call_with_coerced_args(
            self.scenario_description_template,
            {'scenario_state': scenario_state,
             'rag_context': rag_context})

        prompt = call_with_coerced_args(
            self.prompt_template,
            {'scenario_state': scenario_state,
             'scenario_description': scenario_description,
             'choices': choices,
             'rag_context': rag_context})

        positive_dialog = self._build_rag_dialog(
            kdma, value, positive_icl_dialog_elements, prompt, rag_context)

        positive_dialog_prompt = self.structured_inference_engine.dialog_to_prompt(
            positive_dialog)

        log.info("[bold]*POSITIVE RAG DIALOG PROMPT*[/bold]",
                 extra={"markup": True})
        log.info(positive_dialog_prompt)

        if self.num_negative_samples > 0:
            negative_dialog = self._build_rag_dialog(
                kdma, negative_value, negative_icl_dialog_elements,
                prompt, rag_context)

            negative_dialog_prompt = self.structured_inference_engine.dialog_to_prompt(
                negative_dialog)

            log.info("[bold]*NEGATIVE RAG DIALOG PROMPT*[/bold]",
                     extra={"markup": True})
            log.info(negative_dialog_prompt)

        output_schema = call_with_coerced_args(
            self.output_schema_template,
            {'choices': choices})

        positive_responses = self.structured_inference_engine.run_inference(
            [positive_dialog_prompt] * self.num_positive_samples, output_schema)
        positive_choices = [r['action_choice'] for r in positive_responses]
        for i, positive_response in enumerate(positive_responses):
            log.info("[bold]*POSITIVE RAG RESPONSE ({}, sample #{})*[/bold]".format(
                kdma, i), extra={"markup": True})
            log.info(positive_response, extra={"highlighter": JSON_HIGHLIGHTER})

        if self.num_negative_samples > 0:
            negative_responses = self.structured_inference_engine.run_inference(
                [negative_dialog_prompt] * self.num_negative_samples, output_schema)
            negative_choices = [r['action_choice'] for r in negative_responses]
            for i, negative_response in enumerate(negative_responses):
                log.info("[bold]*NEGATIVE RAG RESPONSE ({}, sample #{})*[/bold]".format(
                    kdma, i), extra={"markup": True})
                log.info(negative_response, extra={"highlighter": JSON_HIGHLIGHTER})
        else:
            negative_choices = None

        votes = self.vote_calculator_fn(
            choices, positive_choices, negative_choices)

        log.explain("[bold]*VOTES*[/bold]",
                    extra={"markup": True})
        log.explain(votes, extra={"highlighter": JSON_HIGHLIGHTER})

        if self.filter_votes_to_positives:
            filtered_votes = filter_votes_to_responses(votes, positive_choices)
            if filtered_votes != votes:
                log.explain("Filtering votes down to choices where we "
                            "have a positive response")
                log.explain(filtered_votes,
                            extra={"highlighter": JSON_HIGHLIGHTER})
            final_votes = filtered_votes
        else:
            final_votes = votes

        top_choice, _ = max(final_votes.items(), key=lambda x: x[1])

        # Justification is taken from the first positive response that
        # picked the winning choice; stays empty if none did.
        top_choice_justification = ""
        for response in positive_responses:
            if response['action_choice'] == top_choice:
                top_choice_justification = response['detailed_reasoning']
                break

        return top_choice, top_choice_justification, positive_dialog

    def _build_rag_dialog(self,
                          kdma,
                          target_value,
                          icl_dialog_elements,
                          prompt,
                          rag_context):
        """
        Assemble one dialog: optional system message, ICL elements, user prompt.

        The system message is only added when `self.system_prompt_template`
        is configured.  `icl_dialog_elements` may be None or empty.
        """
        dialog = []
        if self.system_prompt_template is not None:
            system_prompt = call_with_coerced_args(
                self.system_prompt_template,
                {'target_kdma': kdma,
                 'target_value': target_value,
                 'rag_context': rag_context})
            dialog.append(DialogElement(role='system',
                                        content=system_prompt))

        if icl_dialog_elements:
            dialog.extend(icl_dialog_elements)

        dialog.append(DialogElement(role='user', content=prompt))
        return dialog
class LangChainRAGIndexerADMComponent(ADMComponent):
    """
    Pipeline step that retrieves passages for the current scenario.

    At construction time the given document files are read, split with
    RecursiveCharacterTextSplitter, embedded with HuggingFaceEmbeddings and
    stored in a FAISS vector store; `run` then queries that store and
    returns the hits as a single string under the `rag_context` key.
    """

    def __init__(self,
                 docs_files: DocumentFileListType,
                 embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
                 chunk_size: int = 1000,
                 chunk_overlap: int = 200,
                 add_start_index: bool = True,
                 k: int = 5):
        """
        Build the retriever.

        Parameters:
            docs_files: paths of the text documents to index.  A single
                path is accepted as well and treated as a one-element list.
            embedding_model_name: model name passed to HuggingFaceEmbeddings.
            chunk_size: forwarded to RecursiveCharacterTextSplitter.
            chunk_overlap: forwarded to RecursiveCharacterTextSplitter.
            add_start_index: forwarded to RecursiveCharacterTextSplitter.
            k: number of passages requested from the retriever per query.
        """
        # A lone str/bytes path is itself iterable; without this guard it
        # would be walked character by character.
        if isinstance(docs_files, (str, bytes, PathLike)):
            docs_files = [docs_files]
        else:
            # Materialise once so a generator is not left exhausted on
            # self.docs_files after loading.
            docs_files = list(docs_files)

        self.docs_files = docs_files
        self.embedding_model_name = embedding_model_name
        self.k = k

        docs = LangChainRAGIndexerADMComponent._load_docs(docs_files)

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            add_start_index=add_start_index,
        )
        all_splits = text_splitter.split_documents(docs)

        embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
        vector_store = FAISS.from_documents(all_splits, embeddings)

        self.retriever = vector_store.as_retriever(search_kwargs={"k": k})

    def run_returns(self):
        """Return the name under which `run`'s output is published."""
        return "rag_context"

    def run(self, scenario_state) -> str:
        """
        Retrieve passages for `scenario_state.unstructured`.

        Returns the retrieved page contents, each prefixed with a
        `[Passage i]` header (1-based) and joined by blank lines.
        """
        query = scenario_state.unstructured
        docs = self.retriever.invoke(query)
        passages = [f"[Passage {i}]\n{doc.page_content.strip()}"
                    for i, doc in enumerate(docs, start=1)]
        return "\n\n".join(passages)

    def retrieve(self, query: str) -> Dict:
        """
        Query the retriever directly.

        Returns a dict with the original `question` and a `context` list of
        `{"content", "metadata"}` entries, one per retrieved document.
        """
        docs = self.retriever.invoke(query)
        return {
            "question": query,
            "context": [
                {"content": d.page_content, "metadata": d.metadata}
                for d in docs
            ]
        }

    @staticmethod
    def _load_docs(docs_files: DocumentFileListType) -> List[Document]:
        """
        Read each file into a Document whose metadata records its source path.

        Files are decoded as UTF-8 rather than with the locale's default
        encoding, so indexing behaves the same on every host.
        """
        # Local import: keeps this block self-contained without touching
        # the module's import section.
        from os import fsdecode

        docs = []
        for d in docs_files:
            with open(d, 'r', encoding='utf-8') as f:
                text = f.read()
            # fsdecode yields a plain path string for str, bytes and
            # PathLike inputs (str() on bytes would give "b'...'").
            docs.append(Document(page_content=text,
                                 metadata={"source": fsdecode(d)}))
        return docs
/data/users/yonatan.gefen/align-system/align_system/documents/start.md + - /data/users/yonatan.gefen/align-system/align_system/documents/start_triage_flowchart.md + - /data/users/yonatan.gefen/align-system/align_system/documents/Salt.md + icl: + scenario_description_template: ${ref:adm.scenario_description_template} + attributes: ${adm.attribute_definitions} + prompt_template: ${ref:adm.prompt_template} + icl_generator_partial: + scenario_description_template: ${ref:adm.scenario_description_template} + +instance: + _target_: align_system.algorithms.pipeline_adm.PipelineADM + + steps: + # Reference the step instances we want to use in order + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.rag_indexer} + - ${ref:adm.step_definitions.icl} + - ${ref:adm.step_definitions.tagging_rag_aligned} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} diff --git a/align_system/configs/adm/tagging_fewshot_aligned_rag_sweep.yaml b/align_system/configs/adm/tagging_fewshot_aligned_rag_sweep.yaml new file mode 100644 index 00000000..2b892eee --- /dev/null +++ b/align_system/configs/adm/tagging_fewshot_aligned_rag_sweep.yaml @@ -0,0 +1,66 @@ +name: tagging_fewshot_aligned_rag + +defaults: + - override hydra/launcher: submitit_slurm + # Import defaults into this namespace (adm) as @name, for further + # customization + + # Shared variables / components + - /attribute@start: start + - /attribute@salt: salt + - /attribute@bcd: bcd + - /inference_engine@structured_inference_engine: outlines_structured_multinomial + - /template/scenario_description@scenario_description_template: tagging + - /template/prompt@prompt_template: tagging + # ADM components to be used in "steps" + - /adm_component/misc@step_definitions.rag_indexer: rag_indexer + # - /adm_component/icl@step_definitions.icl: tagging + - /adm_component/misc@step_definitions.format_choices: itm_format_choices + -
/adm_component/direct@step_definitions.tagging_rag_aligned: tagging_rag_aligned + - /adm_component/misc@step_definitions.ensure_chosen_action: ensure_chosen_action + - /adm_component/misc@step_definitions.populate_choice_info: populate_choice_info + # Use definitions in this file to override defaults defined above + - _self_ +hydra: + launcher: + nodes: 1 + timeout_min: 600 + cpus_per_task: 8 + gpus_per_node: 1 + mem_gb: 24G + account: itm + sweeper: + params: + icl: true, false + rag: ${ref:adm.step_definitions.rag_indexer}, null + prompt: true, false + +attribute_definitions: + START: ${adm.start} + SALT: ${adm.salt} + BCD_SIEVE: ${adm.bcd} + +step_definitions: + rag_indexer: + docs_files: + - /data/users/yonatan.gefen/align-system/align_system/documents/start.md + - /data/users/yonatan.gefen/align-system/align_system/documents/start_triage_flowchart.md + - /data/users/yonatan.gefen/align-system/align_system/documents/Salt.md + icl: + scenario_description_template: ${ref:adm.scenario_description_template} + attributes: ${adm.attribute_definitions} + prompt_template: ${ref:adm.prompt_template} + icl_generator_partial: + scenario_description_template: ${ref:adm.scenario_description_template} + +instance: + _target_: align_system.algorithms.pipeline_adm.PipelineADM + + steps: + # Reference the step instances we want to use in order + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.rag_indexer} + - ${ref:adm.step_definitions.icl} + - ${ref:adm.step_definitions.tagging_rag_aligned} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} diff --git a/align_system/configs/adm/tagging_rag_baseline.yaml b/align_system/configs/adm/tagging_rag_baseline.yaml new file mode 100644 index 00000000..2d8edc27 --- /dev/null +++ b/align_system/configs/adm/tagging_rag_baseline.yaml @@ -0,0 +1,27 @@ +name: tagging_rag_baseline + +defaults: + - /inference_engine@structured_inference_engine: 
outlines_structured_multinomial + - /adm_component/misc@step_definitions.format_choices: itm_format_choices + - /adm_component/misc@step_definitions.rag_indexer: rag_indexer + - /adm_component/direct@step_definitions.tagging_rag_baseline: tagging_rag_baseline + - /adm_component/misc@step_definitions.ensure_chosen_action: ensure_chosen_action + - /adm_component/misc@step_definitions.populate_choice_info: populate_choice_info + - _self_ + +step_definitions: + rag_indexer: + docs_files: + - /data/users/yonatan.gefen/align-system/align_system/documents/start.md + - /data/users/yonatan.gefen/align-system/align_system/documents/start_triage_flowchart.md + - /data/users/yonatan.gefen/align-system/align_system/documents/Salt.md + +instance: + _target_: align_system.algorithms.pipeline_adm.PipelineADM + + steps: + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.rag_indexer} + - ${ref:adm.step_definitions.tagging_rag_baseline} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} diff --git a/align_system/configs/adm/tagging_sweep_icl_no_rag_no_prompt.yaml b/align_system/configs/adm/tagging_sweep_icl_no_rag_no_prompt.yaml new file mode 100644 index 00000000..bfa17dbf --- /dev/null +++ b/align_system/configs/adm/tagging_sweep_icl_no_rag_no_prompt.yaml @@ -0,0 +1,40 @@ +name: tagging_sweep_icl_no_rag_no_prompt + +defaults: + - /attribute@start: start + - /attribute@salt: salt + - /attribute@bcd: bcd + - /inference_engine@structured_inference_engine: outlines_structured_multinomial + - /template/scenario_description@scenario_description_template: tagging + - /template/prompt@prompt_template: tagging + - /adm_component/icl@step_definitions.icl: tagging + - /adm_component/misc@step_definitions.format_choices: itm_format_choices + - /adm_component/direct@step_definitions.direct_step: tagging_aligned + - /adm_component/misc@step_definitions.ensure_chosen_action: ensure_chosen_action + - 
/adm_component/misc@step_definitions.populate_choice_info: populate_choice_info + - _self_ + +attribute_definitions: + START: ${adm.start} + SALT: ${adm.salt} + BCD_SIEVE: ${adm.bcd} + +step_definitions: + icl: + scenario_description_template: ${ref:adm.scenario_description_template} + attributes: ${adm.attribute_definitions} + prompt_template: ${ref:adm.prompt_template} + icl_generator_partial: + scenario_description_template: ${ref:adm.scenario_description_template} + direct_step: + system_prompt_template: null + +instance: + _target_: align_system.algorithms.pipeline_adm.PipelineADM + + steps: + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.icl} + - ${ref:adm.step_definitions.direct_step} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} diff --git a/align_system/configs/adm/tagging_sweep_icl_no_rag_with_prompt.yaml b/align_system/configs/adm/tagging_sweep_icl_no_rag_with_prompt.yaml new file mode 100644 index 00000000..06e11c0e --- /dev/null +++ b/align_system/configs/adm/tagging_sweep_icl_no_rag_with_prompt.yaml @@ -0,0 +1,38 @@ +name: tagging_sweep_icl_no_rag_with_prompt + +defaults: + - /attribute@start: start + - /attribute@salt: salt + - /attribute@bcd: bcd + - /inference_engine@structured_inference_engine: outlines_structured_multinomial + - /template/scenario_description@scenario_description_template: tagging + - /template/prompt@prompt_template: tagging + - /adm_component/icl@step_definitions.icl: tagging + - /adm_component/misc@step_definitions.format_choices: itm_format_choices + - /adm_component/direct@step_definitions.direct_step: tagging_aligned + - /adm_component/misc@step_definitions.ensure_chosen_action: ensure_chosen_action + - /adm_component/misc@step_definitions.populate_choice_info: populate_choice_info + - _self_ + +attribute_definitions: + START: ${adm.start} + SALT: ${adm.salt} + BCD_SIEVE: ${adm.bcd} + +step_definitions: + icl: + 
scenario_description_template: ${ref:adm.scenario_description_template} + attributes: ${adm.attribute_definitions} + prompt_template: ${ref:adm.prompt_template} + icl_generator_partial: + scenario_description_template: ${ref:adm.scenario_description_template} + +instance: + _target_: align_system.algorithms.pipeline_adm.PipelineADM + + steps: + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.icl} + - ${ref:adm.step_definitions.direct_step} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} diff --git a/align_system/configs/adm/tagging_sweep_icl_rag_no_prompt.yaml b/align_system/configs/adm/tagging_sweep_icl_rag_no_prompt.yaml new file mode 100644 index 00000000..5e3f7957 --- /dev/null +++ b/align_system/configs/adm/tagging_sweep_icl_rag_no_prompt.yaml @@ -0,0 +1,45 @@ +name: tagging_sweep_icl_rag_no_prompt + +defaults: + - /attribute@start: start + - /attribute@salt: salt + - /attribute@bcd: bcd + - /inference_engine@structured_inference_engine: outlines_structured_multinomial + - /template/scenario_description@scenario_description_template: tagging + - /template/prompt@prompt_template: tagging + - /adm_component/misc@step_definitions.rag_indexer: rag_indexer + - /adm_component/icl@step_definitions.icl: tagging + - /adm_component/misc@step_definitions.format_choices: itm_format_choices + - /adm_component/direct@step_definitions.direct_step: tagging_rag_aligned + - /adm_component/misc@step_definitions.ensure_chosen_action: ensure_chosen_action + - /adm_component/misc@step_definitions.populate_choice_info: populate_choice_info + - _self_ + +attribute_definitions: + START: ${adm.start} + SALT: ${adm.salt} + BCD_SIEVE: ${adm.bcd} + +step_definitions: + rag_indexer: + docs_files: + - /data/users/yonatan.gefen/align-system/align_system/documents/start.md + - /data/users/yonatan.gefen/align-system/align_system/documents/start_triage_flowchart.md + - 
/data/users/yonatan.gefen/align-system/align_system/documents/Salt.md + icl: + scenario_description_template: ${ref:adm.scenario_description_template} + attributes: ${adm.attribute_definitions} + prompt_template: ${ref:adm.prompt_template} + icl_generator_partial: + scenario_description_template: ${ref:adm.scenario_description_template} + +instance: + _target_: align_system.algorithms.pipeline_adm.PipelineADM + + steps: + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.rag_indexer} + - ${ref:adm.step_definitions.icl} + - ${ref:adm.step_definitions.direct_step} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} diff --git a/align_system/configs/adm/tagging_sweep_icl_rag_with_prompt.yaml b/align_system/configs/adm/tagging_sweep_icl_rag_with_prompt.yaml new file mode 100644 index 00000000..43a78bac --- /dev/null +++ b/align_system/configs/adm/tagging_sweep_icl_rag_with_prompt.yaml @@ -0,0 +1,48 @@ +name: tagging_sweep_icl_rag_with_prompt + +defaults: + - /attribute@start: start + - /attribute@salt: salt + - /attribute@bcd: bcd + - /inference_engine@structured_inference_engine: outlines_structured_multinomial + - /template/scenario_description@scenario_description_template: tagging + - /template/prompt@prompt_template: tagging + - /adm_component/misc@step_definitions.rag_indexer: rag_indexer + - /adm_component/icl@step_definitions.icl: tagging + - /adm_component/misc@step_definitions.format_choices: itm_format_choices + - /adm_component/direct@step_definitions.direct_step: tagging_rag_aligned + - /adm_component/misc@step_definitions.ensure_chosen_action: ensure_chosen_action + - /adm_component/misc@step_definitions.populate_choice_info: populate_choice_info + - _self_ + +attribute_definitions: + START: ${adm.start} + SALT: ${adm.salt} + BCD_SIEVE: ${adm.bcd} + +step_definitions: + rag_indexer: + docs_files: + - /data/users/yonatan.gefen/align-system/align_system/documents/start.md + - 
/data/users/yonatan.gefen/align-system/align_system/documents/start_triage_flowchart.md + - /data/users/yonatan.gefen/align-system/align_system/documents/Salt.md + icl: + scenario_description_template: ${ref:adm.scenario_description_template} + attributes: ${adm.attribute_definitions} + prompt_template: ${ref:adm.prompt_template} + icl_generator_partial: + scenario_description_template: ${ref:adm.scenario_description_template} + direct_step: + system_prompt_template: + _target_: align_system.prompt_engineering.tagging_prompts.TaggingSystemPrompt + +instance: + _target_: align_system.algorithms.pipeline_adm.PipelineADM + + steps: + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.rag_indexer} + - ${ref:adm.step_definitions.icl} + - ${ref:adm.step_definitions.direct_step} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} diff --git a/align_system/configs/adm/tagging_sweep_no_icl_no_rag_no_prompt.yaml b/align_system/configs/adm/tagging_sweep_no_icl_no_rag_no_prompt.yaml new file mode 100644 index 00000000..ca41772f --- /dev/null +++ b/align_system/configs/adm/tagging_sweep_no_icl_no_rag_no_prompt.yaml @@ -0,0 +1,32 @@ +name: tagging_sweep_no_icl_no_rag_no_prompt + +defaults: + - /attribute@start: start + - /attribute@salt: salt + - /attribute@bcd: bcd + - /inference_engine@structured_inference_engine: outlines_structured_multinomial + - /template/scenario_description@scenario_description_template: tagging + - /template/prompt@prompt_template: tagging + - /adm_component/misc@step_definitions.format_choices: itm_format_choices + - /adm_component/direct@step_definitions.direct_step: tagging_aligned + - /adm_component/misc@step_definitions.ensure_chosen_action: ensure_chosen_action + - /adm_component/misc@step_definitions.populate_choice_info: populate_choice_info + - _self_ + +attribute_definitions: + START: ${adm.start} + SALT: ${adm.salt} + BCD_SIEVE: ${adm.bcd} + +step_definitions: + 
direct_step: + system_prompt_template: null + +instance: + _target_: align_system.algorithms.pipeline_adm.PipelineADM + + steps: + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.direct_step} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} diff --git a/align_system/configs/adm/tagging_sweep_no_icl_no_rag_with_prompt.yaml b/align_system/configs/adm/tagging_sweep_no_icl_no_rag_with_prompt.yaml new file mode 100644 index 00000000..76a2c9ab --- /dev/null +++ b/align_system/configs/adm/tagging_sweep_no_icl_no_rag_with_prompt.yaml @@ -0,0 +1,28 @@ +name: tagging_sweep_no_icl_no_rag_with_prompt + +defaults: + - /attribute@start: start + - /attribute@salt: salt + - /attribute@bcd: bcd + - /inference_engine@structured_inference_engine: outlines_structured_multinomial + - /template/scenario_description@scenario_description_template: tagging + - /template/prompt@prompt_template: tagging + - /adm_component/misc@step_definitions.format_choices: itm_format_choices + - /adm_component/direct@step_definitions.direct_step: tagging_aligned + - /adm_component/misc@step_definitions.ensure_chosen_action: ensure_chosen_action + - /adm_component/misc@step_definitions.populate_choice_info: populate_choice_info + - _self_ + +attribute_definitions: + START: ${adm.start} + SALT: ${adm.salt} + BCD_SIEVE: ${adm.bcd} + +instance: + _target_: align_system.algorithms.pipeline_adm.PipelineADM + + steps: + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.direct_step} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} diff --git a/align_system/configs/adm/tagging_sweep_no_icl_rag_no_prompt.yaml b/align_system/configs/adm/tagging_sweep_no_icl_rag_no_prompt.yaml new file mode 100644 index 00000000..48d4210d --- /dev/null +++ b/align_system/configs/adm/tagging_sweep_no_icl_rag_no_prompt.yaml @@ -0,0 +1,37 @@ +name: 
tagging_sweep_no_icl_rag_no_prompt + +defaults: + - /attribute@start: start + - /attribute@salt: salt + - /attribute@bcd: bcd + - /inference_engine@structured_inference_engine: outlines_structured_multinomial + - /template/scenario_description@scenario_description_template: tagging + - /template/prompt@prompt_template: tagging + - /adm_component/misc@step_definitions.rag_indexer: rag_indexer + - /adm_component/misc@step_definitions.format_choices: itm_format_choices + - /adm_component/direct@step_definitions.direct_step: tagging_rag_aligned + - /adm_component/misc@step_definitions.ensure_chosen_action: ensure_chosen_action + - /adm_component/misc@step_definitions.populate_choice_info: populate_choice_info + - _self_ + +attribute_definitions: + START: ${adm.start} + SALT: ${adm.salt} + BCD_SIEVE: ${adm.bcd} + +step_definitions: + rag_indexer: + docs_files: + - /data/users/yonatan.gefen/align-system/align_system/documents/start.md + - /data/users/yonatan.gefen/align-system/align_system/documents/start_triage_flowchart.md + - /data/users/yonatan.gefen/align-system/align_system/documents/Salt.md + +instance: + _target_: align_system.algorithms.pipeline_adm.PipelineADM + + steps: + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.rag_indexer} + - ${ref:adm.step_definitions.direct_step} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} diff --git a/align_system/configs/adm/tagging_sweep_no_icl_rag_with_prompt.yaml b/align_system/configs/adm/tagging_sweep_no_icl_rag_with_prompt.yaml new file mode 100644 index 00000000..62d587f1 --- /dev/null +++ b/align_system/configs/adm/tagging_sweep_no_icl_rag_with_prompt.yaml @@ -0,0 +1,40 @@ +name: tagging_sweep_no_icl_rag_with_prompt + +defaults: + - /attribute@start: start + - /attribute@salt: salt + - /attribute@bcd: bcd + - /inference_engine@structured_inference_engine: outlines_structured_multinomial + - 
/template/scenario_description@scenario_description_template: tagging + - /template/prompt@prompt_template: tagging + - /adm_component/misc@step_definitions.rag_indexer: rag_indexer + - /adm_component/misc@step_definitions.format_choices: itm_format_choices + - /adm_component/direct@step_definitions.direct_step: tagging_rag_aligned + - /adm_component/misc@step_definitions.ensure_chosen_action: ensure_chosen_action + - /adm_component/misc@step_definitions.populate_choice_info: populate_choice_info + - _self_ + +attribute_definitions: + START: ${adm.start} + SALT: ${adm.salt} + BCD_SIEVE: ${adm.bcd} + +step_definitions: + rag_indexer: + docs_files: + - /data/users/yonatan.gefen/align-system/align_system/documents/start.md + - /data/users/yonatan.gefen/align-system/align_system/documents/start_triage_flowchart.md + - /data/users/yonatan.gefen/align-system/align_system/documents/Salt.md + direct_step: + system_prompt_template: + _target_: align_system.prompt_engineering.tagging_prompts.TaggingSystemPrompt + +instance: + _target_: align_system.algorithms.pipeline_adm.PipelineADM + + steps: + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.rag_indexer} + - ${ref:adm.step_definitions.direct_step} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} diff --git a/align_system/configs/adm_component/direct/tagging_rag_aligned.yaml b/align_system/configs/adm_component/direct/tagging_rag_aligned.yaml new file mode 100644 index 00000000..9c71595b --- /dev/null +++ b/align_system/configs/adm_component/direct/tagging_rag_aligned.yaml @@ -0,0 +1,16 @@ +_target_: align_system.algorithms.prompt_based_aligned_adm_component.PromptBasedRAGAlignedADMComponent + +structured_inference_engine: ${ref:adm.structured_inference_engine} + +scenario_description_template: + _target_: align_system.prompt_engineering.tagging_prompts.TaggingScenarioDescription +prompt_template: + _target_: 
align_system.prompt_engineering.tagging_prompts.TaggingSelectionPrompt +output_schema_template: + _target_: align_system.prompt_engineering.outlines_prompts.DefaultChoiceSelectionSchema +system_prompt_template: + _target_: align_system.prompt_engineering.tagging_prompts.SimpleTaggingSystemPromptAligned + +num_positive_samples: 1 +num_negative_samples: 0 +shuffle_choices: false diff --git a/align_system/configs/adm_component/direct/tagging_rag_baseline.yaml b/align_system/configs/adm_component/direct/tagging_rag_baseline.yaml new file mode 100644 index 00000000..538588de --- /dev/null +++ b/align_system/configs/adm_component/direct/tagging_rag_baseline.yaml @@ -0,0 +1,14 @@ +_target_: align_system.algorithms.outlines_baseline_adm_component.OutlinesRAGBaselineADMComponent + +structured_inference_engine: ${ref:adm.structured_inference_engine} + +scenario_description_template: + _target_: align_system.prompt_engineering.tagging_prompts.TaggingScenarioDescription +prompt_template: + _target_: align_system.prompt_engineering.tagging_prompts.TaggingSelectionPrompt +output_schema_template: + _target_: align_system.prompt_engineering.outlines_prompts.DefaultChoiceSelectionSchema +system_prompt_template: + _target_: align_system.prompt_engineering.tagging_prompts.SimpleTaggingSystemPrompt + +num_samples: 1 diff --git a/align_system/configs/adm_component/misc/rag_indexer.yaml b/align_system/configs/adm_component/misc/rag_indexer.yaml new file mode 100644 index 00000000..14094495 --- /dev/null +++ b/align_system/configs/adm_component/misc/rag_indexer.yaml @@ -0,0 +1,9 @@ +_target_: align_system.algorithms.rag_retreiver_adm_components.LangChainRAGIndexerADMComponent + +docs_files: ??? 
# required — must be overridden in the adm pipeline config or experiment config + +embedding_model_name: "sentence-transformers/all-MiniLM-L6-v2" +chunk_size: 2000 +chunk_overlap: 400 +add_start_index: true +k: 5 diff --git a/align_system/configs/experiment/tagging/tagging_fewshot_aligned_rag.yaml b/align_system/configs/experiment/tagging/tagging_fewshot_aligned_rag.yaml new file mode 100644 index 00000000..deedf9b0 --- /dev/null +++ b/align_system/configs/experiment/tagging/tagging_fewshot_aligned_rag.yaml @@ -0,0 +1,18 @@ +# @package _global_ +defaults: + - override /adm: tagging_fewshot_aligned_rag + - override /inference_engine@adm.structured_inference_engine: outlines_structured_greedy + +interface: + input_output_filepath: '/data/shared/tagging/eval3.json' + state_hydration_domain: "minimal" + +driver: + apply_action_filtering: false + +torch_random_seed: 0 +torch_use_deterministic_algorithms: true +random_seed: 0 +numpy_random_seed: 0 + +align_to_target: true diff --git a/align_system/configs/experiment/tagging/tagging_fewshot_sweep_slurm.yaml b/align_system/configs/experiment/tagging/tagging_fewshot_sweep_slurm.yaml new file mode 100644 index 00000000..8261ebeb --- /dev/null +++ b/align_system/configs/experiment/tagging/tagging_fewshot_sweep_slurm.yaml @@ -0,0 +1,50 @@ +# @package _global_ +# +# Multirun sweep over 8 combinations of: +# ICL (yes / no) +# prompt (hardcoded START protocol in system prompt / none) +# RAG (yes / no) +# +# Each combination is submitted as a separate SLURM job via hydra-submitit-launcher. 
+# +# Required override at runtime: +# +alignment_target=tagging/start (or tagging/bcd / tagging/salt) +# +# Run with: +# uv run run_align_system --multirun \ +# +experiment=tagging/tagging_fewshot_sweep_slurm \ +# +alignment_target=tagging/start + +defaults: + - override /adm: tagging_sweep_no_icl_no_rag_no_prompt # overridden by sweeper below + - override /inference_engine@adm.structured_inference_engine: outlines_structured_greedy + - override /hydra/launcher: submitit_slurm + - _self_ + +hydra: + mode: MULTIRUN + sweeper: + params: + adm: >- + tagging_sweep_no_icl_no_rag_no_prompt, + tagging_sweep_no_icl_no_rag_with_prompt, + tagging_sweep_icl_no_rag_no_prompt, + tagging_sweep_icl_no_rag_with_prompt, + tagging_sweep_no_icl_rag_no_prompt, + tagging_sweep_no_icl_rag_with_prompt, + tagging_sweep_icl_rag_no_prompt, + tagging_sweep_icl_rag_with_prompt + +interface: + input_output_filepath: '/data/shared/tagging/eval3.json' + state_hydration_domain: "minimal" + +driver: + apply_action_filtering: false + +torch_random_seed: 0 +torch_use_deterministic_algorithms: true +random_seed: 0 +numpy_random_seed: 0 + +align_to_target: true diff --git a/align_system/configs/experiment/tagging/tagging_rag_baseline.yaml b/align_system/configs/experiment/tagging/tagging_rag_baseline.yaml new file mode 100644 index 00000000..78197fb7 --- /dev/null +++ b/align_system/configs/experiment/tagging/tagging_rag_baseline.yaml @@ -0,0 +1,18 @@ +# @package _global_ +defaults: + - override /adm: tagging_rag_baseline + - override /inference_engine@adm.structured_inference_engine: outlines_structured_greedy + +interface: + input_output_filepath: '/data/shared/tagging/eval3.json' + state_hydration_domain: "minimal" + +driver: + apply_action_filtering: false + +torch_random_seed: 0 +torch_use_deterministic_algorithms: true +random_seed: 0 +numpy_random_seed: 0 + +align_to_target: false diff --git a/align_system/configs/hydra/launcher/submitit_slurm.yaml 
b/align_system/configs/hydra/launcher/submitit_slurm.yaml new file mode 100644 index 00000000..a18ca70b --- /dev/null +++ b/align_system/configs/hydra/launcher/submitit_slurm.yaml @@ -0,0 +1,13 @@ +_target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher + +submitit_folder: ${hydra.sweep.dir}/.submitit/%j + +name: ${hydra.job.name} +timeout_min: 10080 # 7 days +nodes: 1 +tasks_per_node: 1 +cpus_per_task: 8 +gpus_per_node: 1 +mem_gb: 24 +account: itm +stderr_to_stdout: false diff --git a/align_system/documents/Salt.md b/align_system/documents/Salt.md new file mode 100644 index 00000000..3eb66606 --- /dev/null +++ b/align_system/documents/Salt.md @@ -0,0 +1,104 @@ +When needs outstrip resources, it is appropriate to call for help, but rather than to wait for the resources to come to you, EMS providers can use SALT to [triage](https://www.ems1.com/mass-casualty-incidents-mci/articles/quiz-how-accurately-can-you-triage-10-mci-patients-RjR0YqgEfP6uwakz/) and move MCI patients forward to resources. + +SALT is a four-step process for first responders to manage mass casualty incidents, and stands for: + +* Sort +* Assess +* Lifesaving interventions +* Treatment and/or transport + +SALT was proposed by the National Association of EMS Physicians as part of a CDC-sponsored project to use the best available science and expert opinion to develop a standard guideline for mass casualty management \[2\]. SALT, endorsed by more than a dozen national emergency medicine and EMS organizations, along with the Model Uniform Core Criteria for Triage, provides a framework of clear, simple steps that field providers can use to [bring order to chaos](https://www.ems1.com/ems-training/articles/tedx-creating-calm-within-chaos-aQFhDeYC45s0H7cx/) and help improve patient outcome \[2\]. 
+ +### **Establish command and control** + +The heart of SALT is the idea that providers focus on the prioritized movement of patients away from the incident that is making more patients and towards safety and the resources to care for them. For SALT to work, a mass casualty incident must first be identified and declared. Because MCI identification differs for every EMS service, each provider must know the MCI threshold for their system. + +Regardless of whether they are an MCI officer or a caregiver, each responder must focus on their role in facilitating the movement of patients away from the patient generator, which is the thing that is making more patients or making them sicker. Move patients forward to a [casualty collection point](https://www.ems1.com/ems-products/training-tools/articles/training-hard-and-fighting-easy-BmITua2IGLVEgY2z/), which is a safer place where they can be sorted and prioritized for further forward movement to treatment areas and transport to receiving facilities. + +As command and control is established, EMS providers should grab their mass casualty triage tags and any MCI equipment that they intend to use to begin patient triage. Here is how to apply SALT. + +### **Sort the Walkers, the Wavers and the Still** + +Rapidly prioritize the patients using global sorting. This will help efficiently assess, administer lifesaving interventions and ultimately treat and transport the patients. + +Announce to all involved, using a public address system or loudspeaker, “Everyone who can hear me, move to \[the safe area you have designated\] and we will help you.” + +Give strong, loud and clear visual and verbal commands. Those who respond first will be the last patients to assess, but they may be able to assist with moving more critical patients forward. These patients are the Walkers. 
+ +To the patients who remain in place say, “If you need help, wave your arm or move your leg and we will be there to help you as soon as we can.” + +These patients who can follow commands but cannot move themselves are the Wavers and the second priority for assessment. + +Remember that some patients may be ambulatory, yet opt to stay with another injured patient. Other patients may be able to move and decide to assist a non-ambulatory patient. This is okay. Sorting is simply an easy way for the first arriving EMS units to begin moving patients forward to care. + +Those who are Still and have not yet moved or responded to you are the first patients that you need to assess and possibly care for. + +### **Assessment/Lifesaving interventions** + +With the SALT system, assessment and lifesaving interventions go hand in hand. There is no timing [radial pulses](https://www.ems1.com/ems-products/education/articles/how-to-find-and-assess-a-radial-pulse-nRGuOSLr9Syb74Kg/) or counting respirations in a SALT assessment, only answering simple yes-and-no questions. + +When you assess and find a life threat you should provide a lifesaving intervention as long as it does not take longer than a minute and does not require you or another EMS provider to stay with the patient. For example, if you find that a patient has massive hemorrhage, provide rapid bleeding control with a [tourniquet](https://www.ems1.com/tourniquets/). + +If a patient’s airway is closed, open it. If that patient is a child or infant, consider giving them two breaths. + +If you are an ALS provider, it may be appropriate to provide [needle decompression](https://www.ems1.com/ems-products/Capnography/articles/90282048-Tension-pneumothorax-How-capnography-and-ultrasound-can-improve-care/), auto-injector chemical toxin antidotes or other lifesaving interventions that take less than a minute to administer and do not require you to stay with the patient. 
+ +Remember, to maintain forward movement of patients, you must also maintain forward movement of EMS providers. That is, if a patient needs lifesaving interventions and you are immediately ready to give them, do so and move on to the next patient. Do not stop during assessment and lifesaving interventions to fetch a piece of equipment or restock supplies. Patients must continue to move forward to the casualty collection point next, then to the treatment area and eventually to transport to receiving facilities. + +As you assess and provide lifesaving interventions, categorize or tag patients by priority. SALT and MUCC triage works as follows. + +### **Dead (black triage tape or tag)** + +Patients with injuries incompatible with life or without spontaneous respirations are triaged as deceased. Assess the following: + +* Adult patient is not breathing after opening airway +* Child is not breathing after opening airway and giving 2 breaths + +Patients tagged Dead do not move forward from the point of injury to the casualty collection point. + +### **Immediate (red triage tape or tag)** + +Patients with severe injuries but a high potential for survival with treatment, such as victims of [tension pneumothorax](https://www.ems1.com/trauma/articles/957467-Tension-Pneumothorax-Identification-and-treatment/), are tagged Immediate. Assess the following: + +* Does the patient have a peripheral pulse? +* Is the patient not in respiratory distress? +* Is hemorrhage controlled? +* Does the patient follow commands or make purposeful movements? + +A “no” answer to any of these questions, together with a field provider judgement that the patient is likely to survive given the available resources, means the patient should be tagged Immediate. + +Immediate patients move forward to the casualty collection point first.
+ +### **Expectant (gray triage tape or tag)** + +A “no” response to any of the questions about pulse, breathing, hemorrhage and mental status, combined with a judgement that the patient is unlikely to survive given the available resources, means the patient should be tagged Expectant. These patients should receive treatment resources only after the Immediate patients have been moved forward. + +Examples of expectant patients include head injury with exposed brain matter, carotid artery hemorrhage or [burns to 90 percent of the total body surface area](https://www.ems1.com/burns/articles/how-burn-patient-treatment-and-destination-are-affected-by-severity-9EuR10Nod3cvD48J/). + +### **Delayed (yellow triage tape or tag)** + +Patients with serious injuries, such as a long bone fracture, that will require eventual forward movement to definitive treatment, but not immediate forward movement and care, are tagged Delayed. To determine if a patient is Delayed, assess the following: + +* Does the patient have a peripheral pulse? +* Is the patient not in respiratory distress? +* Is hemorrhage controlled? +* Does the patient follow commands or make purposeful movements? + +If the answer is “yes” to all of these but the injuries are still significant, such as a proximal long bone fracture, then the patient should be tagged Delayed. + +### **Minimal (green triage tape or tag)** + +If the answer is “yes” to all of the same questions about pulse, breathing, hemorrhage and mental status, and the patient’s injuries are minor, such as minor abrasions and lacerations, the patient should be tagged Minimal. + +Most Minimal patients should have moved forward during the sort of Walkers from the Wavers and the Still. Remaining Minimal patients are the last to move forward and they may help move other patients forward to treatment and transport. + +### **Treatment and Transport** + +As patients receive their tags from the SALT process, they should move forward to a casualty collection point.
Patients continue to move forward from there to a treatment area and eventually to an ambulance for transport to a receiving facility. + +The treatment area is the destination for all incoming personnel and equipment from responding EMS agencies. It also serves only to hold patients temporarily until they can be transported forward to receiving facilities. + +### **The future of MCI triage** + +Even though the SALT and MUCC MCI recommendations have been around for years, the change from older triage systems proceeds slowly. As SALT is adopted by additional agencies, more EMS providers will find this simple, straightforward, easy-to-learn system helpful to manage mass casualty incidents of all sizes. + diff --git a/align_system/documents/__init__.py b/align_system/documents/__init__.py new file mode 100644 index 00000000..aa29d40a --- /dev/null +++ b/align_system/documents/__init__.py @@ -0,0 +1,3 @@ +''' +This directory contains documents for RAG. +''' \ No newline at end of file diff --git a/align_system/documents/start.md b/align_system/documents/start.md new file mode 100644 index 00000000..d1744639 --- /dev/null +++ b/align_system/documents/start.md @@ -0,0 +1,173 @@ +**START** + +By using a casualty sorting system, you are focusing your activities in the middle of a chaotic and confusing environment. You must identify and separate patients rapidly, according to the severity of their injuries and their need for treatment. + +**En route** + +Even while you are responding to the scene of an incident, you should be preparing yourself mentally for what you may find. Perhaps you've been to the same location before. Where will help come from? How long will it take to arrive? + +**Initial Assessment \- Stay Calm** + +The first thing you should do upon arriving at the scene of an incident is to try to stay calm, look around, and get an overview of the scene.
These visual surveys give you an initial impression of the overall situation, including the potential number of patients involved, and possibly, even the severity of their injuries. The visual survey should enable you to estimate initially the amount and type of help needed to handle the situation. + +**Your Initial Report \- Creating a Verbal Image** + +The initial report is often the most important message of a disaster because it sets the emotional and operational stage for everything that follows. As you prepare to give the first vital report, use clear language (no signals or radio jargon), be concise, be calm, and do not shout. You are trying to give the communications center a concise verbal picture of the scene. + +The key points to communicate are: + +• Location of the incident + +• Type of incident + +• Any hazards +• Approximate number of victims + +• Type of assistance required + +**Note:** Be as specific with your requests as possible. Field experience has shown that a good rule of thumb initially, in multiple- or mass-casualty situations, is to request one ambulance for every five patients. For example, for 35 patients, request seven ambulances; for 23 patients request five ambulances, and so forth. + +Before starting, take several deep breaths to give your mind time to catch up with your eyes and to try to calm your voice. You might give the following report: "This is a major accident involving a truck and a commercial bus on Highway 305, about 2 miles east of Route 610\. There are approximately 35 victims. There are people trapped. Repeat: This is a major accident. I am requesting the fire department, rescue squad, and seven ambulances at this time. Dispatch + +additional police units to assist." + + Los Angeles Fire Department – Disaster Preparedness Section – 08/12/05 – Page 2 of 7 +**Sorting the Patients** + +It is important not to become involved with the treatment of the first or second patient with whom you come in contact. 
Remember that your job is to get to each patient as quickly as possible, conduct a rapid assessment, and assign patients to broad categories based on their need for treatment. + +The patients who are left in place are the ones on whom you must now concentrate. **The START System: It really works\!** + +The Simple Triage And Rapid Treatment (**START**) system was developed to allow first responders to triage multiple victims in 30 seconds or less, based on three primary observations: **R**espiration, **P**erfusion, and **M**ental Status (**RPM**). The **START** system is designed to assist rescuers to find the most seriously injured patients. As more rescue personnel arrive on the scene, the patients will be re-triaged for further evaluation, treatment, stabilization, and transportation. This system allows first responders to open blocked airways and stop severe bleeding quickly. + +**Triage Tagging: To Tell Others What You've Found** + +Patients are tagged for easy recognition by other rescuers arriving on the scene. Tagging is done using a variety of methods determined by your local Emergency Services System. Colored surveyors' tape or colored paper tags may be used. + +**The Four Colors of Triage** + +**MINOR** delayed care / can delay up to three hours + +**DELAYED** urgent care / can delay up to one hour + +**IMMEDIATE** immediate care / life-threatening + +**DEAD** victim is dead / no care required + +**The First Step in START: Get up and Walk\!** + +The first step in **START** is to tell all the people who can get up and walk to move to a specific area. If patients can get up and walk, they are probably not at risk of immediate death. + +In order to make the situation more manageable, those victims who can walk are asked to move away from the immediate rescue scene to a specific designated safe area. These patients are now designated as **MINOR**. If a patient complains of pain on attempting to walk or move, do not force him or her to move.
+ +The patients who are left in place are the ones on whom you must now concentrate. + + Los Angeles Fire Department – Disaster Preparedness Section – 08/12/05 – Page 3 of 7 +**The Second Step in START: Begin Where You Stand** + +Begin the second step of **START** by moving from where you stand. Move in an orderly and systematic manner through the remaining victims, stopping at each person for a quick assessment and tagging. The stop at each patient should never take more than one minute. + +**REMEMBER:** Your job is to find and tag the **IMMEDIATE** patients \--those who require immediate attention. Examine each patient, correct life-threatening airway and breathing problems, tag the patient with an **IMMEDIATE** tag and MOVE ON\! + +**How To Evaluate Patients Using RPM** + +The **START** system is based on three observations: **RPM**\--**R**espiration, **P**erfusion and **M**ental Status. Each patient must be evaluated quickly, in a systematic manner, starting with **R**espiration (breathing). + +**Breathing: It all STARTS Here**. + +If the patient is breathing, you then need to determine the breathing rate. Patients with breathing rates **greater than 30 per minute** are tagged **IMMEDIATE**. These patients are showing one of the primary signs of shock and need immediate care. + +If the patient is breathing and the breathing rate is **less than 30 per minute**, move on to the circulation and mental status observations in order to complete your 30-second survey. + +If the patient is not breathing, quickly clear the mouth of foreign matter. Use a head tilt maneuver to open the airway. In this type of multiple- or mass-casualty situation, you may have to ignore the usual cervical spine guidelines when you are opening airways during the triage process. + +**SPECIAL NOTE:** The treatment of cervical spine injuries in multiple- or mass-casualty situations is different from anything that you've been taught before.
This is the only time in emergency care when there may not be time to properly stabilize every injured patient's spine. + +Open the airway, position the patient to maintain the airway and — if the patient breathes —tag the patient **IMMEDIATE**. Patients who need help maintaining an open airway are **IMMEDIATE**. + +If you are in doubt as to the patient's ability to breathe, tag the patient as **IMMEDIATE**. If the patient is not breathing and does not start to breathe with simple airway maneuvers, the patient should be tagged **DEAD**. + +**Circulation: Is Oxygen Getting Around?** + +The second step of the R**P**M series of triage tests is the patient’s circulation. The best field method for checking circulation (to see if the heart is able to circulate blood adequately) is to check the radial pulse. + + Los Angeles Fire Department – Disaster Preparedness Section – 08/12/05 – Page 4 of 7 +It is not large and may not be easily felt in the wrist. The radial pulse is located on the palm side of the wrist, between the midline and the radius bone (forearm bone on the thumb side). To check the radial pulse, place your index and middle fingers on the bump in the wrist at the base of the thumb. Then slide it into the notch on the palm side of the wrist. You must keep your fingers there for five to ten seconds, to check for a pulse. + +If the radial pulse is absent or irregular the patient is tagged **IMMEDIATE**. If the radial pulse is present, move to the final observation of the RP**M** series: **M**ental Status. + +**Mental Status: Open Your Eyes:** + +The last part of the RP**M** series of triage tests is the mental status of the patient. This observation is done on patients who have adequate breathing and adequate circulation. + +"Open your eyes." "Close your eyes," "Squeeze my hand." Patients who can follow these simple commands and have adequate breathing and adequate circulation are tagged **DELAYED**. 
A patient who is unresponsive or cannot follow this type of simple command is tagged **IMMEDIATE**. (These patients are "unresponsive" to verbal stimuli.) + +**START is Used to Find IMMEDIATE Patients** + +This system is designed to assist rescuers to find the most seriously injured patients. As more rescue personnel arrive on the scene, the patients will be re-triaged for further evaluation, treatment, stabilization, and transportation. A patient may be re-triaged as many times and as often as time allows. + +Remember that injured patients do not stay in the same condition. The process of shock may continue and some conditions will become more serious as time goes by. As time and resources permit, go back and recheck the condition of all patients to catch changes in condition that may require upgrading the patient to a higher triage level. + +**Working at a Multiple- or Mass-Casualty Incident** + +You may or may not be the first person to arrive on the scene of a multiple- or mass-casualty incident. If other rescuers are already at the scene when you arrive, be sure to report to the incident commander before going to work. Many events are happening at the same time and the incident commander will know where your help and skills can best be used. By virtue of training and local protocols, the incident commander is that person who is in charge of the rescue operation. + +In addition to initially sizing up an incident, clearly and accurately reporting the situation, and conducting the initial **START** triage, the first responder will probably also be called on to participate in many other ways during multiple- and mass-casualty incidents. + + Los Angeles Fire Department – Disaster Preparedness Section – 08/12/05 – Page 5 of 7 +As more highly trained rescue and emergency personnel arrive on the scene, accurately report your findings to the person in charge by using a format similar to that used in the initial arrival report. Note the following: + + \* Approximate number of patients.
+ + \* Numbers that you've triaged into the four levels. + + \* Additional assistance required. + + \* Other important information. + +After you have reported this information, you may be assigned to use your skills and knowledge to provide patient care, traffic control, fire protection, or patient movement. You may also be assigned to provide emergency care to patients, to help move patients, or to assist with ambulance or helicopter transportation. + +In every situation involving casualty sorting, the goal is to find, stabilize and move Priority One patients first. + + +**Triage in Hazardous Materials Incidents** + +Hazardous materials (Hazmat) incidents involving chemicals occur every day, exposing many people to injury or contamination. During a hazardous materials incident, responders must protect themselves from injury and contamination. + +**REMEMBER:** A hazardous materials placard indicates a potential problem. But not all hazardous materials problems will be placarded. Be sure to find the proper response to the problem before beginning patient treatment. + +The single most important step when handling any hazardous materials incident is to identify the substance(s) involved. Federal law requires that hazardous materials placards be displayed on all vehicles that contain large quantities of hazardous materials. Manufacturers and transporters should display the appropriate placard, along with a four-digit identification number, for better identification of the hazardous substance. These numbers are used by professional agencies to identify the substance and to obtain emergency information. + +**IF THERE IS ANY SUSPICION OF A HAZARDOUS MATERIALS SPILL \- STAY AWAY\!** + +The U.S. Department of Transportation published the Emergency Response Guidebook, which lists the most common hazardous materials, their four-digit identification numbers, and proper emergency actions to control the scene. It also describes the emergency care of ill or injured patients. 
+ +Unless you have received training in handling hazardous materials and can take the necessary precautions to protect yourself, you should keep far away from the contaminated area or "hot zone." + +Once the appropriate protection of the rescuers has been accomplished, triage in hazardous materials incidents has one major function--to identify victims who have sustained an acute injury as a result of exposure to hazardous materials. These patients should be removed from the contaminated area, decontaminated by trained personnel, given any necessary emergency care, and transported to a hospital. + + Los Angeles Fire Department – Disaster Preparedness Section – 08/12/05 – Page 6 of 7 +**REMEMBER: Contaminated patients will contaminate unprotected rescuers\!** + +Emergency treatment of patients who have been exposed to hazardous materials is usually aimed at supportive care, since there are very few specific antidotes or treatments for most hazardous materials injuries. Because most fatalities and serious injuries sustained in hazardous materials incidents result from breathing problems, constant reevaluation of the patients in Priorities Two and Three is necessary so that a patient whose condition worsens can be moved to a higher triage level. + +**Summary** + +Every responder must understand the principles and operations behind your casualty sorting system. The **START** system is an excellent and easily understood triage or casualty sorting method. + +Responders should be involved in periodic community disaster drills so that their skills and capabilities can be tested and improved. + + +**You Should Know:** + +\* The responder's role at multiple- or mass-casualty incidents. + +\* How to use the **START** system. + +\* How to recognize a hazardous materials placard. + +**You Should Practice:** + +\* Using the **START** system during a simulated multiple- or mass-casualty incident. 
+ + Los Angeles Fire Department – Disaster Preparedness Section – 08/12/05 – Page 7 of 7 diff --git a/align_system/documents/start_triage_flowchart.md b/align_system/documents/start_triage_flowchart.md new file mode 100644 index 00000000..241b8859 --- /dev/null +++ b/align_system/documents/start_triage_flowchart.md @@ -0,0 +1,117 @@ +# START Triage Protocol — Decision Flowchart + +## Overview + +START (Simple Triage And Rapid Treatment) is a triage system used by first responders at multiple-casualty or mass-casualty incidents. It sorts patients into four categories based on three rapid observations: **Respiration, Perfusion, and Mental Status (RPM)**. Each per-patient assessment should take 30 seconds or less and must never exceed one minute. + +## Triage Categories (Tag Colors) + +| Tag | Color | Meaning | +|---|---|---| +| MINOR | Green | Delayed care; can delay up to three hours | +| DELAYED | Yellow | Urgent care; can delay up to one hour | +| IMMEDIATE | Red | Immediate care required; life-threatening | +| DEAD | Black | Victim is deceased; no care required | + +## Entry Point + +Begin by starting where you stand. Assess the scene, call for assistance, and determine safety. Then call out to all victims. + +## Step 1 — Ambulation Assessment (Call Out) + +Call out instructions asking all victims who can walk to move to a specific designated safe area. + +- **IF** the victim can walk (Walking Wounded & Uninjured) → Tag as **MINOR**. Hold them in a specific location and remember to fully triage them ASAP. +- **IF** the victim cannot walk (Non-Walking) → Proceed to Step 2 (Respiration Assessment). + +Note: If a patient complains of pain on attempting to walk or move, do not force them to move. + +## Step 2 — Respiration Assessment + +Evaluate whether the patient is breathing. + +### Branch A — Patient IS breathing + +Determine the respiratory rate. + +- **IF** breathing rate is **OVER 30 per minute** → Tag as **IMMEDIATE**. 
+- **IF** breathing rate is **UNDER 30 per minute** → Proceed to Step 3 (Perfusion Assessment). + +### Branch B — Patient is NOT breathing + +Position the airway using a head-tilt maneuver. Clear the mouth of foreign matter. Look, listen, and feel for breathing. + +- **IF** the patient begins breathing after airway positioning → Tag as **IMMEDIATE**. +- **IF** the patient does NOT begin breathing → Reposition the airway and reassess (Look, Listen & Feel again). + - **IF** the patient now breathes → Tag as **IMMEDIATE**. + - **IF** the patient still does not breathe → Tag as **DEAD**. + +Patients who need help maintaining an open airway are tagged IMMEDIATE. When in doubt about the patient's ability to breathe, tag as IMMEDIATE. + +## Step 3 — Perfusion Assessment + +Evaluate circulation using the radial pulse check and/or the blanch test (capillary refill). Either test alone is sufficient to make the triage decision at this step. + +### Radial Pulse Test + +Place index and middle fingers on the palm side of the wrist at the base of the thumb. Hold for 5–10 seconds. + +- **IF** radial pulse is **ABSENT** (or irregular) → Tag as **IMMEDIATE**. +- **IF** radial pulse is **PRESENT** → Proceed to Step 4 (Mental Status). + +### Blanch Test (Capillary Refill) + +- **IF** capillary refill is **OVER 2 seconds** → Tag as **IMMEDIATE**. +- **IF** capillary refill is **UNDER 2 seconds** → Proceed to Step 4 (Mental Status). + +## Step 4 — Mental Status Assessment + +Reached only by patients with adequate breathing and adequate circulation. Give simple commands such as "Open your eyes," "Close your eyes," or "Squeeze my hand." + +- **IF** the patient can **follow** simple commands → Tag as **DELAYED**. +- **IF** the patient **cannot follow** simple commands (unresponsive to verbal stimuli) → Tag as **IMMEDIATE**. + +## Complete Decision Paths (All Possible Outcomes) + +### Path to MINOR (Green) +1. Victim can walk on their own → **MINOR** + +### Paths to IMMEDIATE (Red) +1. 
Non-walking → breathing rate over 30/min → **IMMEDIATE** +2. Non-walking → not breathing → airway positioned → begins breathing → **IMMEDIATE** +3. Non-walking → not breathing → airway repositioned → begins breathing → **IMMEDIATE** +4. Non-walking → breathing under 30/min → radial pulse absent → **IMMEDIATE** +5. Non-walking → breathing under 30/min → blanch test over 2 seconds → **IMMEDIATE** +6. Non-walking → breathing under 30/min → adequate perfusion → cannot follow simple commands → **IMMEDIATE** + +### Path to DELAYED (Yellow) +1. Non-walking → breathing under 30/min → radial pulse present AND/OR blanch under 2 seconds → follows simple commands → **DELAYED** + +### Path to DEAD (Black) +1. Non-walking → not breathing → airway positioned (no breath) → airway repositioned (still no breath) → **DEAD** + +## Key Decision Thresholds + +- **Respiratory rate threshold:** 30 breaths per minute (over → IMMEDIATE; under → continue assessment) +- **Capillary refill (blanch test) threshold:** 2 seconds (over → IMMEDIATE; under → continue assessment) +- **Radial pulse:** present (continue) or absent/irregular (IMMEDIATE) +- **Mental status:** follows commands (DELAYED) or cannot follow commands (IMMEDIATE) + +## Key Principles + +- Target assessment time per patient: 30 seconds or less; never exceed one minute per patient. +- When in doubt about a patient's ability to breathe, tag as IMMEDIATE. +- In mass-casualty situations, standard cervical spine stabilization protocols may be bypassed in order to open airways during triage. +- The goal of START is to rapidly identify IMMEDIATE patients for priority care. +- Re-triage patients as time and resources permit; patient conditions change, especially as shock progresses. +- Do not become involved in treating the first or second patient you encounter — move through all victims and tag them first. + +## Node Reference (Flowchart Vocabulary) + +- **START**: Entry node — Assess scene, call for assistance, determine safety. 
+- **Call Out**: Verbal instruction separating walking from non-walking victims. +- **RESPIRATION**: Breathing assessment node for non-walking victims. +- **PERFUSION**: Circulation assessment node (radial pulse + blanch test). +- **MENTAL STATUS**: Responsiveness / command-following assessment node. +- **Position Airway, Look, Listen & Feel**: First airway intervention for non-breathing patients. +- **Reposition Airway, Look, Listen & Feel**: Second airway attempt if the first fails. diff --git a/align_system/prompt_engineering/tagging_prompts.py b/align_system/prompt_engineering/tagging_prompts.py index 2efc7940..e90721d3 100644 --- a/align_system/prompt_engineering/tagging_prompts.py +++ b/align_system/prompt_engineering/tagging_prompts.py @@ -1,6 +1,6 @@ -import outlines +from align_system.prompt_engineering.compat.outlines.templates import prompt as outlines_prompt -@outlines.prompt +@outlines_prompt def tagging_scenario_state_description(scenario_state): """ PATIENT DESCRIPTION: @@ -13,7 +13,7 @@ def __call__(self, scenario_state): return tagging_scenario_state_description(scenario_state) -@outlines.prompt +@outlines_prompt def tagging_selection_prompt(situation, choices): """ {{ situation }} @@ -30,7 +30,7 @@ def __call__(self, scenario_description, choices): return tagging_selection_prompt(scenario_description, choices) -@outlines.prompt +@outlines_prompt def baseline_tagging_system_prompt(): """ You are an army combat medic trained to classify patient conditions into four triage categories. Your task is to read \ @@ -56,7 +56,7 @@ def __call__(self): return baseline_tagging_system_prompt() -@outlines.prompt +@outlines_prompt def start_tagging_system_prompt(): """ You are an Army combat medic trained to triage patients using the START (Simple Triage and Rapid Treatment) protocol. 
\ @@ -95,7 +95,7 @@ def start_tagging_system_prompt(): """ -@outlines.prompt +@outlines_prompt def salt_tagging_system_prompt(): """ You are an Army combat medic trained to perform triage using the SALT protocol (Sort, Assess, Lifesaving Interventions, \ @@ -135,7 +135,7 @@ def salt_tagging_system_prompt(): """ -@outlines.prompt +@outlines_prompt def bcd_tagging_system_prompt(): """ You are an Army combat medic trained to perform primary triage using the BCD Sieve protocol—a rapid, systematic method \ @@ -190,3 +190,82 @@ def __call__(self, target_kdma, target_value): return bcd_tagging_system_prompt() else: raise ValueError(f"Unknown target tagging protocol: {target_kdma}") + + +class SimpleTaggingSystemPrompt: + """ + Minimal system prompt for RAG-augmented tagging. Instead of embedding a full + protocol decision tree, this provides only category definitions and relies on + rag_context (retrieved passages) to supply protocol-specific guidance. + """ + def __call__(self, rag_context=None): + prompt = ( + "You are an army combat medic trained to classify patient conditions " + "into four triage categories:\n" + " * Red (Immediate) - Life-threatening injuries requiring immediate " + "intervention. Patient is likely to survive if treated quickly.\n" + " * Yellow (Delayed) - Serious but not life-threatening injuries. " + "Treatment can be delayed without immediate risk.\n" + " * Green (Minor) - Minor injuries.\n" + " * Black (Expectant) - Chances of survival are low, even with treatment.\n\n" + "Instructions:\n" + "1. Evaluate the patient description.\n" + "2. Provide brief reasoning.\n" + "3. Assign only one triage category: Green, Yellow, Red, or Black.\n\n" + "If critical information is missing, select the most appropriate category " + "based on available details and clearly state the uncertainty." 
+ ) + if rag_context: + prompt += ( + "\n\nRelevant protocol reference material:\n" + "---\n" + f"{rag_context}\n" + "---" + ) + return prompt + + +class SimpleTaggingSystemPromptAligned: + """ + RAG-aware system prompt for the aligned (PromptBasedRAGAlignedADMComponent) pipeline. + Accepts target_kdma and target_value (required by the aligned component) but ignores + them — protocol guidance comes from rag_context instead of hard-coded decision trees. + """ + def __call__(self, target_kdma, target_value, rag_context=None): + prompt = ( + "You are an army combat medic trained to classify patient conditions " + "into four triage categories:\n" + " * Red (Immediate) - Life-threatening injuries requiring immediate " + "intervention. Patient is likely to survive if treated quickly.\n" + " * Yellow (Delayed) - Serious but not life-threatening injuries. " + "Treatment can be delayed without immediate risk.\n" + " * Green (Minor) - Minor injuries.\n" + " * Black (Expectant) - Chances of survival are low, even with treatment.\n\n" + "Instructions:\n" + "1. Evaluate the patient description.\n" + "2. Provide brief reasoning.\n" + "3. Assign only one triage category: Green, Yellow, Red, or Black.\n\n" + "If critical information is missing, select the most appropriate category " + "based on available details and clearly state the uncertainty." + ) + if rag_context: + prompt += ( + "\n\nRelevant protocol reference material:\n" + "---\n" + f"{rag_context}\n" + "---" + ) + return prompt + + +class RAGTaggingScenarioDescription: + """ + Variant of TaggingScenarioDescription that appends retrieved passages to the + patient description. Use this to inject RAG context at the user-turn level + instead of the system-turn level (for ablation experiments). 
+ """ + def __call__(self, scenario_state, rag_context=None): + description = f"PATIENT DESCRIPTION:\n{scenario_state.unstructured.rstrip()}" + if rag_context: + description += f"\n\nRELEVANT PROTOCOL PASSAGES:\n{rag_context}" + return description diff --git a/pyproject.toml b/pyproject.toml index 6a742be7..e24b9d8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "vllm>=0.19.0", "urllib3>=2.6.3", # Transitive dep pinned for Security "xgrammar>=0.1.32", # Transitive dep pinned for Security + "hydra-submitit-launcher>=1.2.0", ] [tool.uv.sources] @@ -41,6 +42,12 @@ llama-index-retriever = [ "langchain>=0.2.5", "llama-index>=0.13.0", ] +rag = [ + "faiss-cpu>=1.13.2", + "langchain-community>=0.4.1", + "langchain[huggingface]>=1.2.15", + "sentence-transformers>=5.4.1", +] [project.scripts] run_align_system = "align_system.cli.run_align_system:main" diff --git a/scripts/run-tagging-comparison.sh b/scripts/run-tagging-comparison.sh new file mode 100755 index 00000000..7937a9b3 --- /dev/null +++ b/scripts/run-tagging-comparison.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# Runs the tagging baseline and RAG baseline experiments back-to-back for comparison. 
+# +# Usage: +# bash scripts/run-tagging-comparison.sh [output_dir] +# +# Output dir defaults to: tagging_comparison_results +# Each run gets its own subdirectory: OUTPUT_DIR/EXPERIMENT/ALIGNMENT_TARGET/TIMESTAMP/ + +set -e + +OUTPUT_BASEDIR=${1:-tagging_comparison_results} +DATE_NOW=$(date +"%Y-%m-%d__%H-%M-%S") + +# BASELINE Experiments + +# EXPERIMENTS=( +# "tagging/tagging_baseline" +# "tagging/tagging_rag_baseline" +# ) + +# for exp in "${EXPERIMENTS[@]}"; do +# exp_name=$(basename "$exp") +# out_dir="${OUTPUT_BASEDIR}/${exp_name}/${DATE_NOW}" +# echo "========================================" +# echo "Running: ${exp}" +# echo "Output: ${out_dir}" +# echo "========================================" + +# uv run run_align_system \ +# +experiment="${exp}" \ +# hydra.run.dir="${out_dir}" + +# echo "Done: ${exp_name}" +# echo +# done + +# ALIGNMENT Experiments +EXPERIMENTS=( + # "tagging/tagging_fewshot_aligned" + "tagging/tagging_fewshot_aligned_rag" +) + +ALIGNMENT_TARGET=( + # "tagging/bcd" + "tagging/start" + # "tagging/salt" +) + +INTERFACE=( + "/data/users/yonatan.gefen/align-system/start-protocol-tagging-exp/itm_eval_align_tag_example_start_color_only_treated_interventions.json" +) + +for exp in "${EXPERIMENTS[@]}"; do + for at in "${ALIGNMENT_TARGET[@]}"; do + exp_name=$(basename "$exp") + at_name=$(basename "$at") + out_dir="${OUTPUT_BASEDIR}/${exp_name}/${at_name}/${DATE_NOW}" + echo "========================================" + echo "Running: ${exp} with alignment target ${at}" + echo "Output: ${out_dir}" + echo "========================================" + + uv run run_align_system \ + +experiment="${exp}" \ + +alignment_target="${at}" \ + interface.input_output_filepath="${INTERFACE}" \ + hydra.run.dir="${out_dir}" + + echo "Done: ${exp_name} with alignment taregt: ${at_name}" + echo + done +done + +echo "All runs complete.
Results in: ${OUTPUT_BASEDIR}/" diff --git a/scripts/run-tagging-comparison.slurm b/scripts/run-tagging-comparison.slurm new file mode 100644 index 00000000..c2cc54dd --- /dev/null +++ b/scripts/run-tagging-comparison.slurm @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=align-integ-tests +#SBATCH --output=output/slurm-%j.out +#SBATCH --error=output/slurm-%j.err +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --time=7-00:00:00 +#SBATCH --mem=24G +#SBATCH --account=itm + +cd /data/users/yonatan.gefen/align-system +mkdir -p tagging-output +bash ./scripts/run-tagging-comparison.sh "$@" diff --git a/uv.lock b/uv.lock index 46a3fef3..069e9463 100644 --- a/uv.lock +++ b/uv.lock @@ -146,6 +146,7 @@ dependencies = [ { name = "accelerate" }, { name = "bert-score" }, { name = "hydra-core" }, + { name = "hydra-submitit-launcher" }, { name = "ipdb" }, { name = "mistral-common" }, { name = "numpy" }, @@ -176,12 +177,19 @@ openai = [ { name = "httpx" }, { name = "openai" }, ] +rag = [ + { name = "faiss-cpu" }, + { name = "langchain", extra = ["huggingface"] }, + { name = "langchain-community" }, + { name = "sentence-transformers" }, +] [package.metadata] requires-dist = [ { name = "accelerate", specifier = ">=1.12.0" }, { name = "bert-score", specifier = ">=0.3.13" }, { name = "hydra-core", specifier = ">=1.3.2" }, + { name = "hydra-submitit-launcher", specifier = ">=1.2.0" }, { name = "ipdb", specifier = ">=0.13.13" }, { name = "mistral-common", specifier = ">=1.9.1" }, { name = "numpy", specifier = ">=1.26.4" }, @@ -209,6 +217,12 @@ openai = [ { name = "httpx", specifier = ">=0.28.1,<0.29.0" }, { name = "openai", specifier = ">=2.15.0,<3.0.0" }, ] +rag = [ + { name = "faiss-cpu", specifier = ">=1.13.2" }, + { name = "langchain", extras = ["huggingface"], specifier = ">=1.2.15" }, + { name = "langchain-community", specifier = ">=0.4.1" }, + { name = "sentence-transformers", specifier = ">=5.4.1" }, +] [[package]] name = "annotated-doc" @@ 
-316,11 +330,11 @@ wheels = [ [[package]] name = "async-timeout" -version = "5.0.1" +version = "4.0.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a5/ae/136395dfbfe00dfc94da3f3e136d0b13f394cba8f4841120e34226265780/async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3", size = 9274, upload-time = "2024-11-06T16:41:39.6Z" } +sdist = { url = "https://files.pythonhosted.org/packages/87/d6/21b30a550dafea84b1b8eee21b5e23fa16d010ae006011221f33dcd8d7f8/async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f", size = 8345, upload-time = "2023-08-10T16:35:56.907Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/ba/e2081de779ca30d473f21f5b30e0e737c438205440784c7dfc81efc2b029/async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c", size = 6233, upload-time = "2024-11-06T16:41:37.9Z" }, + { url = "https://files.pythonhosted.org/packages/a7/fa/e01228c2938de91d47b307831c62ab9e4001e747789d0b05baf779a6488c/async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028", size = 5721, upload-time = "2023-08-10T16:35:55.203Z" }, ] [[package]] @@ -947,6 +961,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" }, ] +[[package]] +name = "faiss-cpu" +version = "1.13.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "packaging" }, +] +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/07/c9/671f66f6b31ec48e5825d36435f0cb91189fa8bb6b50724029dbff4ca83c/faiss_cpu-1.13.2-cp310-abi3-macosx_14_0_arm64.whl", hash = "sha256:a9064eb34f8f64438dd5b95c8f03a780b1a3f0b99c46eeacb1f0b5d15fc02dc1", size = 3452776, upload-time = "2025-12-24T10:27:01.419Z" }, + { url = "https://files.pythonhosted.org/packages/5a/4a/97150aa1582fb9c2bca95bd8fc37f27d3b470acec6f0a6833844b21e4b40/faiss_cpu-1.13.2-cp310-abi3-macosx_14_0_x86_64.whl", hash = "sha256:c8d097884521e1ecaea6467aeebbf1aa56ee4a36350b48b2ca6b39366565c317", size = 7896434, upload-time = "2025-12-24T10:27:03.592Z" }, + { url = "https://files.pythonhosted.org/packages/0b/d0/0940575f059591ca31b63a881058adb16a387020af1709dcb7669460115c/faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ee330a284042c2480f2e90450a10378fd95655d62220159b1408f59ee83ebf1", size = 11485825, upload-time = "2025-12-24T10:27:05.681Z" }, + { url = "https://files.pythonhosted.org/packages/e7/e1/a5acac02aa593809f0123539afe7b4aff61d1db149e7093239888c9053e1/faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ab88ee287c25a119213153d033f7dd64c3ccec466ace267395872f554b648cd7", size = 23845772, upload-time = "2025-12-24T10:27:08.194Z" }, + { url = "https://files.pythonhosted.org/packages/9c/7b/49dcaf354834ec457e85ca769d50bc9b5f3003fab7c94a9dcf08cf742793/faiss_cpu-1.13.2-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:85511129b34f890d19c98b82a0cd5ffb27d89d1cec2ee41d2621ee9f9ef8cf3f", size = 13477567, upload-time = "2025-12-24T10:27:10.822Z" }, + { url = "https://files.pythonhosted.org/packages/f7/6b/12bb4037921c38bb2c0b4cfc213ca7e04bbbebbfea89b0b5746248ce446e/faiss_cpu-1.13.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b32eb4065bac352b52a9f5ae07223567fab0a976c7d05017c01c45a1c24264f", size = 25102239, upload-time = "2025-12-24T10:27:13.476Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/3a/c215083d883173871f9b76719ca7696d832fc5255fb82358b0b25dd1d1af/faiss_cpu-1.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:eb8bf5dd96465d043c22195afbe8276d5197b710704290d9b454144a0ad892ed", size = 18879081, upload-time = "2025-12-24T10:27:15.859Z" }, + { url = "https://files.pythonhosted.org/packages/14/6d/40439a05e4e60a0e889aa68b08ec70f5c8e32901f75f2be25c593a2e050e/faiss_cpu-1.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:7c5944d7807d58fe7244b6aba06be710ee7ed99343365ed92699349efe979f51", size = 18879906, upload-time = "2025-12-24T10:27:19.041Z" }, + { url = "https://files.pythonhosted.org/packages/9f/f9/b97eadbdd9e00f945d1566c7101382344f504596bfb19219465b0fc61e6e/faiss_cpu-1.13.2-cp311-cp311-win_arm64.whl", hash = "sha256:19508a1badfb36e456c1c8664eeb948349f604db5c7545f277a0126b4a84b080", size = 8548280, upload-time = "2025-12-24T10:27:22.114Z" }, + { url = "https://files.pythonhosted.org/packages/87/ff/35ed875423200c17bdd594ce921abfc1812ddd21e09355290b9a94e170ab/faiss_cpu-1.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:b82c01d30430dd7b1fa442001b9099735d1a82f6bb72033acdc9206d5ac66a64", size = 18890300, upload-time = "2025-12-24T10:27:24.194Z" }, + { url = "https://files.pythonhosted.org/packages/c5/3a/bbdf5deaf6feb34b46b469c0a0acd40216c3d3c6ecf5aeb71d56b8a650e3/faiss_cpu-1.13.2-cp312-cp312-win_arm64.whl", hash = "sha256:2c4f696ae76e7c97cbc12311db83aaf1e7f4f7be06a3ffea7e5b0e8ec1fd805b", size = 8553157, upload-time = "2025-12-24T10:27:26.38Z" }, +] + [[package]] name = "fastapi" version = "0.135.3" @@ -1505,6 +1541,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/50/e0edd38dcd63fb26a8547f13d28f7a008bc4a3fd4eb4ff030673f22ad41a/hydra_core-1.3.2-py3-none-any.whl", hash = "sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b", size = 154547, upload-time = "2023-02-23T18:33:40.801Z" }, ] +[[package]] +name = "hydra-submitit-launcher" +version = "1.2.0" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "hydra-core" }, + { name = "submitit" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4f/93/8f19eba16318fe79f17ae5a86ed4efc9641c33151f67a0f172fbd15ddd41/hydra-submitit-launcher-1.2.0.tar.gz", hash = "sha256:e14c8eb46d020fac60ba25f82bcc368dc55851d2683dc95c88631ffcf15e4a34", size = 4875, upload-time = "2022-05-17T22:19:47.548Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/9b/0ea17f1780004c52f83566b5ca78a0844c93f5abdbbf625e40962af1ffcc/hydra_submitit_launcher-1.2.0-py3-none-any.whl", hash = "sha256:51ce468fbc91211c3a46677eefde94bbb9f721c9545af0be6dd0a95658515613", size = 5207, upload-time = "2022-05-17T22:19:45.664Z" }, +] + [[package]] name = "idna" version = "3.11" @@ -1932,6 +1981,53 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/e8/a3b8cb0005553f6a876865073c81ef93bd7c5b18381bcb9ba4013af96ebc/langchain-1.2.15-py3-none-any.whl", hash = "sha256:e349db349cb3e9550c4044077cf90a1717691756cc236438404b23500e615874", size = 112714, upload-time = "2026-04-03T14:26:02.557Z" }, ] +[package.optional-dependencies] +huggingface = [ + { name = "langchain-huggingface" }, +] + +[[package]] +name = "langchain-classic" +version = "1.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-timeout", marker = "python_full_version < '3.11'" }, + { name = "langchain-core" }, + { name = "langchain-text-splitters" }, + { name = "langsmith" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "sqlalchemy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/32/04/b01c09e37414bab9f209efa311502841a3c0de5bc6c35e729c8d8a9893c9/langchain_classic-1.0.3.tar.gz", hash = "sha256:168ef1dfbfb18cae5a9ff0accecc9413a5b5aa3464b53fa841561a3384b6324a", size = 10534933, upload-time = "2026-03-13T13:56:11.96Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ab/e6/cfdeedec0537ffbf5041773590d25beb7f2aa467cc6630e788c9c7c72c3e/langchain_classic-1.0.3-py3-none-any.whl", hash = "sha256:26df1ec9806b1fbff19d9085a747ea7d8d82d7e3fb1d25132859979de627ef79", size = 1041335, upload-time = "2026-03-13T13:56:09.677Z" }, +] + +[[package]] +name = "langchain-community" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "dataclasses-json" }, + { name = "httpx-sse" }, + { name = "langchain-classic" }, + { name = "langchain-core" }, + { name = "langsmith" }, + { name = "numpy" }, + { name = "pydantic-settings" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "sqlalchemy" }, + { name = "tenacity" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/53/97/a03585d42b9bdb6fbd935282d6e3348b10322a24e6ce12d0c99eb461d9af/langchain_community-0.4.1.tar.gz", hash = "sha256:f3b211832728ee89f169ddce8579b80a085222ddb4f4ed445a46e977d17b1e85", size = 33241144, upload-time = "2025-10-27T15:20:32.504Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f0/a4/c4fde67f193401512337456cabc2148f2c43316e445f5decd9f8806e2992/langchain_community-0.4.1-py3-none-any.whl", hash = "sha256:2135abb2c7748a35c84613108f7ebf30f8505b18c3c18305ffaecfc7651f6c6a", size = 2533285, upload-time = "2025-10-27T15:20:30.767Z" }, +] + [[package]] name = "langchain-core" version = "1.2.30" @@ -1951,6 +2047,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/46/e988e9f024e762750f9f53878316980bdaea2ab1f19600df01a7c39eda89/langchain_core-1.2.30-py3-none-any.whl", hash = "sha256:26fa50894449b29b31b3712fa4975db679d26abe8241a966ea2c5978b68d8394", size = 513005, upload-time = "2026-04-15T20:37:12.396Z" }, ] +[[package]] +name = "langchain-huggingface" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "langchain-core" }, + { name = "tokenizers" }, 
+] +sdist = { url = "https://files.pythonhosted.org/packages/42/5b/4910551367de5c6ec246616fcc0ddb0bc6f9e5d353d4a22dcb5ab1f87e60/langchain_huggingface-1.2.1.tar.gz", hash = "sha256:33d52a30a56775380c6b4321b78136a410eb079132a80fe7120ddd4b954b4efa", size = 253106, upload-time = "2026-03-02T18:44:39.163Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/90/a1440bfa467a6dd9025ad80f3c239554de28aec49dacfb369fda92871556/langchain_huggingface-1.2.1-py3-none-any.whl", hash = "sha256:0930c216a457d2c8dc7b39a756c39c567f1d88593bfee2c3441f3ae718435f0f", size = 30924, upload-time = "2026-03-02T18:44:37.745Z" }, +] + +[[package]] +name = "langchain-text-splitters" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/38/14121ead61e0e75f79c3a35e5148ac7c2fe754a55f76eab3eed573269524/langchain_text_splitters-1.1.1.tar.gz", hash = "sha256:34861abe7c07d9e49d4dc852d0129e26b32738b60a74486853ec9b6d6a8e01d2", size = 279352, upload-time = "2026-02-18T23:02:42.798Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/66/d9e0c3b83b0ad75ee746c51ba347cacecb8d656b96e1d513f3e334d1ccab/langchain_text_splitters-1.1.1-py3-none-any.whl", hash = "sha256:5ed0d7bf314ba925041e7d7d17cd8b10f688300d5415fb26c29442f061e329dc", size = 35734, upload-time = "2026-02-18T23:02:41.913Z" }, +] + [[package]] name = "langgraph" version = "1.1.6" @@ -2708,7 +2830,7 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, ] wheels = [ { url = 
"https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, @@ -2735,7 +2857,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, @@ -2762,9 +2884,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, + { name = "nvidia-cusparse-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or 
(sys_platform != 'emscripten' and sys_platform != 'win32')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, @@ -2775,7 +2897,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, @@ -4418,6 +4540,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340, upload-time = "2026-02-23T00:19:12.024Z" }, ] +[[package]] +name = "sentence-transformers" +version = "5.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = 
"scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "torch" }, + { name = "tqdm" }, + { name = "transformers" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/68/7f98c221940ce783b492ad6140384daf2e2918cd7175009d6a362c22b9ee/sentence_transformers-5.4.1.tar.gz", hash = "sha256:436bcb1182a0ff42a8fb2b1c43498a70d0a75b688d182f2cd0d1dd115af61ddc", size = 428910, upload-time = "2026-04-14T13:34:59.006Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/d9/3a9b6f2ccdedc9dc00fe37b2fc58f58f8efbff44565cf4bf39d8568bb13a/sentence_transformers-5.4.1-py3-none-any.whl", hash = "sha256:a6d640fc363849b63affb8e140e9d328feabab86f83d58ac3e16b1c28140b790", size = 571311, upload-time = "2026-04-14T13:34:57.731Z" }, +] + [[package]] name = "sentencepiece" version = "0.2.1" @@ -4636,6 +4779,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, ] +[[package]] +name = "submitit" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cloudpickle" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/47/86/497018fb3b74e71bef45df82762b176e6b3d159f29941c20d2f141ec4096/submitit-1.5.4.tar.gz", hash = "sha256:7100848bd1cdda79c7196e54ee830793ae75fd7adde0c5bef738d72360a07508", size = 81538, upload-time = "2025-12-17T19:20:03.396Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ea/bb/711e1c2ebd18a21202c972dd5d5c8e09a921f2d3560e3a53d6350c808ab7/submitit-1.5.4-py3-none-any.whl", hash = "sha256:c26f3a7c8d4150eaf70b1da71e2023e9e9936c93e8342ed7db910f29158561c5", size = 76043, upload-time = "2025-12-17T19:20:01.941Z" }, +] + [[package]] name = "supervisor" version = "4.3.0"