diff --git a/Dockerfile b/Dockerfile index e4ebbfa..f0dda2a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,8 @@ FROM simonsobs/ocs:v0.11.3-19-gd729e04 RUN apt-get update -y && apt-get install -y iputils-ping \ curl +RUN apt-get update && apt-get install -y libusb-1.0-0-dev libusb-1.0-0 && rm -rf /var/lib/apt/lists/* + # Copy in and install requirements COPY requirements.txt /app/pcs/requirements.txt WORKDIR /app/pcs/ diff --git a/pcs/agents/acu_interface/aculib.py b/pcs/agents/acu_interface/aculib.py index 189853a..301bee7 100644 --- a/pcs/agents/acu_interface/aculib.py +++ b/pcs/agents/acu_interface/aculib.py @@ -1,13 +1,21 @@ #!/bin/python -import os, yaml -import time +import os, yaml, json, sys +import time, datetime import socket, struct, requests #: Global variable to hold the most-recent config block from calling cache = None +#: Per-request HTTP timeout (seconds) for TCS calls. Without it a hung Go TCS +#: blocks the synchronous requests call indefinitely, freezing the reactor thread +#: and every Process on it, including the 200 Hz broadcast the constant_el_scan +#: slew gate polls. The (connect, read) tuple bounds both phases; the read budget +#: is generous so a slow-but-alive TCS is not killed mid-response (the scan loops +#: have their own wall-clock backstops on top). +TCS_HTTP_TIMEOUT = (5, 30) # (connect, read) seconds + def load_config(filename=None, update_cache=True): '''Load ACU configuration file and return the settings content as dict. 
@@ -113,7 +121,8 @@ def post(self, cmd, data): try: response = self.session.post( - f"{self.url}{cmd}", json=data, verify=self.verify_cert #allow_redirects=True + f"{self.url}{cmd}", json=data, verify=self.verify_cert, #allow_redirects=True + timeout=TCS_HTTP_TIMEOUT ) self.log.debug(f"response code: {response.status_code}") except requests.exceptions.RequestException as e: @@ -129,16 +138,23 @@ def post(self, cmd, data): def get_status(self): cmd = f"{self.url_prefix}/acu/status" - self.log.info(f"getting status from {self.url}{cmd}") + #cmd = "/Values?identifier=DataSets.StatusGeneral8100&format=JSON" + # cmd = "http://127.0.0.1:8100/Values?identifier=DataSets.StatusGeneral8100&format=JSON" + #self.log.info(f"getting status from {self.url}{cmd}") try: - self.status = self.session.get(self.url + cmd, verify=self.verify_cert).json() + self.status = self.session.get( + self.url + cmd, verify=self.verify_cert, timeout=TCS_HTTP_TIMEOUT + ).json() + #self.status = self.session.get(cmd, verify=self.verify_cert).json() except requests.exceptions.ConnectionError as e: self.log.error( f"failed to connect on {self.url} check is server up, exiting" + #f"failed to connect on {cmd} check is server up, exiting" ) sys.exit(-1) return self.status + def abort(self): cmd = f"{self.url_prefix}/abort" r = self.post(cmd, "") @@ -157,7 +173,11 @@ def move_to(self, azimuth: float, elevation: float): cmd = f"{self.url_prefix}/move-to" data = {"azimuth": azimuth, "elevation": elevation} response = self.post(cmd, data) - self.log.info(response.json()) + # ``post`` returns {} (not a Response) on HTTP 503; guard the .json() + # log so a 503 from /move-to does not raise AttributeError inside the + # client. The caller's status guard then handles the {} gracefully. 
+ if hasattr(response, "json"): + self.log.info(response.json()) return response def azimuth_scan(self, start_time: float, elevation: float, @@ -185,7 +205,6 @@ def azimuth_scan(self, start_time: float, elevation: float, return response def scan_pattern(self, data): - dt = datetime.datetime.now() + datetime.timedelta(seconds=10) cmd = f"{self.url_prefix}/path" self.log.info(data) response = self.post(cmd, data) @@ -208,9 +227,11 @@ def scan_pattern_from_file(self, file_path): "coordsys": "Horizon", "points": points } - self.scan_pattern(data) - - return + # Return scan_pattern's value (Response on success, {} on a 503 + # short-circuit) instead of None, matching every other command method, so + # the caller can read the status via tcs_response_status. Returning None + # made msg.status_code crash on EVERY call. + return self.scan_pattern(data) diff --git a/pcs/agents/acu_interface/agent.py b/pcs/agents/acu_interface/agent.py index f349db4..61bd0d4 100644 --- a/pcs/agents/acu_interface/agent.py +++ b/pcs/agents/acu_interface/agent.py @@ -24,6 +24,10 @@ from autobahn.twisted.util import sleep as dsleep from ocs import ocs_agent, site_config +#from aculib import status_keys +from pcs.agents.acu_interface import status_keys +from pcs.agents.acu_interface import aculib +from pcs.agents.acu_interface import drivers as sh from ocs.ocs_twisted import TimeoutLock from twisted.internet.defer import DeferredList, inlineCallbacks from twisted.internet import protocol, reactor, threads @@ -40,6 +44,78 @@ from pcs.agents.acu_interface import aculib from pcs.agents.acu_interface import drivers as drv +# FYST typed scans: ocs-free dispatch cores + the completion +# latch and constants (so the Process poll loop and the unit-testable decision +# logic share one source of truth). See trajectory.py. 
+from fyst_trajectories import get_fyst_site +from pcs.agents.acu_interface.trajectory import ( + MAX_REFLOOR_DRIFT_SEC, + TCS_SPEED_TOL, + ScanCompletionLatch, + build_constant_el_payload, + build_daisy_payload, + build_pong_payload, + build_source_payload, + refloor_drift_seconds, + refloor_payload_start_time, + tcs_response_status, +) + +INIT_DEFAULT_SCAN_PARAMS = { + 'latp': { + 'az_speed': 2, + 'az_accel': 1, + 'el_freq': .15, + 'turnaround_method': 'standard', + 'el_mode': None, + }, +} + +MONITOR_STRUCTURE = [ + ('ACU_summary_output', 'summary', 'tick', None), + ('ACU_axis_faults', 'axis_faults_errors_overages', None, None), + ('ACU_position_errors', 'position_errors', None, None), + ('ACU_axis_limits', 'axis_limits', None, None), + ('ACU_axis_warnings', 'axis_warnings', None, None), + ('ACU_axis_failures', 'axis_failures', None, None), + ('ACU_axis_state', 'axis_state', None, None), + ('ACU_oscillation_alarm', 'osc_alarms', None, None), + ('ACU_command_status', 'commands', None, None), + ('ACU_general_errors', 'ACU_failures_errors', None, None), + ('ACU_platform_status', 'platform_status', None, None), + ('ACU_emergency', 'ACU_emergency', None, None), + ('ACU_tilt', 'tilt_slow', 'changed', 0.5), + (None, 'tilt_fast', None, None), + ('ACU_sun_avoidance', 'sun_avoidance', None, 1.), + ('ACU_corrections', 'corrections', None, 10.), +] + +#: Maximum update time (in s) for "monitor" process data, even with no changes +MONITOR_MAX_TIME_DELTA = 2. + +# --------------------------------------------------------------------------- +# FYST constant_el_scan tunables (completion poll + slew gate). +# Soft constants, adjust against commissioning experience. 
+# --------------------------------------------------------------------------- +SCAN_COMPLETION_POLL_SEC = 1.0 # cadence of the post-POST completion poll +SCAN_SETTLE_SEC = 10.0 # time backstop past the computed scan end + # (SO uses a 20 s graceful-stop window; 10 s is + # a lighter backstop since the stack-drained + # signal normally fires first) +# The drained count (strict 9999) + axis speed tol live in trajectory.py, +# encapsulated by ScanCompletionLatch (this loop delegates the decision to it). + +SLEW_ARRIVAL_TOL_DEG = 0.05 # arrival tolerance (SO used 0.01; 0.05 is robust + # against 200 Hz broadcast jitter) +SLEW_POLL_SEC = 0.2 # matches the prior abort-poll cadence +SLEW_TIMEOUT_SEC = 180.0 # > worst-case ~133 s 360deg wrap-slew + margin + +#: The typed scan ops that run as abortable Processes holding azel_lock. The +#: standalone ``abort`` task stops each so a running scan releases the lock (its +#: dispatch loop sees session status 'stopping' -> sends its own /abort and +#: returns). Deliberately NOT the always-on infrastructure Processes +#: (broadcast/monitor), which must keep running across an abort. +SCAN_PROCESS_OPS = ('constant_el_scan', 'source_scan', 'pong_scan', 'daisy_scan') class ACUAgent: """Interface agent to send pointing commands to ACU and @@ -56,23 +132,109 @@ class ACUAgent: """ def __init__(self, agent, config, device='acu_sim', startup=False): self.agent = agent + #logging + self.log = agent.log #get the config settings self.config = aculib.load_config(config) self.acu_conf = self.config['devices'][device] + #self.platform_type = self.config['devices']['platform'] + # The 'acu-sim' device block carries no 'platform' key; default it to + # 'latp', the only platform populated in INIT_DEFAULT_SCAN_PARAMS and + # status_keys.status_fields, so the simulator initializes cleanly. 
+ self.platform_type = self.acu_conf.get('platform', 'latp') self.udp = self.acu_conf['streams']['main'] self.udp_schema = aculib.get_stream_schema(self.udp['schema']) + ########################################################################### + # Tried to add self.acu_read with the observatory_control_system class in pcs aculib + # Since it seems to have the same function as AcuControl + + self.acu_read = aculib.observatory_control_system(url=self.acu_conf['base_url'], + log=self.log, + server_cert=self.acu_conf['certs']['server_cert'], + client_cert=self.acu_conf['certs']['client_cert'], + client_key=self.acu_conf['certs']['client_key'], + verify_cert=False + ) + + ########################################################################### + #placeholder for data received from monitors # 'status' is populated by the monitor operation # 'broadcast' is populated by the udp stream self.data = {'status':{}, 'broadcast':{}} - #logging - self.log = agent.log + + for b, k, _, _ in MONITOR_STRUCTURE: + if b is None: + continue + self.data['status'][k] = {} + + #_dsets = self.acu_config['_datasets'] + _dsets = self.config['datasets'] + ''' + datasets: + ccat: + 'default_dataset': 'ccat' + 'datasets': + - ['ccat', 'DataSets.StatusCCatDetailed8100'] + - ['general', 'DataSets.StatusGeneral8100'] + - ['extra', 'DataSets.StatusExtra8100'] + - ['third', 'DataSets.Status3rdAxis'] + - ['faults', 'DataSets.StatusDetailedFaults'] + - ['pointing', 'DataSets.CmdPointingCorrection'] + - ['spem', 'DataSets.CmdSPEMParameter'] + - ['weather', 'DataSets.CmdWeatherStation'] + - ['azimuth', 'Antenna.SkyAxes.Azimuth'] + - ['elevation', 'Antenna.SkyAxes.Elevation'] + ''' + self.datasets = { + 'status': _dsets.get('default_dataset'), # this grabs 'DataSets.StatusDetailed' + # 'pointing': _dsets.get('pointing_dataset'), + } + for k, v in self.datasets.items(): + if v is not None: + self.datasets[k] = dict(_dsets['datasets'])[v] + self.log.info(f'self.datasets[k]:{self.datasets[k]}') 
#exclusive locks for telescope movements self.azel_lock = TimeoutLock() + '''#########################################''' + # Create a map from each status key (read through the + # self.datasets) to the output block and field name. + self.status_field_map = {} + for group, group_fields in \ + status_keys.status_fields[self.platform_type]['status_fields'].items(): + for block_name, block_group, _, _ in MONITOR_STRUCTURE: + if block_group == group: + break + else: + raise ValueError(f"status_key block '{group}' not found in MONITOR_STRUCTURE.") + for acu_key, block_key in group_fields.items(): + self.status_field_map[acu_key] = (group, block_name, block_key) + + # Motion limits (az / el / third ranges). + # self.motion_limits = self.acu_config['motion_limits'] + # if min_el: + # self.log.warn(f'Override: min_el={min_el}') + # self.motion_limits['elevation']['lower'] = min_el + # if max_el: + # self.log.warn(f'Override: max_el={max_el}') + # self.motion_limits['elevation']['upper'] = max_el + '''#########################################''' + + # Scan params (default vel / accel / el freq). + self.default_scan_params = \ + dict(INIT_DEFAULT_SCAN_PARAMS[self.platform_type]) + for _k in self.default_scan_params.keys(): + #_v = self.acu_config.get('scan_params', {}).get(_k) + _v = self.config.get('scan_params', {}).get(_k) + if _v is not None: + self.default_scan_params[_k] = _v + agent.log.info('On startup, default scan_params={scan_params}', + scan_params=self.default_scan_params) + self.scan_params = dict(self.default_scan_params) #register processes agent.register_process('broadcast', @@ -81,8 +243,34 @@ def __init__(self, agent, config, device='acu_sim', startup=False): blocking=False, startup=startup) - agent.register_process('execute_scan', - self.execute_scan, + agent.register_process('monitor', + self.monitor, + self._simple_process_stop, + blocking=False, + startup=startup) + + # FYST typed scan tasks. 
Registered as Processes (not + # Tasks) so they are abortable mid-scan via _simple_process_stop. + agent.register_process('constant_el_scan', + self.constant_el_scan, + self._simple_process_stop, + blocking=False, + startup=False) + + agent.register_process('source_scan', + self.source_scan, + self._simple_process_stop, + blocking=False, + startup=False) + + agent.register_process('pong_scan', + self.pong_scan, + self._simple_process_stop, + blocking=False, + startup=False) + + agent.register_process('daisy_scan', + self.daisy_scan, self._simple_process_stop, blocking=False, startup=False) @@ -100,8 +288,13 @@ def __init__(self, agent, config, device='acu_sim', startup=False): self.fromfile_scan, blocking=False, aborter=self._simple_task_abort) - - + # FYST dedicated abort task. Standalone /abort escape + # hatch (the shared _safe_abort path); safe to call with no scan + # running. Mid-scan abort normally goes via stopping the scan Process. + agent.register_task('abort', + self.abort, + blocking=False, + aborter=self._simple_task_abort) #agg. 
params basic_agg_params = {'frame_length': 60} fullstatus_agg_params = {'frame_length': 60, @@ -113,14 +306,20 @@ def __init__(self, agent, config, device='acu_sim', startup=False): 'exclude_aggregator': True} #register data feeds + # this is what the Monitor function talks to agent.register_feed('acu_status', record=True, agg_params=fullstatus_agg_params, buffer_time=1) + agent.register_feed('acu_udp_stream', record=True, agg_params=fullstatus_agg_params, buffer_time=1) + agent.register_feed('acu_error', + record=True, + agg_params=basic_agg_params, + buffer_time=1) #@inlineCallbacks def _simple_task_abort(self, session, params): @@ -134,9 +333,452 @@ def _simple_process_stop(self, session, params): # Trigger a process stop by updating state to "stopping" yield session.set_status('stopping') + @inlineCallbacks + def monitor(self, session, params): + """monitor() + + **Process** - Refresh the cache of CCAT FYST ACU status information and + report it on the 'acu_status' and 'acu_status_influx' HK feeds. + + Summary parameters are ACU-provided time code, Azimuth mode, + Azimuth position, Azimuth velocity, Elevation mode, Elevation position, + Elevation velocity, Boresight mode, and Boresight position. + + The session.data of this process is a nested dictionary. + Here's an example:: + + { + "StatusDetailed": { + "Time": 81.661170959322, + "Year": 2023, + "Azimuth mode": "Stop", + "Azimuth commanded position": -20.0012, + "Azimuth current position": -20.0012, + "Azimuth current velocity": 0.0002, + "Azimuth average position error": 0, + "Azimuth peak position error": 0, + "Azimuth computer disabled": false, + ... + }, + + "StatusResponseRate": 19.237531827325963, + "PlatformType": "satp", + "IgnoredAxes": [], + "NamedPositions": { + "home": [ + 180, + 40 + ] + }, + "DefaultScanParams": { + "az_speed": 2.0, + "az_accel": 1.0, + }, + "connected": True, + } + + """ + + # Note that session.data will get scanned, to assign data to + # feed blocks. 
We make an explicit list of items to ignore + # during that scan (not_data_keys). + session.data = {'PlatformType': self.platform_type, + 'DefaultScanParams': self.scan_params, + 'StatusResponseRate': 0., + # 'IgnoredAxes': self.ignore_axes, + #'NamedPositions': self.named_positions, + 'connected': False} + not_data_keys = list(session.data.keys()) + + + last_complaint = 0 + ######################################################################################## + # I'm commenting this out for now since it doesn't seem to affect anything else + + # while True: + # try: + # version = yield self.acu_read.http.Version() + # break + # except Exception as e: + # if time.time() - last_complaint > 3600: + # errormsg = {'aculib_error_message': str(e)} + # self.log.error(str(e)) + # self.log.error('monitor process failed to query version! Will keep trying.') + # last_complaint = time.time() + # yield dsleep(10) + + # self.log.info(version) + + ########################################################################################### + session.data['connected'] = True + + # Numbering as per ICD. + mode_key = { + 'Stop': 0, + 'Preset': 1, + 'ProgramTrack': 2, + 'Rate': 3, + 'SectorScan': 4, + 'SearchSpiral': 5, + 'SurvivalMode': 6, + 'StepTrack': 7, + 'GeoSync': 8, + 'OPT': 9, + 'TLE': 10, + 'Stow': 11, + 'StarTrack': 12, + 'SunTrack': 13, + 'MoonTrack': 14, + 'I11P': 15, + 'AutoTrack/Preset': 16, + 'AutoTrack/PositionMemory': 17, + 'AutoTrack/PT': 18, + 'AutoTrack/OPT': 19, + 'AutoTrack/PT/Search': 20, + 'AutoTrack/TLE': 21, + 'AutoTrack/TLE/Search': 22, + + # Currently we do not have ICD values for these, but they + # are included in the output of Meta. ElSync, at least, + # is a known third axis mode for the LAT. 
+ 'ElSync': 100, + 'UnStow': 101, + 'MaintenanceStow': 102, + } + + # fault_key digital values taken from ICD (correspond to byte-encoding) + fault_key = { + 'No Fault': 0, + 'Warning': 1, + 'Fault': 2, + 'Critical': 3, + 'No Data': 4, + 'Latched Fault': 5, + 'Latched Critical Fault': 6, + } + pin_key = { + # Capitalization matches strings in ACU binary, not ICD. + # Are these needed for the SAT still? + 'Any Moving': 0, + 'All Inserted': 1, + 'All Retracted': 2, + 'Failure': 3, + } + lat_pin_key = { + # From "meta" output. + 'Moving': 0, + 'Inserted': 1, + 'Retracted': 2, + 'Error': 3, + } + tfn_key = {'None': float('nan'), + 'False': 0, + 'True': 1, + } + report_t = time.time() + report_period = 20 + n_ok = 0 + min_query_period = 0.05 # Seconds + query_t = 0 + + # Assist monitoring and logging changes in certain fields. + checkdata = [ + ('summary', 'ctime'), + ('platform_status', 'Remote_mode'), + ('summary', 'Azimuth_mode'), + ('summary', 'Elevation_mode'), + # ('summary', 'Boresight_mode'), + ] + prev_checkdata = {k: None for g, k in checkdata} + + @inlineCallbacks + def _get_status(): + output = {} + for short, collection in [ + ('status', 'StatusGeneral8100'), # this is what's in session.data already + #('pointing', 'CmdPointingCorrection'), #we commented this out above for some reason (see line 119) + ]: + #if self.datasets[short]: + # output[collection] = ( + # yield self.acu_read.Values(self.datasets[short])) + #else: + # output[collection] = {} + + # It looks like get_status has a similar function to Values + # since it queries https://127.0.0.1:5600/api/v1/telescope/acu/status + # so I commented the above out and replaced it + # not working perfectly still so should look more into this + output[collection] = (yield threads.deferToThread(self.acu_read.get_status)) + # output[collection] = yield self.acu_read.Values(self.datasets[short]) + # self.log.info(f'_get_status() dict :{output[collection]}') + return output + + #session.data['StatusResponseRate'] 
= n_ok / (query_t - report_t) + try: + session.data.update((yield _get_status())) # update() merges two dictionaries + except (Exception, SystemExit) as e: + self.log.error(f'monitor: initial status read failed: {e}') + session.data['connected'] = False + #session.data.update((yield self.acu_read.get_status())) + for key,value in session.data.items(): + #self.log.info("session.data after _get_status") + self.log.info(f'{key}:{value}') + + ''' based on the code above, I think _get_status() gets called + before session.data is set, so we can just use "output" + but it formats the dictionary differently + ''' + # qual_pacer = Pacemaker(.1) + + # last_resp_rate = None + data_blocks = {} + # influx_blocks = {} + unknown_fields = set() + + while session.status in ['running']: + + # THIS WHOLE SECTION IS JUST A WAY TO MAKE SURE THAT THEY ARE CONSISTENTLY GETTING STATUS MESSAGES + # AT A SPECIFIC CADENCE. THEY ARE USING A THING CALLED THE 'PACEMAKER' (SEE ABOVE), WHICH + # IS PART OF THE SO OCS CODE IN ocs.ocs_twisted + # ITS NOT CLEAR WHAT THIS IS DOING THAT time.sleep() COULDN'T ALSO ACCOMPLISH, SO + # I'M REMOVING IT FOR NOW + ''' #################################################################################### + now = time.time() + if now - query_t < min_query_period: + yield dsleep(min_query_period - (now - query_t)) + + query_t = time.time() + if query_t > report_t + report_period: + resp_rate = n_ok / (query_t - report_t) + if last_resp_rate is None or (abs(resp_rate - last_resp_rate) + > max(0.1, last_resp_rate * .01)): + self.log.info('Data rate for "monitor" stream is now %.3f Hz' % (resp_rate)) + last_resp_rate = resp_rate + report_t = query_t + n_ok = 0 + session.data.update({'StatusResponseRate': resp_rate}) + + if qual_pacer.next_sample <= time.time(): + # Publish UDP data health feed + qual_pacer.sleep() # should be instantaneous, just update counters + bq = self._broadcast_qual + bq_offset = bq['time_offset'] + if bq_offset is None: + bq_offset = 0. 
+ bq_ok = (bq['active'] and (now - bq['timestamp'] < 5) + and abs(bq_offset) < 1.) + block = { + 'timestamp': time.time(), + 'block_name': 'qual0', + 'data': { + 'Broadcast_stream_ok': int(bq_ok), + 'Broadcast_recv_offset': bq_offset, + } + } + self.agent.publish_to_feed('data_qual', block) + #################################################################################### + ''' + now = time.time() #now is used below so I pulled it out of the commented code block above + + try: + session.data.update((yield _get_status())) + #for key,value in session.data.items(): + #self.log.info("session.data after _get_status") + # self.log.info(f'{key}:{value}') + #session.data.update((yield self.acu_read.get_status())) + session.data['connected'] = True + n_ok += 1 + last_complaint = 0 + except (Exception, SystemExit) as e: + if now - last_complaint > 3600: + errormsg = {'aculib_error_message': str(e)} + self.log.error(str(e)) + acu_error = {'timestamp': time.time(), + 'block_name': 'ACU_error', + 'data': errormsg + } + self.agent.publish_to_feed('acu_error', acu_error) + last_complaint = time.time() + session.data['connected'] = False + yield dsleep(1) + continue + + # this section uses the status_field_map to match the key,val pairs from the MONITOR STRUCTURE + # and status_keys used by get_status() to parse the session.data and self.data dictionaries + for k, v in session.data.items(): + if k in not_data_keys: + continue + for (key, value) in v.items(): + try: + group, block, field = self.status_field_map[key] + except KeyError: + if key not in unknown_fields: + self.log.warn( + 'unknown status field (ignored hereafter): "%s"' % key) + unknown_fields.add(key) + continue + if block is None: + continue + # Cast value to saveable type. + if isinstance(value, bool): + value = int(value) + elif isinstance(value, int) or isinstance(value, float): + pass + elif value is None: + value = float('nan') + else: + value = str(value) + # Store. 
+ #self.data['status'][k][v] = value + self.data['status'][group][field] = value + #self.data[k][v] = value + + self.data['status']['summary']['ctime'] = \ + sh.timecode(self.data['status']['summary']['Time']) + + # Check for state changes in some key fields. + new_checkdata = {k: self.data['status'][g].get(k) + for g, k in checkdata} + + if new_checkdata['Remote_mode'] != prev_checkdata['Remote_mode']: + if new_checkdata['Remote_mode']: + self.log.warn('ACU now in remote mode.') + else: + self.log.warn('ACU in local mode!') + + for axis_mode, v in new_checkdata.items(): + if 'mode' not in axis_mode or 'Remote' in axis_mode: + continue + if v != prev_checkdata[axis_mode]: + self.log.info('{axis_mode} is now "{v}"', + axis_mode=axis_mode, v=v) + + if new_checkdata['ctime'] == prev_checkdata['ctime']: + self.log.warn('ACU time has not changed from previous data point!') + continue + + prev_checkdata = new_checkdata + + + ''' + ########################################################## + I THINK THIS SECTION IS FOR INFLUX PUBLISHING WHICH WE ARE + ALREADY DOING THROUGH OUR 'broadcast' FUNCTION, SO I'M + COMMENTING IT OUT FOR NOW + ########################################################## + ''' + # influx_blocks are constructed based on refers to all + # other self.data['status'] keys. 
Do not add more keys to + # any self.data['status'] categories beyond this point + + # new_influx_blocks = {} + # for category in self.data['status']: + # new_influx_blocks[category] = { + # 'timestamp': self.data['status']['summary']['ctime'], + # 'block_name': category, + # 'data': {}} + + # if category != 'commands': + # for statkey, statval in self.data['status'][category].items(): + # if isinstance(statval, float): + # influx_val = statval + # elif isinstance(statval, str): + # for key_map in [tfn_key, mode_key, fault_key, pin_key, + # lat_pin_key]: + # if statval in key_map: + # influx_val = key_map[statval] + # break + # else: + # raise ValueError('Could not convert value for %s="%s"' % + # (statkey, statval)) + # elif isinstance(statval, int): + # if statkey in ['Year', 'Free_upload_positions']: + # influx_val = float(statval) + # else: + # influx_val = int(statval) + # new_influx_blocks[category]['data'][statkey + '_influx'] = influx_val + # else: # i.e. category == 'commands': + # if str(self.data['status']['commands']['Azimuth_commanded_position']) != 'nan': + # acucommand_az = {'timestamp': self.data['status']['summary']['ctime'], + # 'block_name': 'ACU_commanded_positions_az', + # 'data': {'Azimuth_commanded_position_influx': self.data['status']['commands']['Azimuth_commanded_position']} + # } + # self.agent.publish_to_feed('acu_commands_influx', acucommand_az) + # if str(self.data['status']['commands']['Elevation_commanded_position']) != 'nan': + # acucommand_el = {'timestamp': self.data['status']['summary']['ctime'], + # 'block_name': 'ACU_commanded_positions_el', + # 'data': {'Elevation_commanded_position_influx': self.data['status']['commands']['Elevation_commanded_position']} + # } + # self.agent.publish_to_feed('acu_commands_influx', acucommand_el) + # if self.acu_config['platform'] == 'satp': + # if str(self.data['status']['commands']['Boresight_commanded_position']) != 'nan': + # acucommand_bs = {'timestamp': 
self.data['status']['summary']['ctime'], + # 'block_name': 'ACU_commanded_positions_boresight', + # 'data': {'Boresight_commanded_position_influx': self.data['status']['commands']['Boresight_commanded_position']} + # } + # self.agent.publish_to_feed('acu_commands_influx', acucommand_bs) + + # Only keep blocks that have changed or have new data. + # block_keys = list(new_influx_blocks.keys()) + # for k in block_keys: + # if k not in influx_blocks: + # continue + # B, N = influx_blocks[k], new_influx_blocks[k] + # overdue = (N['timestamp'] - B['timestamp'] > MONITOR_MAX_TIME_DELTA) + # changes = any([B['data'][_k] != _v for _k, _v in N['data'].items()]) + # if overdue or changes: + # continue + # del new_influx_blocks[k] + + # for block in new_influx_blocks.values(): + # # Check that we have data (commands and corotator often don't) + # if len(block['data']) > 0: + # self.agent.publish_to_feed('acu_status_influx', block) + # influx_blocks.update(new_influx_blocks) + + # Assemble data for aggregator ... + new_blocks = {} + for block_name, data_key, _, _ in MONITOR_STRUCTURE: + if block_name is None: + continue + new_blocks[block_name] = { + 'timestamp': self.data['status']['summary']['ctime'], + 'block_name': block_name, + 'data': self.data['status'][data_key], + } + + # Only keep blocks that have changed or have new data. + for k, _, policy, delta in MONITOR_STRUCTURE: + if k is None: + continue + B, N = data_blocks.get(k), new_blocks[k] + if len(N['data']) == 0: + del new_blocks[k] + continue + if B is None: + continue + if policy == 'tick': # always store. 
+ continue + underdue = delta is not None and \ + (N['timestamp'] - B['timestamp'] < delta) + overdue = (N['timestamp'] - B['timestamp'] > MONITOR_MAX_TIME_DELTA) \ + and not underdue + changes = any([B['data'][_k] != _v for _k, _v in N['data'].items()]) + if (overdue and policy != 'changed') or changes and not underdue: + continue + del new_blocks[k] + + for block in new_blocks.values(): + self.agent.publish_to_feed('acu_status', block) + + data_blocks.update(new_blocks) + + return True, 'Acquisition exited cleanly.' + + @ocs_agent.param('auto_enable', type=bool, default=True) @inlineCallbacks - def broadcast(self, session, params): + def broadcast(self, session, params): #this is called _udp_stream_handler in SOCS """broadcast(auto_enable=True) **Process** - Read UDP data from the port specified by @@ -292,13 +934,20 @@ def go_to(self, session, params): ) self.log.info('Executing telescope movement') msg = tcs.move_to(target_az,target_el) - self.log.info(f"HTTP request executed with respose code: {msg.status_code}") + # 503 -> {} in aculib.post() (move_to passes it through); read via + # tcs_response_status so a rejection fails gracefully rather than + # crashing on {}.status_code. 
+ code = tcs_response_status(msg) + self.log.info(f"HTTP request executed with response code: {code}") + if code != 200: + return False, ( + f"go_to: Go TCS rejected move_to (HTTP {code}); not moved.") - - return True, msg.text + return True, getattr(msg, "text", "") @ocs_agent.param('scan_params', type=dict) - def az_scan(): + #def az_scan(): + def az_scan(self, session, params): """az_scan(start_time, turnaround_time, elevation, speed, num_scans, azimuth_range) @@ -337,13 +986,21 @@ def az_scan(): ) self.log.info('Executing telescope movement') msg = tcs.azimuth_scan(**params['scan_params']) - self.log.info(f"HTTP request executed with respose code: {msg.status_code}") + # 503 -> {} in aculib.post(); read via tcs_response_status so a + # rejection fails gracefully rather than crashing on {}.status_code. + code = tcs_response_status(msg) + self.log.info(f"HTTP request executed with response code: {code}") + if code != 200: + return False, ( + f"az_scan: Go TCS rejected azimuth-scan (HTTP {code}); " + "not launched.") - return True, msg.text + return True, getattr(msg, "text", "") @ocs_agent.param('scan_filename', type=str) - def fromfile_scan(): + #def fromfile_scan(): + def fromfile_scan(self, session, params): """fromfile_scan(scan_filename) **Task** - Send scan commands for a predefined arbitrary path which @@ -373,17 +1030,508 @@ def fromfile_scan(): ) self.log.info('Executing telescope movement') msg = tcs.scan_pattern_from_file(params['scan_filename']) + # scan_pattern_from_file now returns the /path response (or {} on a + # 503); read via tcs_response_status so a rejection fails gracefully + # rather than crashing on {}/None .status_code. 
+ code = tcs_response_status(msg) + self.log.info(f"HTTP request executed with response code: {code}") + if code != 200: + return False, ( + f"fromfile_scan: Go TCS rejected /path (HTTP {code}); " + "scan not launched.") + + return True, getattr(msg, "text", "") + + def _make_tcs(self): + """Build a FRESH per-scan Go TCS client from this device's config. + + Each typed scan Process (and the standalone ``abort`` task) builds its own + ``aculib.observatory_control_system`` instead of sharing ``self.acu_read``: + ``requests.Session`` is not thread-safe, and the always-on ``monitor`` + Process (reactor thread) + the standalone ``abort`` task can hit the shared + client's one Session concurrently with a scan's thread-pool calls. A fresh + client per scan gives it its own Session/connection-pool, matching legacy + ``go_to`` / ``az_scan`` / ``fromfile_scan``. + + Reads ``self.acu_conf['certs']`` defensively (``.get`` + per-field + defaults), so the cert-less device blocks resolve to ``verify_cert=False`` + with empty cert paths instead of ``KeyError``-ing like the legacy builds. + Empty cert paths route ``start_session`` down its cert-less branch + (``aculib.py:103-107``, ``session.verify = False``), correct for the + plain-HTTP / loopback-self-signed devices. + """ + certs = self.acu_conf.get('certs', {}) + return aculib.observatory_control_system( + self.acu_conf['base_url'], + self.log, + server_cert=certs.get('server_cert', ''), + client_cert=certs.get('client_cert', ''), + client_key=certs.get('client_key', ''), + verify_cert=certs.get('verify', False), + ) + + def _safe_abort(self, tcs): + """Send a Go TCS ``/abort``, swallowing transport failures. + + ``aculib...abort()`` calls ``post()``, which raises ``SystemExit`` on a + ``requests.RequestException`` and also does ``json.loads(r.content)``; + either would otherwise escape the abort path and tear down the Process + mid-cleanup. 
def _safe_abort(self, tcs):
    """Send ``/abort`` through ``tcs`` without letting a failure escape.

    ``tcs.abort()`` is attempted once. Anything it raises that is an
    ``Exception`` or a ``SystemExit`` is logged as a warning and swallowed,
    so cleanup code calling this helper always gets a plain boolean back.
    (Per the original note, the client's ``post()`` exits via ``SystemExit``
    on transport errors, which is why that type is trapped explicitly.)
    ``KeyboardInterrupt`` / ``GeneratorExit`` are deliberately not caught.

    Args:
        tcs: Go TCS client exposing ``abort()``.

    Returns:
        ``True`` if ``abort()`` returned normally, ``False`` if a failure was
        swallowed.
    """
    sent = False
    try:
        tcs.abort()
    except (Exception, SystemExit) as exc:
        self.log.warn(f'/abort failed (continuing cleanup): {exc}')
    else:
        sent = True
    return sent
def _current_encoder_azel(self, tcs):
    """Return ``(az, el, source)`` for the current encoder position, or ``None``.

    Two sources are tried in order:

    1. ``self.data['broadcast']``: used when it holds both ``'Azimuth'`` and
       ``'Elevation'``; ``source`` is ``'broadcast'``.
    2. One ``tcs.get_status()`` call: used otherwise, reading
       ``'Azimuth current position'`` / ``'Elevation current position'``;
       ``source`` is ``'status'``.

    A failing status read (``Exception`` or ``SystemExit``) is logged and
    treated as "no data". When neither source yields both axes a warning is
    logged and ``None`` is returned so the caller can refuse to act on a
    guessed position.

    Args:
        tcs: Go TCS client exposing ``get_status()``.

    Returns:
        ``(float az, float el, str source)`` or ``None``.
    """
    live = self.data.get('broadcast', {})
    if all(axis in live for axis in ('Azimuth', 'Elevation')):
        return float(live['Azimuth']), float(live['Elevation']), 'broadcast'

    # Broadcast has no position yet: fall back to a single status read.
    from_status = None
    try:
        acu = tcs.get_status()
        raw_az = acu.get('Azimuth current position')
        raw_el = acu.get('Elevation current position')
        if not (raw_az is None or raw_el is None):
            from_status = (float(raw_az), float(raw_el), 'status')
    except (Exception, SystemExit) as exc:
        self.log.warn(f'_current_encoder_azel: get_status() fallback failed: {exc}')

    if from_status is not None:
        return from_status

    self.log.warn('no live position available; '
                  'cannot determine current encoder az/el.')
    return None


def _axes_stopped(self, status):
    """Tell whether ``status`` shows both axes at rest.

    Looks up ``'Azimuth current velocity'`` and ``'Elevation current
    velocity'`` in ``status``. The answer is true only when both values are
    present and each has magnitude strictly below ``TCS_SPEED_TOL``; a
    missing velocity counts as "not known to be stopped".

    Args:
        status: mapping with a ``get`` method (a raw status dict).

    Returns:
        ``True`` when both axis speeds are below tolerance, else ``False``.
    """
    rates = [status.get(key) for key in ('Azimuth current velocity',
                                         'Elevation current velocity')]
    for rate in rates:
        if rate is None:
            return False
        if not abs(rate) < TCS_SPEED_TOL:
            return False
    return True
@ocs_agent.param('scan_params', type=dict)
@ocs_agent.param('scheduled_t0_unix', type=float, default=None)
@inlineCallbacks
def constant_el_scan(self, session, params):
    """constant_el_scan(scan_params, scheduled_t0_unix=None)

    **Process** - FYST constant-elevation scan.

    Thin entry point: all work is delegated to
    :meth:`_dispatch_scan_process` with ``build_constant_el_payload`` as the
    trajectory builder and a client obtained from :meth:`_make_tcs`. As
    documented for that shared path, the trajectory is built from the
    *current* encoder position, the mount is slewed to the scan start, and
    the result is POSTed to the Go TCS ``/path`` endpoint. Stopping the
    Process (session ``'stopping'``) makes it send ``/abort``.

    Parameters:
        scan_params (dict): Constant-elevation scan specification, modeled
            on ``fyst_trajectories.plan_constant_el_scan``. Required keys:
            ``ra_center``, ``dec_center``, ``width``, ``height`` (deg),
            ``elevation`` (deg), ``velocity`` (azimuth-coordinate deg/s,
            mount frame, passed through as-is, NOT cos(el)-scaled).
            Optional keys: ``rising`` (bool), ``angle``, ``az_accel``,
            ``timestep``, ``az_padding``, ``max_search_hours``,
            ``step_seconds``, ``lsa_window``.
        scheduled_t0_unix (float): Scheduled scan start in Unix seconds, or
            None to start as soon as the dispatch buffer allows. The
            effective start is ``max(scheduled_t0_unix, now + 10 s)``.
    """
    client = self._make_tcs()
    outcome = yield self._dispatch_scan_process(
        session,
        params,
        job='constant_el_scan',
        build_fn=build_constant_el_payload,
        tcs=client,
    )
    return outcome
@inlineCallbacks
def _dispatch_scan_process(self, session, params, *, build_fn, job, tcs):
    """Shared dispatch-and-run body for a typed scan Process.

    Stages, all executed while holding ``azel_lock``:

    1. Read the current encoder position; refuse when it is unknown.
    2. Build the trajectory with ``build_fn`` in a worker thread.
    3. Slew to the scan start with ``tcs.move_to``.
    4. Poll until position and velocity show the slew finished (or timeout).
    5. Re-floor ``start_time``; refuse when it drifted too far.
    6. POST the payload with ``tcs.scan_pattern``.
    7. Poll status until the completion latch fires or the scan's absolute
       end time (plus settle) passes.

    A session status of ``'stopping'`` is honoured before the slew, during
    the slew gate, before the POST and during the completion wait; from the
    slew gate onward ``/abort`` is sent via :meth:`_safe_abort`.

    Every TCS call is trapped for ``(Exception, SystemExit)``, including
    ``move_to`` and ``scan_pattern``. The client exits via ``SystemExit`` on
    transport errors, so a timeout or connection failure on either POST
    yields ``(False, message)`` instead of crashing the Process.

    Args:
        session: OCS operation session; only ``session.status`` is read.
        params (dict): must carry ``scan_params`` (dict); may carry
            ``scheduled_t0_unix`` (float or None).
        build_fn: callable taking ``scan_params``, ``current_az``,
            ``current_el``, ``site``, ``scheduled_t0_unix`` and ``now_unix``
            keywords and returning a mapping with ``encoder_az``,
            ``encoder_el`` and ``payload``.
        job (str): lock job name and log prefix.
        tcs: Go TCS client dedicated to this scan (see :meth:`_make_tcs`).

    Returns:
        ``(ok, message)``. ``ok`` is ``True`` for a completed or cleanly
        aborted scan, ``False`` for any refusal or failure.
    """
    with self.azel_lock.acquire_timeout(0, job=job) as acquired:
        if not acquired:
            return False, f"Operation failed: {self.azel_lock.job} is running."

        # Current encoder position (broadcast, else one-shot status).
        # Refuse to dispatch on an unknown position; the caller may retry.
        pos = yield threads.deferToThread(self._current_encoder_azel, tcs)
        if pos is None:
            return False, (
                f"{job}: refusing to dispatch, current telescope position "
                "unknown (200 Hz broadcast cold and ACU status unavailable). "
                "Start/await the 'broadcast' Process and retry.")
        current_az, current_el, src = pos
        self.log.info(
            f'{job}: current position az={current_az:.4f}, '
            f'el={current_el:.4f} (from {src})')

        # Build the trajectory + slew target off the reactor thread.
        site = get_fyst_site()
        try:
            result = yield threads.deferToThread(
                build_fn,
                scan_params=params['scan_params'],
                current_az=current_az,
                current_el=current_el,
                site=site,
                scheduled_t0_unix=params.get('scheduled_t0_unix'),
                now_unix=time.time(),
            )
        except Exception as e:
            self.log.error(f'{job}: trajectory build failed: {e}')
            return False, f'Trajectory build failed: {e}'

        enc_az = result['encoder_az']
        enc_el = result['encoder_el']
        payload = result['payload']
        self.log.info(
            f'{job}: slew target az={enc_az:.4f}, el={enc_el:.4f}; '
            f'{len(payload["points"])} trajectory points, '
            f'start_time={payload["start_time"]:.3f}')

        # Honor an abort requested during the (potentially long) build.
        # Nothing has been commanded yet, so no /abort is needed.
        if session.status == 'stopping':
            return True, 'Aborted before slew.'

        # Slew to the scan start. Transport failures surface as SystemExit
        # (or a requests exception) from the worker thread; trap them like
        # every other TCS call here so the Process fails gracefully.
        self.log.info(f'{job}: slewing to sun-safe scan start')
        try:
            slew_msg = yield threads.deferToThread(tcs.move_to, enc_az, enc_el)
        except (Exception, SystemExit) as e:
            self.log.error(f'{job}: move_to transport failure: {e}')
            return False, (
                f'{job}: move_to to scan start failed in transport ({e}); '
                'scan not launched.')
        # A rejected POST may come back as {} rather than a Response; read
        # the code through tcs_response_status so that case is handled.
        slew_code = tcs_response_status(slew_msg)
        self.log.info(f'{job}: move_to response code {slew_code}')
        if slew_code != 200:
            return False, (
                f'{job}: Go TCS rejected move_to to scan start '
                f'(HTTP {slew_code}); scan not launched.')

        # Gate the /path POST on the slew physically completing: position
        # within tolerance on both axes AND both axes stopped. Velocity
        # needs a status read, done only once the position has arrived.
        # Abort-aware; uses dsleep so the reactor keeps running.
        slew_deadline = time.time() + SLEW_TIMEOUT_SEC
        while True:
            if session.status == 'stopping':
                self.log.info(f'{job}: abort requested during slew, sending /abort')
                yield threads.deferToThread(self._safe_abort, tcs)
                return True, f'{job} aborted during slew; /abort sent.'
            pos = yield threads.deferToThread(self._current_encoder_azel, tcs)
            if pos is not None:
                az_now, el_now, _ = pos
                # Shortest angular distance in azimuth, folded to [0, 180].
                daz = abs((az_now - enc_az + 180.0) % 360.0 - 180.0)
                if daz <= SLEW_ARRIVAL_TOL_DEG and abs(el_now - enc_el) <= SLEW_ARRIVAL_TOL_DEG:
                    # Position arrived; confirm both axes stopped. A failed
                    # or velocity-less read means "not known to be stopped",
                    # so keep polling until the timeout.
                    try:
                        status = yield threads.deferToThread(tcs.get_status)
                        if self._axes_stopped(status):
                            break
                    except (Exception, SystemExit) as e:
                        self.log.warn(
                            f'{job}: velocity read during slew gate failed: {e}')
            if time.time() > slew_deadline:
                return False, (
                    f'{job}: slew to scan start timed out '
                    f'(target az={enc_az:.3f}, el={enc_el:.3f}).')
            yield dsleep(SLEW_POLL_SEC)

        # Re-floor start_time now the slew is done; the slew may have used
        # up the lead of a near-now scan. The completion end time below
        # reads this same start_time, so it stays consistent.
        original_start = payload["start_time"]
        payload = refloor_payload_start_time(payload, time.time())
        drift = refloor_drift_seconds(original_start, payload)
        if drift > MAX_REFLOOR_DRIFT_SEC:
            # Too much drift: the pre-built track no longer matches the sky.
            # Refuse; a new dispatch rebuilds from the current time.
            return False, (
                f'{job}: post-slew re-floor advanced start_time by '
                f'{drift:.1f} s (> {MAX_REFLOOR_DRIFT_SEC:.0f} s); the slew '
                f'consumed the scan lead and the trajectory geometry is '
                f'stale. Refusing to POST; redispatch.')

        # Final abort gate: the session may have flipped to 'stopping'
        # during the last off-reactor status read of the slew gate.
        if session.status == 'stopping':
            self.log.info(f'{job}: abort requested before /path POST, sending /abort')
            yield threads.deferToThread(self._safe_abort, tcs)
            return True, f'{job} aborted before /path POST; /abort sent.'

        self.log.info(f'{job}: posting trajectory to /path')
        try:
            scan_msg = yield threads.deferToThread(tcs.scan_pattern, payload)
        except (Exception, SystemExit) as e:
            # The request may or may not have reached the TCS; report the
            # uncertainty instead of crashing the Process.
            self.log.error(f'{job}: /path POST transport failure: {e}')
            return False, (
                f'{job}: /path POST failed in transport ({e}); scan state '
                'unknown, send abort if the mount is moving.')
        scan_code = tcs_response_status(scan_msg)
        self.log.info(f'{job}: /path response code {scan_code}')
        if scan_code != 200:
            return False, (
                f'{job}: Go TCS rejected /path '
                f'(HTTP {scan_code}); scan not launched.')

        # Completion-aware, abort-aware wait. HTTP 200 means "accepted",
        # not "done", so poll status until the latch reports completion,
        # with the absolute scan end time as a backstop so a status-read
        # fault can never hang the Process while it holds azel_lock.
        # start_time is absolute Unix; points[-1][0] is relative seconds.
        end_unix = payload['start_time'] + payload['points'][-1][0]
        deadline = end_unix + SCAN_SETTLE_SEC
        latch = ScanCompletionLatch(payload['start_time'])
        while True:
            if session.status == 'stopping':
                self.log.info(f'{job}: abort requested, sending /abort')
                yield threads.deferToThread(self._safe_abort, tcs)
                return True, f'{job} aborted; /abort sent.'
            try:
                status = yield threads.deferToThread(tcs.get_status)
                # Accept either the raw ACU key or the post-mapping alias
                # for the free program-track stack count.
                free = status.get('Qty of free program track stack positions')
                if free is None:
                    free = status.get('Free_upload_positions')
                vaz = status.get('Azimuth current velocity')
                vel = status.get('Elevation current velocity')
                az_mode = status.get('Azimuth mode')
                el_mode = status.get('Elevation mode')
                if latch.update(free=free, vaz=vaz, vel=vel, now_unix=time.time(),
                                az_mode=az_mode, el_mode=el_mode):
                    self.log.info(f'{job}: scan complete (stack drained).')
                    break
            except (Exception, SystemExit) as e:
                # A transient status failure must not kill the Process; the
                # time backstop still bounds the wait.
                self.log.warn(f'{job}: status read failed during wait: {e}')
            if time.time() >= deadline:
                self.log.info(f'{job}: scan end reached (time backstop).')
                break
            yield dsleep(SCAN_COMPLETION_POLL_SEC)

        # Same defensive read as the other scan ops in this agent.
        return True, getattr(scan_msg, "text", "")
@ocs_agent.param('scan_params', type=dict)
@ocs_agent.param('scheduled_t0_unix', type=float, default=None)
@inlineCallbacks
def source_scan(self, session, params):
    """source_scan(scan_params, scheduled_t0_unix=None)

    **Process** - FYST source-tracking constant-elevation scan.

    Thin entry point: all work is delegated to
    :meth:`_dispatch_scan_process` with ``build_source_payload`` as the
    trajectory builder and a client obtained from :meth:`_make_tcs`. As
    documented for that builder, a moving source (planet or sidereal point)
    is dragged across the *centred* PrimeCam focal plane at fixed boresight
    elevation (``fyst_trajectories.plan_source_ces``). The mount is then
    slewed to the scan start and the trajectory is POSTed to Go TCS
    ``/path``. Stopping the Process (session ``'stopping'``) makes it send
    ``/abort``.

    Velocity frame: the commanded az velocity is the planner's solved
    MOUNT-frame drift (same frame as ``constant_el_scan``, NOT the on-sky
    frame used by pong/daisy); ``cos(el)`` is implicit in the fixed-elevation
    az track and is NOT re-applied.

    **Centred only.** Only the on-axis, full-array case (``footprint="c"``,
    uncommanded boresight rotator) is dispatched. Per the builder's
    documentation, an off-centre footprint or a commanded ``boresight_rot``
    raises ``ValueError``.

    Parameters:
        scan_params (dict): Source-CES spec, modeled on
            ``fyst_trajectories.plan_source_ces``. Source: ``body`` (str) OR
            ``ra`` + ``dec`` (deg); plus ``el_bore`` (deg). Optional:
            ``mode`` ('rising'/'setting'), ``footprint`` (must be
            'c'/'center' while the gate stands), ``boresight_rot`` (must be
            None), ``pm_ra``/``pm_dec``, ``ref_epoch``, ``timestep``,
            ``sampling_step_seconds``, ``az_accel``, ``az_padding``,
            ``az_branch``, ``allow_partial``, ``v_az``, ``window``.
        scheduled_t0_unix (float): Scheduled start in Unix seconds (the
            plan_source_ces search anchor), or None to start as soon as the
            dispatch buffer allows.
    """
    client = self._make_tcs()
    outcome = yield self._dispatch_scan_process(
        session,
        params,
        job='source_scan',
        build_fn=build_source_payload,
        tcs=client,
    )
    return outcome
@ocs_agent.param('scan_params', type=dict)
@ocs_agent.param('scheduled_t0_unix', type=float, default=None)
@inlineCallbacks
def pong_scan(self, session, params):
    """pong_scan(scan_params, scheduled_t0_unix=None)

    **Process** - FYST Pong (curvy-box) scan over a rectangular RA/Dec field.

    Thin entry point: all work is delegated to
    :meth:`_dispatch_scan_process` with ``build_pong_payload`` as the
    trajectory builder (``fyst_trajectories.plan_pong_scan``) and a client
    obtained from :meth:`_make_tcs`. The mount is slewed to the scan start
    and the trajectory is POSTed to Go TCS ``/path``. Stopping the Process
    (session ``'stopping'``) makes it send ``/abort``.

    Velocity frame: ``scan_params['velocity']`` is ON-SKY (tangent-plane)
    deg/s, a DIFFERENT frame from ``constant_el_scan`` / ``source_scan``.
    The planner maps it to the mount frame via the field geometry; pass the
    astronomer's on-sky scan speed directly (do NOT pre-scale by cos(el)).

    Parameters:
        scan_params (dict): Pong specification, modeled on
            ``fyst_trajectories.plan_pong_scan``. Required: ``ra_center``,
            ``dec_center``, ``width``, ``height`` (deg); ``velocity``
            (on-sky deg/s), ``spacing`` (deg), ``num_terms`` (int).
            Optional: ``angle`` (deg), ``n_cycles`` (int), ``timestep`` (s).
        scheduled_t0_unix (float): Scheduled start in Unix seconds, or None
            to start as soon as the dispatch buffer allows. Pong uses
            start_time literally (no forward search).
    """
    client = self._make_tcs()
    outcome = yield self._dispatch_scan_process(
        session,
        params,
        job='pong_scan',
        build_fn=build_pong_payload,
        tcs=client,
    )
    return outcome
@ocs_agent.param('scan_params', type=dict)
@ocs_agent.param('scheduled_t0_unix', type=float, default=None)
@inlineCallbacks
def daisy_scan(self, session, params):
    """daisy_scan(scan_params, scheduled_t0_unix=None)

    **Process** - FYST Daisy (constant-velocity petal) scan centred on a
    point source.

    Thin entry point: all work is delegated to
    :meth:`_dispatch_scan_process` with ``build_daisy_payload`` as the
    trajectory builder (``fyst_trajectories.plan_daisy_scan``) and a client
    obtained from :meth:`_make_tcs`. The mount is slewed to the scan start
    and the trajectory is POSTed to Go TCS ``/path``. Stopping the Process
    (session ``'stopping'``) makes it send ``/abort``.

    Velocity frame: ``scan_params['velocity']`` is ON-SKY (tangent-plane)
    deg/s, the SAME frame as Pong and DIFFERENT from ``constant_el_scan`` /
    ``source_scan``. The planner maps it to the mount frame; pass the
    astronomer's on-sky scan speed directly.

    Parameters:
        scan_params (dict): Daisy specification, modeled on
            ``fyst_trajectories.plan_daisy_scan``. Required: ``ra``, ``dec``
            (deg); ``radius`` (deg), ``velocity`` (on-sky deg/s),
            ``turn_radius`` (deg), ``avoidance_radius`` (deg),
            ``start_acceleration`` (deg/s^2) and ``duration`` (s).
            Optional: ``y_offset`` (deg), ``timestep`` (s).
        scheduled_t0_unix (float): Scheduled start in Unix seconds, or None
            to start as soon as the dispatch buffer allows. Daisy uses
            start_time literally (no forward search).
    """
    client = self._make_tcs()
    outcome = yield self._dispatch_scan_process(
        session,
        params,
        job='daisy_scan',
        build_fn=build_daisy_payload,
        tcs=client,
    )
    return outcome
@inlineCallbacks
def abort(self, session, params):
    """abort()

    **Task** - Stop telescope motion: ask every typed scan Process to stop,
    then send a Go TCS ``/abort``.

    Two steps, in this order:

    1. For each operation name in ``SCAN_PROCESS_OPS``, call
       ``self.agent.stop(op)`` and log the returned status and message. An
       exception from a single ``stop`` call is logged and skipped so the
       remaining operations are still asked to stop. The original note gives
       the purpose: a scan Process that sees ``'stopping'`` returns and
       releases ``azel_lock`` instead of waiting out its end-time backstop.
    2. Build a dedicated client with :meth:`_make_tcs` and send ``/abort``
       through :meth:`_safe_abort` in a worker thread. This runs whether or
       not any scan Process was running.

    Returns:
        ``(True, message)`` when ``/abort`` was sent without error,
        ``(False, message)`` when :meth:`_safe_abort` reported a swallowed
        transport failure.
    """
    for op_name in SCAN_PROCESS_OPS:
        try:
            stop_reply = self.agent.stop(op_name)
            code, text, _ = stop_reply
            self.log.info(f'abort: stop({op_name}) -> {code}: {text}')
        except Exception as exc:
            self.log.warn(f'abort: stop({op_name}) failed (continuing): {exc}')

    client = self._make_tcs()
    sent = yield threads.deferToThread(self._safe_abort, client)
    if not sent:
        return False, 'abort: Go TCS /abort failed (transport error; see log).'
    return True, 'abort: Go TCS /abort sent; stop requested on any running scan.'
# NOTE(review): in the visible diff this new file starts directly at the
# decorator below; names used here (inlineCallbacks, time, dsleep, Pacemaker,
# hvac, sh, MONITOR_STRUCTURE, MONITOR_MAX_TIME_DELTA) have no visible import
# or definition -- confirm they are provided elsewhere.
@inlineCallbacks
def monitor(self, session, params):
    """monitor()

    **Process** - Refresh the cache of SATP ACU status information and
    report it on the 'acu_status' and 'acu_status_influx' HK feeds.

    Summary parameters are ACU-provided time code, Azimuth mode,
    Azimuth position, Azimuth velocity, Elevation mode, Elevation position,
    Elevation velocity, Boresight mode, and Boresight position.

    The session.data of this process is a nested dictionary.
    Here's an example::

        {
          "StatusDetailed": {
            "Time": 81.661170959322,
            "Year": 2023,
            "Azimuth mode": "Stop",
            "Azimuth commanded position": -20.0012,
            "Azimuth current position": -20.0012,
            "Azimuth current velocity": 0.0002,
            "Azimuth average position error": 0,
            "Azimuth peak position error": 0,
            "Azimuth computer disabled": false,
            ...
          },

          "StatusResponseRate": 19.237531827325963,
          "PlatformType": "satp",
          "IgnoredAxes": [],
          "NamedPositions": {
            "home": [
              180,
              40
            ]
          },
          "DefaultScanParams": {
            "az_speed": 2.0,
            "az_accel": 1.0,
          },
          "connected": True,
        }

    Differences between SATP and LAT structures:

    - The PlatformType reports "satp" for SATP and "ccat" for LAT.
    - In the case of an SATP, the Status3rdAxis is not populated;
      the Boresight info can be found in StatusDetailed.  In the
      case of the LAT, the corotator info is queried separately
      and stored under Status3rdAxis.
    - The StatusShutter and Hvac entries will be populated for the
      LAT, but empty for SATP.

    Loop outline (one pass per status query while the session is
    ``'running'``):

    1. Pace queries to at most one per ``min_query_period`` seconds and
       refresh ``StatusResponseRate`` every ``report_period`` seconds.
    2. Publish a broadcast-health block on the 'data_qual' feed when the
       pacer is due.
    3. Query all configured datasets; on failure publish an 'acu_error'
       block (at most hourly), mark ``connected`` False, wait and retry.
    4. Map raw fields through ``self.status_field_map`` into
       ``self.data['status']``.
    5. Publish changed/overdue blocks on 'acu_status_influx' and
       'acu_status'.

    Returns:
        ``(True, 'Acquisition exited cleanly.')`` once the session leaves
        the ``'running'`` state.

    """

    # Note that session.data will get scanned, to assign data to
    # feed blocks.  We make an explicit list of items to ignore
    # during that scan (not_data_keys).
    session.data = {'PlatformType': self.acu_config['platform'],
                    'DefaultScanParams': self.scan_params,
                    'StatusResponseRate': 0.,
                    'IgnoredAxes': self.ignore_axes,
                    'NamedPositions': self.named_positions,
                    'connected': False}
    not_data_keys = list(session.data.keys())

    # Block until the ACU answers a Version query; complain at most hourly.
    last_complaint = 0
    while True:
        try:
            version = yield self.acu_read.http.Version()
            break
        except Exception as e:
            if time.time() - last_complaint > 3600:
                # NOTE(review): errormsg is assigned but not used in this
                # branch (the main loop publishes its equivalent).
                errormsg = {'aculib_error_message': str(e)}
                self.log.error(str(e))
                self.log.error('monitor process failed to query version! Will keep trying.')
                last_complaint = time.time()
            yield dsleep(10)

    self.log.info(version)
    session.data['connected'] = True

    # Numbering as per ICD.
    mode_key = {
        'Stop': 0,
        'Preset': 1,
        'ProgramTrack': 2,
        'Rate': 3,
        'SectorScan': 4,
        'SearchSpiral': 5,
        'SurvivalMode': 6,
        'StepTrack': 7,
        'GeoSync': 8,
        'OPT': 9,
        'TLE': 10,
        'Stow': 11,
        'StarTrack': 12,
        'SunTrack': 13,
        'MoonTrack': 14,
        'I11P': 15,
        'AutoTrack/Preset': 16,
        'AutoTrack/PositionMemory': 17,
        'AutoTrack/PT': 18,
        'AutoTrack/OPT': 19,
        'AutoTrack/PT/Search': 20,
        'AutoTrack/TLE': 21,
        'AutoTrack/TLE/Search': 22,

        # Currently we do not have ICD values for these, but they
        # are included in the output of Meta.  ElSync, at least,
        # is a known third axis mode for the LAT.
        'ElSync': 100,
        'UnStow': 101,
        'MaintenanceStow': 102,
    }

    # fault_key digital values taken from ICD (correspond to byte-encoding)
    fault_key = {
        'No Fault': 0,
        'Warning': 1,
        'Fault': 2,
        'Critical': 3,
        'No Data': 4,
        'Latched Fault': 5,
        'Latched Critical Fault': 6,
    }
    pin_key = {
        # Capitalization matches strings in ACU binary, not ICD.
        # Are these needed for the SAT still?
        'Any Moving': 0,
        'All Inserted': 1,
        'All Retracted': 2,
        'Failure': 3,
    }
    lat_pin_key = {
        # From "meta" output.
        'Moving': 0,
        'Inserted': 1,
        'Retracted': 2,
        'Error': 3,
    }
    # String forms of None/False/True as stored after the str() cast below.
    tfn_key = {'None': float('nan'),
               'False': 0,
               'True': 1,
               }
    report_t = time.time()
    report_period = 20
    n_ok = 0
    min_query_period = 0.05   # Seconds
    query_t = 0

    # Assist monitoring and logging changes in certain fields.
    # Each entry is (group in self.data['status'], field name).
    checkdata = [
        ('summary', 'ctime'),
        ('platform_status', 'Remote_mode'),
        ('summary', 'Azimuth_mode'),
        ('summary', 'Elevation_mode'),
        ('summary', 'Boresight_mode'),
        ('corotator', 'Corotator_mode'),
    ]
    prev_checkdata = {k: None for g, k in checkdata}

    @inlineCallbacks
    def _get_status():
        # Query every configured dataset; unconfigured ones yield {}.
        output = {}
        for short, collection in [
                ('status', 'StatusDetailed'),
                ('third', 'Status3rdAxis'),
                ('shutter', 'StatusShutter'),
                ('pointing', 'CmdPointingCorrection'),
                ('hvac', 'Hvac'),
        ]:
            if self.datasets[short]:
                output[collection] = (
                    yield self.acu_read.Values(self.datasets[short]))
            else:
                output[collection] = {}
        return output

    # n_ok is 0 here, so this seeds the rate with a zero value.
    session.data['StatusResponseRate'] = n_ok / (query_t - report_t)
    session.data.update((yield _get_status()))
    qual_pacer = Pacemaker(.1)

    hvm = hvac.HvacManager()

    last_resp_rate = None
    # Last block published per name, used for change detection below.
    data_blocks = {}
    influx_blocks = {}
    # Raw field names already warned about, so each is reported once.
    unknown_fields = set()

    while session.status in ['running']:

        # Enforce the minimum spacing between status queries.
        now = time.time()
        if now - query_t < min_query_period:
            yield dsleep(min_query_period - (now - query_t))

        query_t = time.time()
        if query_t > report_t + report_period:
            resp_rate = n_ok / (query_t - report_t)
            # Log the rate only on a notable change (>0.1 Hz or >1%).
            if last_resp_rate is None or (abs(resp_rate - last_resp_rate)
                                          > max(0.1, last_resp_rate * .01)):
                self.log.info('Data rate for "monitor" stream is now %.3f Hz' % (resp_rate))
                last_resp_rate = resp_rate
            report_t = query_t
            n_ok = 0
            session.data.update({'StatusResponseRate': resp_rate})

        if qual_pacer.next_sample <= time.time():
            # Publish UDP data health feed
            qual_pacer.sleep()  # should be instantaneous, just update counters
            bq = self._broadcast_qual
            bq_offset = bq['time_offset']
            if bq_offset is None:
                bq_offset = 0.
            # Healthy: stream active, updated within 5 s, offset under 1 s.
            bq_ok = (bq['active'] and (now - bq['timestamp'] < 5)
                     and abs(bq_offset) < 1.)
            block = {
                'timestamp': time.time(),
                'block_name': 'qual0',
                'data': {
                    'Broadcast_stream_ok': int(bq_ok),
                    'Broadcast_recv_offset': bq_offset,
                }
            }
            self.agent.publish_to_feed('data_qual', block)

        try:
            session.data.update((yield _get_status()))
            session.data['connected'] = True
            n_ok += 1
            last_complaint = 0
        except Exception as e:
            # Report the failure at most hourly, then back off and retry.
            if now - last_complaint > 3600:
                errormsg = {'aculib_error_message': str(e)}
                self.log.error(str(e))
                acu_error = {'timestamp': time.time(),
                             'block_name': 'ACU_error',
                             'data': errormsg
                             }
                self.agent.publish_to_feed('acu_error', acu_error)
                last_complaint = time.time()
            session.data['connected'] = False
            yield dsleep(1)
            continue

        # Map each raw dataset field into self.data['status'][group][field].
        for k, v in session.data.items():
            if k in not_data_keys:
                continue

            if k == 'Hvac' and len(v) > 0 and (hvm.grouped_fields is None):
                # Runs when first HVAC data are received.  These
                # fields aren't listed explicitly in soaculib so
                # they're analyzed here.
                hvm.parse_fields(v)
                assert len(hvm.grouped_fields['unclassified']) == 0
                self.status_field_map.update(hvm.get_block_info())

            for (key, value) in v.items():
                try:
                    group, block, field = self.status_field_map[key]
                except KeyError:
                    if key not in unknown_fields:
                        self.log.warn(
                            'unknown status field (ignored hereafter): "%s"' % key)
                        unknown_fields.add(key)
                    continue
                if block is None:
                    continue
                # Cast value to saveable type.
                if isinstance(value, bool):
                    value = int(value)
                elif isinstance(value, int) or isinstance(value, float):
                    pass
                elif value is None:
                    value = float('nan')
                else:
                    value = str(value)
                # Store.
                self.data['status'][group][field] = value

        self.data['status']['summary']['ctime'] = \
            sh.timecode(self.data['status']['summary']['Time'])

        # Check for state changes in some key fields.
        new_checkdata = {k: self.data['status'][g].get(k)
                         for g, k in checkdata}

        if new_checkdata['Remote_mode'] != prev_checkdata['Remote_mode']:
            if new_checkdata['Remote_mode']:
                self.log.warn('ACU now in remote mode.')
            else:
                self.log.warn('ACU in local mode!')

        for axis_mode, v in new_checkdata.items():
            # Only the per-axis "*_mode" entries are reported here.
            if 'mode' not in axis_mode or 'Remote' in axis_mode:
                continue
            if v != prev_checkdata[axis_mode]:
                self.log.info('{axis_mode} is now "{v}"',
                              axis_mode=axis_mode, v=v)

        # Same ACU timestamp as last pass: skip publishing this sample.
        if new_checkdata['ctime'] == prev_checkdata['ctime']:
            self.log.warn('ACU time has not changed from previous data point!')
            continue

        prev_checkdata = new_checkdata

        # influx_blocks are constructed based on refers to all
        # other self.data['status'] keys. Do not add more keys to
        # any self.data['status'] categories beyond this point
        new_influx_blocks = {}
        for category in self.data['status']:
            new_influx_blocks[category] = {
                'timestamp': self.data['status']['summary']['ctime'],
                'block_name': category,
                'data': {}}

            if category != 'commands':
                for statkey, statval in self.data['status'][category].items():
                    # Convert every value to a number for the influx feed.
                    if isinstance(statval, float):
                        influx_val = statval
                    elif isinstance(statval, str):
                        for key_map in [tfn_key, mode_key, fault_key, pin_key,
                                        lat_pin_key]:
                            if statval in key_map:
                                influx_val = key_map[statval]
                                break
                        else:
                            raise ValueError('Could not convert value for %s="%s"' %
                                             (statkey, statval))
                    elif isinstance(statval, int):
                        if statkey in ['Year', 'Free_upload_positions']:
                            influx_val = float(statval)
                        else:
                            influx_val = int(statval)
                    # NOTE(review): a value of any other type would reuse the
                    # previous influx_val; the cast above appears to rule
                    # that out -- confirm.
                    new_influx_blocks[category]['data'][statkey + '_influx'] = influx_val
            else:  # i.e. category == 'commands':
                # Commanded positions are published individually, and only
                # when they are not NaN.
                if str(self.data['status']['commands']['Azimuth_commanded_position']) != 'nan':
                    acucommand_az = {'timestamp': self.data['status']['summary']['ctime'],
                                     'block_name': 'ACU_commanded_positions_az',
                                     'data': {'Azimuth_commanded_position_influx': self.data['status']['commands']['Azimuth_commanded_position']}
                                     }
                    self.agent.publish_to_feed('acu_commands_influx', acucommand_az)
                if str(self.data['status']['commands']['Elevation_commanded_position']) != 'nan':
                    acucommand_el = {'timestamp': self.data['status']['summary']['ctime'],
                                     'block_name': 'ACU_commanded_positions_el',
                                     'data': {'Elevation_commanded_position_influx': self.data['status']['commands']['Elevation_commanded_position']}
                                     }
                    self.agent.publish_to_feed('acu_commands_influx', acucommand_el)
                if self.acu_config['platform'] == 'satp':
                    if str(self.data['status']['commands']['Boresight_commanded_position']) != 'nan':
                        acucommand_bs = {'timestamp': self.data['status']['summary']['ctime'],
                                         'block_name': 'ACU_commanded_positions_boresight',
                                         'data': {'Boresight_commanded_position_influx': self.data['status']['commands']['Boresight_commanded_position']}
                                         }
                        self.agent.publish_to_feed('acu_commands_influx', acucommand_bs)

        # Only keep blocks that have changed or have new data.
        block_keys = list(new_influx_blocks.keys())
        for k in block_keys:
            if k not in influx_blocks:
                continue
            B, N = influx_blocks[k], new_influx_blocks[k]
            overdue = (N['timestamp'] - B['timestamp'] > MONITOR_MAX_TIME_DELTA)
            changes = any([B['data'][_k] != _v for _k, _v in N['data'].items()])
            if overdue or changes:
                continue
            del new_influx_blocks[k]

        for block in new_influx_blocks.values():
            # Check that we have data (commands and corotator often don't)
            if len(block['data']) > 0:
                self.agent.publish_to_feed('acu_status_influx', block)
        influx_blocks.update(new_influx_blocks)

        # Assemble data for aggregator ...
        new_blocks = {}
        for block_name, data_key, _, _ in MONITOR_STRUCTURE:
            if block_name is None:
                continue
            new_blocks[block_name] = {
                'timestamp': self.data['status']['summary']['ctime'],
                'block_name': block_name,
                'data': self.data['status'][data_key],
            }

        # Only keep blocks that have changed or have new data.
        for k, _, policy, delta in MONITOR_STRUCTURE:
            if k is None:
                continue
            B, N = data_blocks.get(k), new_blocks[k]
            if len(N['data']) == 0:
                del new_blocks[k]
                continue
            if B is None:
                # Never published before: keep it.
                continue
            if policy == 'tick':  # always store.
                continue
            # underdue: too soon after the last publish of this block.
            underdue = delta is not None and \
                (N['timestamp'] - B['timestamp'] < delta)
            overdue = (N['timestamp'] - B['timestamp'] > MONITOR_MAX_TIME_DELTA) \
                and not underdue
            changes = any([B['data'][_k] != _v for _k, _v in N['data'].items()])
            if (overdue and policy != 'changed') or changes and not underdue:
                continue
            del new_blocks[k]

        for block in new_blocks.values():
            self.agent.publish_to_feed('acu_status', block)

        data_blocks.update(new_blocks)

    return True, 'Acquisition exited cleanly.'
diff --git a/pcs/agents/acu_interface/so_agent.py b/pcs/agents/acu_interface/so_agent.py new file mode 100644 index 0000000..a38927d --- /dev/null +++ b/pcs/agents/acu_interface/so_agent.py @@ -0,0 +1,3725 @@ +import argparse +import random +import struct +import time +from enum import Enum + +import numpy as np +import ocs +import soaculib as aculib +import soaculib.status_keys as status_keys +import twisted.web.client as tclient +import yaml +from autobahn.twisted.util import sleep as dsleep +from ocs import ocs_agent, site_config +from ocs.ocs_twisted import Pacemaker, TimeoutLock +from soaculib.retwisted_backend import RetwistedHttpBackend +from soaculib.twisted_backend import TwistedHttpBackend +from twisted.internet import protocol, reactor, threads +from twisted.internet.defer import DeferredList, inlineCallbacks + +from socs.agents.acu import avoidance +from socs.agents.acu import drivers as sh +from socs.agents.acu import exercisor, hvac, hwp_iface + +#: The number of free ProgramTrack positions, when stack is empty. +FULL_STACK = 10000 + + +#: Initial default scan params, keyed by platform type ('ccat' or 'satp'). +INIT_DEFAULT_SCAN_PARAMS = { + 'ccat': { + 'az_speed': 2, + 'az_accel': 1, + 'el_freq': .15, + 'turnaround_method': 'standard', + 'el_mode': None, + }, + 'satp': { + 'az_speed': 1, + 'az_accel': 1, + 'el_freq': 0, + 'turnaround_method': 'standard', + 'el_mode': None, + }, +} + + +#: Default Sun avoidance configuration blocks, by platform type. +#: Individual settings can be overridden in the platform config file. +#: The full "policy" is constructed from these settings, the +#: motion_limits, and the DEFAULT_POLICY in avoidance.py. When +#: active Sun Avoidance is not enabled, policy parameters are still +#: useful for assessing Sun safety. 
+INIT_SUN_CONFIGS = { + 'ccat': { + 'enabled': False, + 'exclusion_radius': 20, + 'el_horizon': 0, + 'el_dodging': True, + 'min_sun_time': 1800, + 'response_time': 7200, + }, + 'satp': { + 'enabled': True, + 'exclusion_radius': 20, + 'el_horizon': 10, + 'min_sun_time': 1800, + 'response_time': 7200, + }, +} + +#: How often to refresh the Sun Safety map (valid up to 2x this time) +SUN_MAP_REFRESH = 6 * avoidance.HOUR + + +#: Things the ACU might say when you've done things properly +OK_RESPONSES = [ + b'OK, Command executed.', + b'OK, Command send.', +] + + +# The data structure below defines how data fields are organized in +# the "status" readout of the "monitor" process. Each entry in the +# list is a tuple: +# +# (block_name, fields_key, policy, sample_period) +# +# The "block_name" is the block name in the sense of housekeeping data +# format. The "fields_key" corresponds to a labeled group of ACU fields, +# as defined in soaculib.status_keys. The "policy" is a description, +# for the monitor process, of how to store the data. The values for +# policy are: +# +# - None: publish the data at least every X seconds (see +# MONITOR_MAX_TIME_DELTA), or if any of the values have changed. +# - 'tick': publish every sample (as a reference tick). +# - 'changed': publish only when values change (otherwise, stale). +# +# The sample_period is the minimum spacing (in seconds) between samples, +# even if values have changed. +# +# Note that *all groups found in soaculib.status_keys* must be listed +# here -- or an error will be raised on startup. To drop a block of +# data (as defined by fields_key) from the output data, set the +# block_name to None. + +#: Block names and update policy for status fields in monitor process. 
+MONITOR_STRUCTURE = [ + ('ACU_summary_output', 'summary', 'tick', None), + ('ACU_axis_faults', 'axis_faults_errors_overages', None, None), + ('ACU_position_errors', 'position_errors', None, None), + ('ACU_axis_limits', 'axis_limits', None, None), + ('ACU_axis_warnings', 'axis_warnings', None, None), + ('ACU_axis_failures', 'axis_failures', None, None), + ('ACU_axis_state', 'axis_state', None, None), + ('ACU_oscillation_alarm', 'osc_alarms', None, None), + ('ACU_command_status', 'commands', None, None), + ('ACU_general_errors', 'ACU_failures_errors', None, None), + ('ACU_platform_status', 'platform_status', None, None), + ('ACU_emergency', 'ACU_emergency', None, None), + ('ACU_corotator', 'corotator', None, None), + ('ACU_shutter', 'shutter', None, None), + ('ACU_tilt', 'tilt_slow', 'changed', 0.5), + (None, 'tilt_fast', None, None), + ('ACU_sun_avoidance', 'sun_avoidance', None, 1.), + ('ACU_corrections', 'corrections', None, 10.), + ('ACU_hvac_data', 'hvac_data', None, 10.), + ('ACU_hvac_ctrl', 'hvac_ctrl', None, None), + ('ACU_hvac_faults', 'hvac_faults', None, None), +] + + +#: Maximum update time (in s) for "monitor" process data, even with no changes +MONITOR_MAX_TIME_DELTA = 2. + + +class ACUAgent: + """Agent to acquire data from an ACU and control telescope pointing with the + ACU. + + Parameters: + acu_config (str): + The configuration for the ACU, as referenced in aculib.configs. + Default value is 'guess'. + startup (bool): + If True, immediately start the main monitoring processes + for status and UDP data. + ignore_axes (list of str): + List of axes to "ignore". "ignore" means that the axis + will not be commanded. If a user requests an action that + would otherwise move the axis, it is not moved but the + action is assumed to have succeeded. The values in this + list should be drawn from "az", "el", "third", and "none". + This argument *replaces* the setting from the config file. + ("none" entries will simply be ignored.) 
+ disable_idle_reset (bool): + If True, don't auto-start idle_reset process for LAT. + disable_sun_avoidance (bool): If set, start up with Sun + Avoidance completely disabled. + disable_hwp_interlocks (bool): If set, start up with HWP + Interlocks disabled. + min_el (float): If not None, override the default configured + elevation lower limit. + max_el (float): If not None, override the default configured + elevation upper limit. + + """ + + def __init__(self, agent, acu_config='guess', + startup=False, ignore_axes=None, + disable_idle_reset=False, + disable_sun_avoidance=False, + disable_hwp_interlocks=False, + min_el=None, max_el=None, + ): + + # Agent support + + self.agent = agent + self.log = agent.log + + # Separate locks for exclusive access to az/el, and boresight motions. + self.azel_lock = TimeoutLock() + self.boresight_lock = TimeoutLock() + + # Config file processing + + self.acu_config_name = acu_config + self.acu_config = aculib.guess_config(acu_config) + self.platform_type = self.acu_config['platform'] # ccat, satp. + + # List of datasets to read as "status". The 'status' dataset + # is necessary; the 'third' axis can be None (in SATP all the + # boresight info is included in the status dataset); the + # 'shutter' is for LAT shutter and 'pointing' is for LAT + # tiltmeter. + _dsets = self.acu_config['_datasets'] + self.datasets = { + 'status': _dsets.get('default_dataset'), + 'third': _dsets.get('third_axis_dataset'), + 'shutter': _dsets.get('shutter_dataset'), + 'pointing': _dsets.get('pointing_dataset'), + 'hvac': _dsets.get('hvac_dataset'), + } + for k, v in self.datasets.items(): + if v is not None: + self.datasets[k] = dict(_dsets['datasets'])[v] + + # Create a map from each status key (read through the + # self.datasets) to the output block and field name. 
+ self.status_field_map = {} + for group, group_fields in \ + status_keys.status_fields[self.platform_type]['status_fields'].items(): + for block_name, block_group, _, _ in MONITOR_STRUCTURE: + if block_group == group: + break + else: + raise ValueError(f"status_key block '{group}' not found in MONITOR_STRUCTURE.") + for acu_key, block_key in group_fields.items(): + self.status_field_map[acu_key] = (group, block_name, block_key) + + # Config file + overrides processing + + # Motion limits (az / el / third ranges). + self.motion_limits = self.acu_config['motion_limits'] + if min_el: + self.log.warn(f'Override: min_el={min_el}') + self.motion_limits['elevation']['lower'] = min_el + if max_el: + self.log.warn(f'Override: max_el={max_el}') + self.motion_limits['elevation']['upper'] = max_el + + # Sun avoidance (must be set up *after* finalizing motion limits) + self.sun_config = INIT_SUN_CONFIGS[self.platform_type] + self.sun_config.update(self.acu_config.get('sun_avoidance', {})) + if disable_sun_avoidance: + self.sun_config['enabled'] = False + self.log.info('On startup, sun_config={sun_config}', + sun_config=self.sun_config) + self._reset_sun_params() + + # Scan params (default vel / accel / el freq). + self.default_scan_params = \ + dict(INIT_DEFAULT_SCAN_PARAMS[self.platform_type]) + for _k in self.default_scan_params.keys(): + _v = self.acu_config.get('scan_params', {}).get(_k) + if _v is not None: + self.default_scan_params[_k] = _v + agent.log.info('On startup, default scan_params={scan_params}', + scan_params=self.default_scan_params) + self.scan_params = dict(self.default_scan_params) + + # Axes to ignore. + self.ignore_axes = self.acu_config.get('ignore_axes', []) + if ignore_axes is not None: + self.ignore_axes = [x for x in ignore_axes if x != 'none'] + if len(self.ignore_axes): + assert all([x in ['az', 'el', 'third'] for x in self.ignore_axes]) + agent.log.warn('Note ignore_axes={i}', i=self.ignore_axes) + + # Named positions. 
+ self.named_positions = self.acu_config.get('named_positions', {}) + for k, v in self.named_positions.items(): + agent.log.info(f'Using named position {k}: {v[0]},{v[1]}') + try: + str(k), float(v[0]), float(v[1]) + except Exception: + agent.log.error('Failed to parse named position "{k}"', k=k) + + # HWP interlocks. + self.hwp_rules = hwp_iface.HWPInterlocks.from_dict( + self.acu_config.get('hwp_interlocks')) + if disable_hwp_interlocks: + self.hwp_rules.enabled = False + startup_monitor_hwp = (startup and self.hwp_rules.configured) + + # Exercise plan. + self.exercise_plan = self.acu_config.get('exercise_plan') + + # Other flags. + startup_idle_reset = (self.platform_type in ['lat', 'ccat'] + and not disable_idle_reset) + + # The connections to the ACU. + + tclient._HTTP11ClientFactory.noisy = False + + self.acu_control = aculib.AcuControl( + acu_config, backend=RetwistedHttpBackend(persistent=True)) + self.acu_read = aculib.AcuControl( + acu_config, backend=TwistedHttpBackend(persistent=True), readonly=True) + + # Structures for passing status data around + + # self.data provides a place to reference data from the monitors. + # 'status' is populated by the monitor operation + # 'broadcast' is populated by the udp_monitor operation + # 'hwp' is populated by the monitor_hwp operation + + self.data = { + 'status': {}, + 'broadcast': {}, + 'hwp': {}, + } + for b, k, _, _ in MONITOR_STRUCTURE: + if b is None: + continue + self.data['status'][k] = {} + + # Structure for the broadcast process to communicate state to + # the monitor process, for a data quality feed. + self._broadcast_qual = { + 'timestamp': time.time(), + 'active': False, + 'time_offset': 0, + } + + # Task, Process, Feed registration. 
+ + agent.register_process('monitor', + self.monitor, + self._simple_process_stop, + blocking=False, + startup=startup) + agent.register_process('broadcast', + self.broadcast, + self._simple_process_stop, + blocking=False, + startup=startup) + if 'ext' in self.acu_control.streams: + agent.register_process('broadcast_ext', + self.broadcast_ext, + self._simple_process_stop, + blocking=False, + startup=False) + agent.register_process('monitor_sun', + self.monitor_sun, + self._simple_process_stop, + blocking=False, + startup=startup) + agent.register_process('monitor_hwp', + self.monitor_hwp, + self._simple_process_stop, + blocking=False, + startup=startup_monitor_hwp) + agent.register_process('generate_scan', + self.generate_scan, + self._simple_process_stop, + blocking=False, + startup=False) + agent.register_process('idle_reset', + self.idle_reset, + self._simple_process_stop, + blocking=False, + startup=startup_idle_reset) + agent.register_process('fromfile_scan', + self.fromfile_scan, + self._simple_process_stop, + blocking=False) + + basic_agg_params = {'frame_length': 60} + fullstatus_agg_params = {'frame_length': 60, + 'exclude_influx': True, + 'exclude_aggregator': False + } + influx_agg_params = {'frame_length': 60, + 'exclude_influx': False, + 'exclude_aggregator': True + } + agent.register_feed('acu_status', + record=True, + agg_params=fullstatus_agg_params, + buffer_time=1) + agent.register_feed('acu_status_influx', + record=True, + agg_params=influx_agg_params, + buffer_time=1) + agent.register_feed('acu_commands_influx', + record=True, + agg_params=influx_agg_params, + buffer_time=1) + agent.register_feed('acu_udp_stream', + record=True, + agg_params=fullstatus_agg_params, + buffer_time=1) + agent.register_feed('acu_broadcast_influx', + record=True, + agg_params=influx_agg_params, + buffer_time=1) + if 'ext' in self.acu_control.streams: + agent.register_feed('acu_ext_stream', + record=True, + agg_params=fullstatus_agg_params, + buffer_time=1) + 
agent.register_feed('acu_ext_influx', + record=True, + agg_params=influx_agg_params, + buffer_time=1) + agent.register_feed('acu_error', + record=True, + agg_params=basic_agg_params, + buffer_time=1) + agent.register_feed('sun', + record=True, + agg_params=basic_agg_params, + buffer_time=0) + agent.register_feed('data_qual', + record=True, + agg_params=basic_agg_params, + buffer_time=0) + agent.register_feed('scan_params', + record=True, + agg_params=basic_agg_params, + buffer_time=0) + agent.register_task('go_to', + self.go_to, + blocking=False, + aborter=self._simple_task_abort) + agent.register_task('go_to_named', + self.go_to_named, + blocking=False) + agent.register_task('set_scan_params', + self.set_scan_params, + blocking=False) + agent.register_task('set_boresight', + self.set_boresight, + blocking=False, + aborter=self._simple_task_abort) + agent.register_task('set_speed_mode', + self.set_speed_mode, + blocking=False) + agent.register_task('stop_and_clear', + self.stop_and_clear, + blocking=False) + agent.register_task('clear_faults', + self.clear_faults, + blocking=False) + agent.register_task('special_action', + self.special_action, + blocking=False, + aborter=self._simple_task_abort) + agent.register_task('update_sun', + self.update_sun, + blocking=False) + agent.register_task('escape_sun_now', + self.escape_sun_now, + blocking=False, + aborter=self._simple_task_abort) + if self.datasets['shutter']: + agent.register_task('set_shutter', + self.set_shutter, + blocking=False, + aborter=self._simple_task_abort) + agent.register_task('update_hwp', + self.update_hwp, + blocking=False) + + # Automatic exercise program... + if self.exercise_plan: + agent.register_process( + 'exercise', self.exercise, self._simple_process_stop, + stopper_blocking=False) + # Use longer default frame length ... very low volume feed. 
+ agent.register_feed('activity', + record=True, + buffer_time=0, + agg_params={ + 'frame_length': 600, + }) + + @inlineCallbacks + def _simple_task_abort(self, session, params): + # Trigger a task abort by updating state to "stopping" + yield session.set_status('stopping') + + @inlineCallbacks + def _simple_process_stop(self, session, params): + # Trigger a process stop by updating state to "stopping" + yield session.set_status('stopping') + + @ocs_agent.param('_') + @inlineCallbacks + def idle_reset(self, session, params): + """idle_reset() + + **Process** - To prevent LAT from going into Survival mode, + do something on the command interface every so often. (The + default inactivity timeout is 1 minute.) + + """ + IDLE_RESET_TIMEOUT = 60 # The watchdog timeout in ACU + + next_action = 0 + + while session.status in ['starting', 'running']: + if time.time() < next_action: + yield dsleep(IDLE_RESET_TIMEOUT / 10) + continue + success = True + try: + yield self.acu_control.http.Values(self.datasets['status']) + except Exception as e: + self.log.info(' -- failed to reset Idle Stow time: {err}', err=e) + success = False + session.data.update({ + 'timestamp': time.time(), + 'reset_ok': success}) + if not success: + next_action = time.time() + 4 + else: + next_action = time.time() + IDLE_RESET_TIMEOUT / 2 + + return True, 'Process "idle_reset" exited cleanly.' + + @inlineCallbacks + def monitor(self, session, params): + """monitor() + + **Process** - Refresh the cache of SATP ACU status information and + report it on the 'acu_status' and 'acu_status_influx' HK feeds. + + Summary parameters are ACU-provided time code, Azimuth mode, + Azimuth position, Azimuth velocity, Elevation mode, Elevation position, + Elevation velocity, Boresight mode, and Boresight position. + + The session.data of this process is a nested dictionary. 
+ Here's an example:: + + { + "StatusDetailed": { + "Time": 81.661170959322, + "Year": 2023, + "Azimuth mode": "Stop", + "Azimuth commanded position": -20.0012, + "Azimuth current position": -20.0012, + "Azimuth current velocity": 0.0002, + "Azimuth average position error": 0, + "Azimuth peak position error": 0, + "Azimuth computer disabled": false, + ... + }, + "Status3rdAxis": { + "3rd axis Mode": "Stop", + "3rd axis commanded position": 77, + "3rd axis current position": 77, + "3rd axis computer disabled": "No Fault", + ... + }, + "StatusShutter": { + "Shutter Closed": false, + ... + }, + "Hvac": { + "Booster EL Housing Failure": false, + "Booster EL Housing on": false, + ... + }, + "StatusResponseRate": 19.237531827325963, + "PlatformType": "satp", + "IgnoredAxes": [], + "NamedPositions": { + "home": [ + 180, + 40 + ] + }, + "DefaultScanParams": { + "az_speed": 2.0, + "az_accel": 1.0, + }, + "connected": True, + } + + Differences between SATP and LAT structures: + + - The PlatformType reports "satp" for SATP and "ccat" for LAT. + - In the case of an SATP, the Status3rdAxis is not populated; + the Boresight info can be found in StatusDetailed. In the + case of the LAT, the corotator info is queried separately + and stored under Status3rdAxis. + - The StatusShutter and Hvac entries will be populated for the + LAT, but empty for SATP. + + """ + + # Note that session.data will get scanned, to assign data to + # feed blocks. We make an explicit list of items to ignore + # during that scan (not_data_keys). 
+ session.data = {'PlatformType': self.acu_config['platform'], + 'DefaultScanParams': self.scan_params, + 'StatusResponseRate': 0., + 'IgnoredAxes': self.ignore_axes, + 'NamedPositions': self.named_positions, + 'connected': False} + not_data_keys = list(session.data.keys()) + + last_complaint = 0 + while True: + try: + version = yield self.acu_read.http.Version() + break + except Exception as e: + if time.time() - last_complaint > 3600: + errormsg = {'aculib_error_message': str(e)} + self.log.error(str(e)) + self.log.error('monitor process failed to query version! Will keep trying.') + last_complaint = time.time() + yield dsleep(10) + + self.log.info(version) + session.data['connected'] = True + + # Numbering as per ICD. + mode_key = { + 'Stop': 0, + 'Preset': 1, + 'ProgramTrack': 2, + 'Rate': 3, + 'SectorScan': 4, + 'SearchSpiral': 5, + 'SurvivalMode': 6, + 'StepTrack': 7, + 'GeoSync': 8, + 'OPT': 9, + 'TLE': 10, + 'Stow': 11, + 'StarTrack': 12, + 'SunTrack': 13, + 'MoonTrack': 14, + 'I11P': 15, + 'AutoTrack/Preset': 16, + 'AutoTrack/PositionMemory': 17, + 'AutoTrack/PT': 18, + 'AutoTrack/OPT': 19, + 'AutoTrack/PT/Search': 20, + 'AutoTrack/TLE': 21, + 'AutoTrack/TLE/Search': 22, + + # Currently we do not have ICD values for these, but they + # are included in the output of Meta. ElSync, at least, + # is a known third axis mode for the LAT. + 'ElSync': 100, + 'UnStow': 101, + 'MaintenanceStow': 102, + } + + # fault_key digital values taken from ICD (correspond to byte-encoding) + fault_key = { + 'No Fault': 0, + 'Warning': 1, + 'Fault': 2, + 'Critical': 3, + 'No Data': 4, + 'Latched Fault': 5, + 'Latched Critical Fault': 6, + } + pin_key = { + # Capitalization matches strings in ACU binary, not ICD. + # Are these needed for the SAT still? + 'Any Moving': 0, + 'All Inserted': 1, + 'All Retracted': 2, + 'Failure': 3, + } + lat_pin_key = { + # From "meta" output. 
+ 'Moving': 0, + 'Inserted': 1, + 'Retracted': 2, + 'Error': 3, + } + tfn_key = {'None': float('nan'), + 'False': 0, + 'True': 1, + } + report_t = time.time() + report_period = 20 + n_ok = 0 + min_query_period = 0.05 # Seconds + query_t = 0 + + # Assist monitoring and logging changes in certain fields. + checkdata = [ + ('summary', 'ctime'), + ('platform_status', 'Remote_mode'), + ('summary', 'Azimuth_mode'), + ('summary', 'Elevation_mode'), + ('summary', 'Boresight_mode'), + ('corotator', 'Corotator_mode'), + ] + prev_checkdata = {k: None for g, k in checkdata} + + @inlineCallbacks + def _get_status(): + output = {} + for short, collection in [ + ('status', 'StatusDetailed'), + ('third', 'Status3rdAxis'), + ('shutter', 'StatusShutter'), + ('pointing', 'CmdPointingCorrection'), + ('hvac', 'Hvac'), + ]: + if self.datasets[short]: + output[collection] = ( + yield self.acu_read.Values(self.datasets[short])) + else: + output[collection] = {} + return output + + session.data['StatusResponseRate'] = n_ok / (query_t - report_t) + session.data.update((yield _get_status())) + qual_pacer = Pacemaker(.1) + + hvm = hvac.HvacManager() + + last_resp_rate = None + data_blocks = {} + influx_blocks = {} + unknown_fields = set() + + while session.status in ['running']: + + now = time.time() + if now - query_t < min_query_period: + yield dsleep(min_query_period - (now - query_t)) + + query_t = time.time() + if query_t > report_t + report_period: + resp_rate = n_ok / (query_t - report_t) + if last_resp_rate is None or (abs(resp_rate - last_resp_rate) + > max(0.1, last_resp_rate * .01)): + self.log.info('Data rate for "monitor" stream is now %.3f Hz' % (resp_rate)) + last_resp_rate = resp_rate + report_t = query_t + n_ok = 0 + session.data.update({'StatusResponseRate': resp_rate}) + + if qual_pacer.next_sample <= time.time(): + # Publish UDP data health feed + qual_pacer.sleep() # should be instantaneous, just update counters + bq = self._broadcast_qual + bq_offset = bq['time_offset'] 
+ if bq_offset is None: + bq_offset = 0. + bq_ok = (bq['active'] and (now - bq['timestamp'] < 5) + and abs(bq_offset) < 1.) + block = { + 'timestamp': time.time(), + 'block_name': 'qual0', + 'data': { + 'Broadcast_stream_ok': int(bq_ok), + 'Broadcast_recv_offset': bq_offset, + } + } + self.agent.publish_to_feed('data_qual', block) + + try: + session.data.update((yield _get_status())) + session.data['connected'] = True + n_ok += 1 + last_complaint = 0 + except Exception as e: + if now - last_complaint > 3600: + errormsg = {'aculib_error_message': str(e)} + self.log.error(str(e)) + acu_error = {'timestamp': time.time(), + 'block_name': 'ACU_error', + 'data': errormsg + } + self.agent.publish_to_feed('acu_error', acu_error) + last_complaint = time.time() + session.data['connected'] = False + yield dsleep(1) + continue + + for k, v in session.data.items(): + if k in not_data_keys: + continue + + if k == 'Hvac' and len(v) > 0 and (hvm.grouped_fields is None): + # Runs when first HVAC data are received. These + # fields aren't listed explicitly in soaculib so + # they're analyzed here. + hvm.parse_fields(v) + assert len(hvm.grouped_fields['unclassified']) == 0 + self.status_field_map.update(hvm.get_block_info()) + + for (key, value) in v.items(): + try: + group, block, field = self.status_field_map[key] + except KeyError: + if key not in unknown_fields: + self.log.warn( + 'unknown status field (ignored hereafter): "%s"' % key) + unknown_fields.add(key) + continue + if block is None: + continue + # Cast value to saveable type. + if isinstance(value, bool): + value = int(value) + elif isinstance(value, int) or isinstance(value, float): + pass + elif value is None: + value = float('nan') + else: + value = str(value) + # Store. + self.data['status'][group][field] = value + + self.data['status']['summary']['ctime'] = \ + sh.timecode(self.data['status']['summary']['Time']) + + # Check for state changes in some key fields. 
+ new_checkdata = {k: self.data['status'][g].get(k) + for g, k in checkdata} + + if new_checkdata['Remote_mode'] != prev_checkdata['Remote_mode']: + if new_checkdata['Remote_mode']: + self.log.warn('ACU now in remote mode.') + else: + self.log.warn('ACU in local mode!') + + for axis_mode, v in new_checkdata.items(): + if 'mode' not in axis_mode or 'Remote' in axis_mode: + continue + if v != prev_checkdata[axis_mode]: + self.log.info('{axis_mode} is now "{v}"', + axis_mode=axis_mode, v=v) + + if new_checkdata['ctime'] == prev_checkdata['ctime']: + self.log.warn('ACU time has not changed from previous data point!') + continue + + prev_checkdata = new_checkdata + + # influx_blocks are constructed based on refers to all + # other self.data['status'] keys. Do not add more keys to + # any self.data['status'] categories beyond this point + new_influx_blocks = {} + for category in self.data['status']: + new_influx_blocks[category] = { + 'timestamp': self.data['status']['summary']['ctime'], + 'block_name': category, + 'data': {}} + + if category != 'commands': + for statkey, statval in self.data['status'][category].items(): + if isinstance(statval, float): + influx_val = statval + elif isinstance(statval, str): + for key_map in [tfn_key, mode_key, fault_key, pin_key, + lat_pin_key]: + if statval in key_map: + influx_val = key_map[statval] + break + else: + raise ValueError('Could not convert value for %s="%s"' % + (statkey, statval)) + elif isinstance(statval, int): + if statkey in ['Year', 'Free_upload_positions']: + influx_val = float(statval) + else: + influx_val = int(statval) + new_influx_blocks[category]['data'][statkey + '_influx'] = influx_val + else: # i.e. 
category == 'commands': + if str(self.data['status']['commands']['Azimuth_commanded_position']) != 'nan': + acucommand_az = {'timestamp': self.data['status']['summary']['ctime'], + 'block_name': 'ACU_commanded_positions_az', + 'data': {'Azimuth_commanded_position_influx': self.data['status']['commands']['Azimuth_commanded_position']} + } + self.agent.publish_to_feed('acu_commands_influx', acucommand_az) + if str(self.data['status']['commands']['Elevation_commanded_position']) != 'nan': + acucommand_el = {'timestamp': self.data['status']['summary']['ctime'], + 'block_name': 'ACU_commanded_positions_el', + 'data': {'Elevation_commanded_position_influx': self.data['status']['commands']['Elevation_commanded_position']} + } + self.agent.publish_to_feed('acu_commands_influx', acucommand_el) + if self.acu_config['platform'] == 'satp': + if str(self.data['status']['commands']['Boresight_commanded_position']) != 'nan': + acucommand_bs = {'timestamp': self.data['status']['summary']['ctime'], + 'block_name': 'ACU_commanded_positions_boresight', + 'data': {'Boresight_commanded_position_influx': self.data['status']['commands']['Boresight_commanded_position']} + } + self.agent.publish_to_feed('acu_commands_influx', acucommand_bs) + + # Only keep blocks that have changed or have new data. + block_keys = list(new_influx_blocks.keys()) + for k in block_keys: + if k not in influx_blocks: + continue + B, N = influx_blocks[k], new_influx_blocks[k] + overdue = (N['timestamp'] - B['timestamp'] > MONITOR_MAX_TIME_DELTA) + changes = any([B['data'][_k] != _v for _k, _v in N['data'].items()]) + if overdue or changes: + continue + del new_influx_blocks[k] + + for block in new_influx_blocks.values(): + # Check that we have data (commands and corotator often don't) + if len(block['data']) > 0: + self.agent.publish_to_feed('acu_status_influx', block) + influx_blocks.update(new_influx_blocks) + + # Assemble data for aggregator ... 
+ new_blocks = {} + for block_name, data_key, _, _ in MONITOR_STRUCTURE: + if block_name is None: + continue + new_blocks[block_name] = { + 'timestamp': self.data['status']['summary']['ctime'], + 'block_name': block_name, + 'data': self.data['status'][data_key], + } + + # Only keep blocks that have changed or have new data. + for k, _, policy, delta in MONITOR_STRUCTURE: + if k is None: + continue + B, N = data_blocks.get(k), new_blocks[k] + if len(N['data']) == 0: + del new_blocks[k] + continue + if B is None: + continue + if policy == 'tick': # always store. + continue + underdue = delta is not None and \ + (N['timestamp'] - B['timestamp'] < delta) + overdue = (N['timestamp'] - B['timestamp'] > MONITOR_MAX_TIME_DELTA) \ + and not underdue + changes = any([B['data'][_k] != _v for _k, _v in N['data'].items()]) + if (overdue and policy != 'changed') or changes and not underdue: + continue + del new_blocks[k] + + for block in new_blocks.values(): + self.agent.publish_to_feed('acu_status', block) + + data_blocks.update(new_blocks) + + return True, 'Acquisition exited cleanly.' + + @ocs_agent.param('auto_enable', type=bool, default=True) + def broadcast(self, session, params): + """broadcast(auto_enable=True) + + **Process** - Read UDP data from the port specified by + self.acu_config, decode it, and publish to HK feeds. Full + resolution (200 Hz) data are written to feed "acu_udp_stream" + while 1 Hz decimated are written to "acu_broadcast_influx". + The 1 Hz decimated output are also stored in session.data. + + Args: + auto_enable (bool): If True, the Process will try to + configure and (re-)enable the UDP stream if at any point + the stream seems to drop out. 
+ + Notes: + The session.data looks like this (this is for a SATP running + with servo details in the UDP output):: + + { + "Time": 1679499948.8234625, + "Corrected_Azimuth": -20.00112176010607, + "Corrected_Elevation": 50.011521050839434, + "Corrected_Boresight": 29.998428712246067, + "Raw_Azimuth": -20.00112176010607, + "Raw_Elevation": 50.011521050839434, + "Raw_Boresight": 29.998428712246067, + "Azimuth_Current_1": -0.000384521484375, + "Azimuth_Current_2": -0.0008331298828125, + "Elevation_Current_1": 0.003397979736328125, + "Boresight_Current_1": -0.000483856201171875, + "Boresight_Current_2": -0.000105743408203125, + "Azimuth_Vel_1": -0.000002288818359375, + "Azimuth_Vel_2": 0, + "Az_Vel_Act": -0.0000011444091796875, + "Az_Vel_Des": 0, + "Az_Vffw": 0, + "Az_Pos_Des": -20.00112176010607, + "Az_Pos_Err": 0 + } + + """ + control = self.acu_control.streams['main'] + return self._udp_stream_handler( + session, 'main', control, self.data['broadcast'], + 'acu_udp_stream', 'acu_broadcast_influx', + auto_enable=params['auto_enable'], + influx_suffix='_bcast_influx') + + @ocs_agent.param('auto_enable', type=bool, default=True) + def broadcast_ext(self, session, params): + """broadcast_ext(auto_enable=True) + + **Process** - Read UDP data from the "ext" UDP stream, as + defined in self.acu_config. Like the broadcast process, this + will write full rate data to an aggregator feed and + downsampled data to an influx feed. + + Args: + auto_enable (bool): If True, the Process will try to + configure and (re-)enable the UDP stream if at any point + the stream seems to drop out. + + Notes: + The session.data is as you would find for the broadcast + process with a "Time" field and a bunch of readings, updated + about once per second. 
@inlineCallbacks
def _udp_stream_handler(self, session, stream_name, stream_control,
                        data_store, agg_feed, influx_feed,
                        auto_enable=True, influx_suffix=''):
    """Collect data from UDP (200 Hz) stream.  This is a helper
    function that can be used to monitor either PositionBroadcast
    or PositionBroadcastExt.

    Samples are consumed in batches of 200.  Each sample in a batch
    is published individually to ``agg_feed``; the per-field mean of
    the batch is published to ``influx_feed`` and merged into
    ``session.data``.

    Args:
        session: session object for parent Process
        stream_name (str): stream identifier string for log messages
        stream_control: soaculib BroadcastStreamControl instance
        data_store (dict): place to put the latest readings
        agg_feed (str): feed name for full rate data
        influx_feed (str): feed name for downsampled data
        auto_enable (bool): whether to use http API to turn stream on/off
            if needed.
        influx_suffix (str): suffix to append to all influx fields.

    Returns:
        (True, message) once ``session.status`` is no longer 'running'.

    """
    # Number of samples processed (and averaged) together.
    BATCH_SIZE = 200
    # Seconds without a full batch before the stream is declared down.
    OUTAGE_TIMEOUT = 3
    # Seconds added to the retry time after each enable attempt.
    RECONFIG_INTERVAL = 60

    session.data = {}

    UDP_PORT = stream_control.p['Port']
    schema = stream_control.p['schema']

    # For unpacking
    FMT = schema['format']
    FMT_LEN = struct.calcsize(FMT)

    # Confirm that first two fields are the timecode.
    fields = list(schema['fields'])
    assert fields[:2] == ['Day', 'Time']
    fields = [f.replace(' ', '_') for f in fields[2:]]

    # The udp_data list is used as a queue; it contains
    # struct-unpacked samples from the UDP stream in the form
    # (time_received, data).
    udp_data = []

    class MonitorUDP(protocol.DatagramProtocol):
        def datagramReceived(self, data, src_addr):
            now = time.time()
            offset = 0
            # A datagram may carry several fixed-size records; any
            # trailing partial record is ignored.
            while len(data) - offset >= FMT_LEN:
                udp_data.append((now, struct.unpack_from(FMT, data, offset)))
                offset += FMT_LEN

    handler = reactor.listenUDP(int(UDP_PORT), MonitorUDP())

    influx_data = {k: [] for k in ['Time'] + fields}

    best_dt = None

    active = True
    last_packet_time = time.time()
    # Bound up front so the re-enable check below never depends on the
    # outage branch having run first.
    next_reconfig = last_packet_time

    try:
        while session.status in ['running']:
            now = time.time()

            if len(udp_data) >= BATCH_SIZE:
                if not active:
                    self.log.info(f'UDP packets are being received [{stream_name}].')
                    active = True
                last_packet_time = now
                best_dt = None

                process_data = udp_data[:BATCH_SIZE]
                udp_data = udp_data[BATCH_SIZE:]
                for recv_time, d in process_data:
                    time_d, fields_d = d[:2], d[2:]
                    data_ctime = sh.timecode(time_d[0] + time_d[1] / sh.DAY)
                    # Track the (signed) offset of smallest magnitude.
                    # Both sides are compared by magnitude; comparing
                    # against the signed value would freeze best_dt as
                    # soon as it went negative.
                    dt = recv_time - data_ctime
                    if best_dt is None or abs(dt) < abs(best_dt):
                        best_dt = dt

                    data_store['Time'] = data_ctime
                    influx_data['Time'].append(data_ctime)
                    for _f, _d in zip(fields, fields_d):
                        data_store[_f] = _d
                        influx_data[_f].append(_d)
                    acu_udp_stream = {'timestamp': data_store['Time'],
                                      'block_name': 'ACU_broadcast',
                                      'data': data_store
                                      }
                    self.agent.publish_to_feed(agg_feed, acu_udp_stream)
                influx_means = {}
                for key, vals in influx_data.items():
                    influx_means[key] = np.mean(vals)
                    influx_data[key] = []
                acu_broadcast_influx = {
                    'timestamp': influx_means['Time'],
                    'block_name': 'ACU_bcast_influx',
                    'data': {k + influx_suffix: v for k, v in influx_means.items()},
                }
                self.agent.publish_to_feed(influx_feed, acu_broadcast_influx)
                session.data.update(influx_means)
            else:
                # Consider logging an outage, attempting reconfig.
                if active and now - last_packet_time > OUTAGE_TIMEOUT:
                    self.log.info(f'No UDP packets are being received [{stream_name}].')
                    active = False
                    next_reconfig = time.time()
                if not active and auto_enable and next_reconfig <= time.time():
                    self.log.info(f'Requesting UDP stream enable [{stream_name}].')
                    try:
                        yield stream_control.safe_enable()
                    except Exception as err:
                        # Best effort: log and retry later rather than
                        # killing the acquisition process.
                        self.log.info('Exception while trying to enable stream '
                                      '[{stream_name}]: {err}',
                                      stream_name=stream_name, err=err)
                    next_reconfig += RECONFIG_INTERVAL

            self._broadcast_qual = {
                'timestamp': now,
                'active': active,
                'time_offset': best_dt,
            }
            yield dsleep(.01)
    finally:
        # Release the UDP port even if the loop exits via an exception,
        # so the process can be restarted on the same port.
        handler.stopListening()

    return True, 'Acquisition exited cleanly.'
@inlineCallbacks
def _set_modes(self, az=None, el=None, third=None):
    """Change the mode of individual axes, leaving the others as they are.

    The current three-axis mode list is read back from the ACU first,
    the requested entries are overwritten, and the list is written
    back.  A requested mode is dropped if it is None or if its axis
    is listed in ``self.ignore_axes``.  If nothing remains to change,
    no write is issued.  The third-axis mode is only included in the
    write when the third axis itself is being changed.

    """
    readback = yield self.acu_control.mode(size=3)
    new_modes = list(readback)

    requested = (('az', az), ('el', el), ('third', third))
    touched = set()
    for idx, (short_name, wanted) in enumerate(requested):
        if wanted is None or short_name in self.ignore_axes:
            continue
        new_modes[idx] = wanted
        touched.add(idx)

    if not touched:
        return

    # Only send the third-axis entry when that axis is being changed.
    payload = new_modes if 2 in touched else new_modes[:2]
    yield self.acu_control.mode(payload)
@inlineCallbacks
def _go_to_axis(self, session, axis, target,
                state_feedback=None):
    """Execute a movement, using "Preset" mode, on a specific axis.

    Args:
        session: session object variable of the parent operation.
        axis (str): one of 'Azimuth', 'Elevation', 'Boresight'.
        target (float): target position.
        state_feedback (dict): place to record state (see notes).

    Returns:
        ok (bool): True if the motion completed successfully and
            arrived at target position.
        msg (str): success/error message.

    Notes:
        This has various checks to ensure the movement executes as
        expected and in a timely fashion.  In the case that the
        warning horn sounds, this function should block until that
        completes, even if the requested position has been achieved
        (i.e. no actual motion was needed).

        The state_feedback may be used to pipeline the initial parts
        of the movement, so two functions aren't trying to command
        at the same time.  The ``state_feedback`` dict should be
        passed in initialized with ``{'state': 'init'}``.  When
        initial commanding is finished, this function will update it
        to `state="wait"`, and then on completion to `state="done"`.
        The "done" state is set on every exit path, including early
        returns and exceptions, so that a caller polling for the end
        of the "init" state cannot wait forever.

    """
    # Step time in event loop.
    TICK_TIME = 0.1

    # Time for which to sample distance for "still" and "moving"
    # conditions.
    PROFILE_TIME = 1.

    # When aborting, how many seconds to use to project a good
    # stopping position (d = v*t)
    ABORT_TIME = 2.

    # Threshold (deg) for declaring that we've reached
    # destination.
    THERE_YET = 0.01

    # How long to wait after initiation for signs of motion,
    # before giving up.  This is normally within 2 or 3 seconds
    # (SATP), but in "cold" cases where siren needs to sound, this
    # can be as long as 12 seconds.  For the LAT, can take an
    # extra couple seconds if there were faults to clear.
    MAX_STARTUP_TIME = 15.

    # How long does it take to sound the warning horn?  It takes
    # 10 seconds.  Don't wait longer than this.
    WARNING_HORN_TOO_LONG = 15.

    # How long after mode change to Preset should we expect to see
    # brakes released, except in case that warning horn is
    # sounding?  3 seconds should be enough.
    WARNING_HORN_DETECT = 3.

    # Velocity to assume when computing maximum time a move should
    # take (to bail out in unforeseen circumstances).  There are
    # other checks in place to catch when the platform has not
    # started moving or has stopped at the wrong place.  So the
    # timeout computed from this should only activate in cases
    # where some other commander has taken over and then kept the
    # platform moving around.
    UNREASONABLE_VEL = 0.1

    # Enum for the motion states
    State = Enum(f'{axis}State',
                 ['INIT', 'WAIT_MOVING', 'WAIT_STILL', 'FAIL', 'DONE'])

    if state_feedback is None:
        state_feedback = {}
    state_feedback['state'] = 'init'

    # If this axis is "ignore", skip it.
    for _axis, short_name in [
            ('Azimuth', 'az'),
            ('Elevation', 'el'),
            ('Boresight', 'third'),
    ]:
        if _axis == axis and short_name in self.ignore_axes:
            self.log.warn('Ignoring requested motion on {axis}', axis=axis)
            state_feedback['state'] = 'done'
            yield dsleep(1)
            return True, 'axis successfully ignored'

    # Specialization for different axis types.

    class AxisControl:
        def get_pos(_self):
            return self.data['status']['summary'][f'{axis}_current_position']

        def get_mode(_self):
            return self.data['status']['summary'][f'{axis}_mode']

        def get_vel(_self):
            return self.data['status']['summary'][f'{axis}_current_velocity']

        def get_active(_self):
            return bool(
                self.data['status']['axis_state'][f'{axis}_brakes_released']
                and not self.data['status']['axis_state'][f'{axis}_axis_stop'])

    class AzAxis(AxisControl):
        @inlineCallbacks
        def goto(_self, target):
            result = yield self.acu_control.go_to(az=target, set_mode='target')
            return result

    class ElAxis(AxisControl):
        @inlineCallbacks
        def goto(_self, target):
            result = yield self.acu_control.go_to(el=target, set_mode='target')
            return result

    class ThirdAxis(AxisControl):
        def get_vel(_self):
            return 0.

        @inlineCallbacks
        def goto(_self, target):
            result = yield self.acu_control.go_3rd_axis(target)
            return result

    class LatCorotator(ThirdAxis):
        def get_pos(_self):
            return self.data['status']['corotator']['Corotator_current_position']

        def get_mode(_self):
            return self.data['status']['corotator']['Corotator_mode']

        def get_active(_self):
            return bool(
                self.data['status']['corotator']['Corotator_brakes_released']
                and not self.data['status']['corotator']['Corotator_axis_stop'])

    ctrl = None
    if axis == 'Azimuth':
        ctrl = AzAxis()
    elif axis == 'Elevation':
        ctrl = ElAxis()
    elif axis == 'Boresight':
        if self.acu_config['platform'] in ['ccat', 'lat']:
            ctrl = LatCorotator()
        else:
            ctrl = ThirdAxis()
    if ctrl is None:
        # Release anyone polling state_feedback before bailing out.
        state_feedback['state'] = 'done'
        return False, f"No configuration for axis={axis}"

    motion_aborted = False
    assumption_fail = False
    motion_completed = False

    try:
        limit_func, _ = self._get_limit_func(axis)

        # History of recent distances from target.
        history = []

        def get_history(t):
            # Returns (ok, hist) where hist is roughly the past t
            # seconds of position data and ok is whether or not
            # that much history was actually available.
            n = int(t // TICK_TIME) + 1
            return (n <= len(history)), history[-n:]

        last_state = None
        state = State.INIT
        start_time = None
        give_up_time = None
        has_never_moved = True
        warning_horn = False

        while session.status in ['starting', 'running', 'stopping']:
            # Time ...
            now = time.time()
            if start_time is None:
                start_time = now
            time_since_start = now - start_time
            motion_expected = time_since_start > MAX_STARTUP_TIME

            # Space ...
            current_pos, current_vel = ctrl.get_pos(), ctrl.get_vel()
            distance = abs(target - current_pos)
            history.append(distance)
            if give_up_time is None:
                give_up_time = now + distance / UNREASONABLE_VEL \
                    + MAX_STARTUP_TIME + 2 * PROFILE_TIME \
                    + WARNING_HORN_TOO_LONG

            # Do we seem to be moving / not moving?
            ok, _d = get_history(PROFILE_TIME)
            still = ok and (np.std(_d) < 0.01)
            moving = ok and (np.std(_d) >= 0.01)
            has_never_moved = (has_never_moved and not moving)

            near_destination = distance < THERE_YET
            mode_ok = (ctrl.get_mode() == 'Preset')
            active_now = ctrl.get_active()

            # Log only on state changes
            if state != last_state:
                _state = f'{axis}.state={state.name}'
                self.log.info(
                    f'{_state:<30} dt={now - start_time:7.3f} dist={distance:8.3f}')
                last_state = state

            # Handle task abort
            if session.status == 'stopping' and not motion_aborted:
                target = limit_func(current_pos + current_vel * ABORT_TIME)
                state = State.INIT
                motion_aborted = True

            # Turn "too long" into an immediate exit.
            if now > give_up_time:
                self.log.error('Motion did not complete in a timely fashion; exiting.')
                assumption_fail = True
                break

            # Main state machine.  Members are always referenced
            # through the State class, not through another member.
            if state == State.INIT:
                # Set target position and change mode to Preset.
                result = yield ctrl.goto(target)
                if result in OK_RESPONSES:
                    state = State.WAIT_MOVING
                else:
                    self.log.error(f'ACU rejected go_to with message: {result}')
                    state = State.FAIL
                # Reset the clock for tracking "still" / "moving".
                history = []
                start_time = time.time()

            elif state == State.WAIT_MOVING:
                # Position and mode change requested, now wait for
                # either mode change or clear failure of motion.
                if mode_ok:
                    if active_now:
                        state = State.WAIT_STILL
                    elif time_since_start > WARNING_HORN_TOO_LONG:
                        self.log.error('Warning horn too long!')
                        state = State.FAIL
                    elif time_since_start > WARNING_HORN_DETECT and not warning_horn:
                        warning_horn = True
                        self.log.info('Warning horn is probably sounding.')
                elif still and motion_expected:
                    self.log.error(f'Motion did not start within {MAX_STARTUP_TIME:.1f} s.')
                    state = State.FAIL

            elif state == State.WAIT_STILL:
                # Once moving, watch for end of motion.
                state_feedback['state'] = 'wait'
                if not mode_ok:
                    self.log.error('Unexpected axis mode transition; exiting.')
                    state = State.FAIL
                elif still:
                    if near_destination:
                        state = State.DONE
                    elif has_never_moved and motion_expected:
                        # The settling time, near a soft limit, can be
                        # a bit long ... so only timeout on
                        # motion_expected if we've never moved at all.
                        self.log.error(f'Motion did not start within {MAX_STARTUP_TIME:.1f} s.')
                        state = State.FAIL

            elif state == State.FAIL:
                # Move did not complete as planned.
                assumption_fail = True
                break

            elif state == State.DONE:
                # We seem to have arrived at destination.
                motion_completed = True
                break

            # Keep only ~20 seconds of history ...
            _, history = get_history(20.)

            yield dsleep(TICK_TIME)
    finally:
        # Always leave the "init"/"wait" states, even if a status
        # lookup or the ACU command raised.
        state_feedback['state'] = 'done'

    success = motion_completed and not (motion_aborted or assumption_fail)

    if success:
        msg = 'Move complete.'
    elif motion_aborted:
        msg = 'Move aborted!'
    else:
        msg = 'Irregularity during motion!'

    return success, msg
@ocs_agent.param('az', type=float, default=None)
@ocs_agent.param('el', type=float, default=None)
@ocs_agent.param('end_stop', default=True, type=bool)
@inlineCallbacks
def go_to(self, session, params):
    """go_to(az, el, end_stop=True)

    **Task** - Move the telescope to a particular point (azimuth,
    elevation) in Preset mode. When motion has ended and the telescope
    reaches the preset point, the function returns.

    Parameters:
        az (float): destination angle for the azimuth axis
        el (float): destination angle for the elevation axis
        end_stop (bool): put the commanded axes in Stop mode at
            the end of the motion

    Notes:
        If az or el is unspecified (None), the axis will not be
        commanded to a new position and will not be put in Preset
        mode, and will not be put in Stop (if end_stop) after motion.

        When omitting el, and if Sun Avoidance path-finding
        decides an elevation change is required to travel from the
        current position to the implicit target position, the
        task will exit with error.

        A target outside the configured motion limits raises
        ``ocs_agent.ParamError``.

    """
    with self.azel_lock.acquire_timeout(0, job='go_to') as acquired:
        if not acquired:
            return False, f"Operation failed: {self.azel_lock.job} is running."

        if self._get_sun_policy('motion_blocked'):
            return False, "Motion blocked; Sun avoidance in progress."

        self.log.info('Clearing faults to prepare for motion.')
        yield self.acu_control.clear_faults()
        yield dsleep(1)

        ok, msg = yield self._check_ready_motion(session)
        if not ok:
            return False, msg

        targets = {k: params[k] for k in ['az', 'el']}

        def axis_filter_args(az, el):
            # Keep only the axes for which the caller gave a target.
            return {k: v for k, v in [('az', az), ('el', el)]
                    if targets[k] is not None}

        for axis, target in targets.items():
            limit_func, limits = self._get_limit_func(axis)
            if target is not None and target != limit_func(target):
                raise ocs_agent.ParamError(
                    f'{axis}={target} not in accepted range, '
                    f'[{limits[0]}, {limits[1]}].')

        self.log.info('Requested position: ' + ', '.join(
            [f'{axis}={target}' for axis, target in targets.items()]))

        legs, msg = yield self._get_sunsafe_moves(targets['az'], targets['el'])
        if msg is not None:
            self.log.error(msg)
            return False, msg

        if len(legs) > 2:
            if None in targets.values():
                return False, "Sun-safe path requires multiple moves, but simple path requested."
            self.log.info(f'Executing move via {len(legs) - 1} separate legs (sun optimized)')

        # Check HWP safety
        hwp_safe, msg = yield self._check_hwpsafe_legs(legs)
        if not hwp_safe:
            self.log.info('{msg}', msg=msg)
            return False, msg

        # Defaults for the case where there is nothing after the first
        # leg; without them the code below would hit an unbound
        # ``all_ok``.  NOTE(review): legs[0] is skipped here, so it is
        # presumably the starting position -- confirm against
        # _get_sunsafe_moves.
        all_ok, msg = True, 'No motion required.'
        for leg_az, leg_el in legs[1:]:
            all_ok, msg = yield self._go_to_axes(session, **axis_filter_args(leg_az, leg_el))
            if not all_ok:
                break

        if all_ok and params['end_stop']:
            yield self._set_modes(**axis_filter_args('Stop', 'Stop'))

        return all_ok, msg
@ocs_agent.param('target')
@ocs_agent.param('end_stop', default=True, type=bool)
@inlineCallbacks
def go_to_named(self, session, params):
    """go_to_named(target, end_stop=True)

    **Task** - Move the telescope to a named position, e.g. "home".
    The name is looked up in ``self.named_positions`` and the move
    itself is carried out by starting the ``go_to`` task and waiting
    for it to finish.

    Parameters:
        target (str): name of the target position.
        end_stop (bool): put axes in Stop mode after motion

    """
    name = params['target']
    position = self.named_positions.get(name)
    if position is None:
        return False, 'Position "%s" is not configured.' % name

    # The looked-up position supplies (az, el), in that order.
    go_to_params = {
        'az': position[0],
        'el': position[1],
        'end_stop': params['end_stop'],
    }
    start_code, _start_msg, _start_session = self.agent.start('go_to', go_to_params)
    if start_code == ocs.ERROR:
        return False, 'Failed to start go_to task.'

    wait_code, wait_msg, _wait_session = yield self.agent.wait('go_to')
    return (wait_code == ocs.OK), wait_msg

Status: executed

' in resp: + return True, "Speed mode changed." + elif '

Status: not allowed

@ocs_agent.param('az_speed', type=float, default=None)
@ocs_agent.param('az_accel', type=float, default=None)
@ocs_agent.param('el_freq', type=float, default=None)
@ocs_agent.param('el_mode', choices=['stop', 'preset', 'programtrack', ''],
                 default=None)
@ocs_agent.param('turnaround_method', type=str, default=None,
                 choices=[None, 'standard', 'standard_gen',
                          'three_leg', 'two_leg'])
@ocs_agent.param('reset', default=False, type=bool)
@inlineCallbacks
def set_scan_params(self, session, params):
    """set_scan_params(az_speed=None, az_accel=None, el_freq=None, \
                       el_mode=None, turnaround_method=None, reset=False)

    **Task** - Update the default scan parameters, used by
    generate_scan if not passed explicitly.

    Parameters:
        az_speed (float, optional): The azimuth scan speed.
        az_accel (float, optional): The (average) azimuth
            acceleration at turn-around.
        el_freq (float, optional): The frequency of elevation nods in
            type 3 scans.
        el_mode (str, optional): If not null, the elevation axis
            will be put in this mode after the initial position seek
            (but before scan begins).  This can be used to do type 1/2
            scans with el axis held in Stop mode.  The special value
            of '' will revert el_mode to the default (None).
        turnaround_method (str, optional): The default turnaround
            method; one of 'standard', 'standard_gen', 'three_leg',
            'two_leg'.
        reset (bool, optional): If True, reset all params to default
            values before applying any updates passed explicitly here.

    Notes:
        A parameter passed as None leaves the stored value untouched.

    """
    if params['reset']:
        self.scan_params.update(self.default_scan_params)
    for k in ('az_speed', 'az_accel', 'el_freq', 'turnaround_method', 'el_mode'):
        if params[k] is not None:
            self.scan_params[k] = params[k]
    # '' is the only way to request "no el_mode", since None means
    # "leave unchanged" above.
    if params['el_mode'] == '':
        self.scan_params['el_mode'] = None
    self.log.info('Updated default scan params to {sp}', sp=self.scan_params)
    yield
    return True, 'Done'
@ocs_agent.param('all_axes', default=False, type=bool)
@inlineCallbacks
def stop_and_clear(self, session, params):
    """stop_and_clear(all_axes=False)

    **Task** - Change the azimuth, elevation, and 3rd axis modes
    to Stop; also clear the ProgramTrack stack.

    Each of the two steps is attempted up to 6 times, with a short
    pause after every attempt; the task fails if a step has not
    succeeded by then.

    Args:
        all_axes (bool): Send Stop to all axes, even ones user has
            requested to be ignored.

    """
    include_ignored = params['all_axes']

    def _axis_not_stopped():
        # True if any axis we care about reports a mode other than Stop.
        summary = self.data['status']['summary']
        modes = [summary['Azimuth_mode'], summary['Elevation_mode']]
        platform = self.acu_config['platform']
        if platform == 'satp':
            modes.append(summary['Boresight_mode'])
        elif platform in ['ccat', 'lat']:
            modes.append(self.data['status']['corotator']['Corotator_mode'])
        return any(
            mode != 'Stop'
            and (include_ignored or short_name not in self.ignore_axes)
            for short_name, mode in zip(['az', 'el', 'third'], modes))

    # Step 1: get every relevant axis into Stop.
    all_stopped = False
    for attempt in range(1, 7):
        if not _axis_not_stopped():
            self.log.info('All axes in Stop mode')
            all_stopped = True
            break
        yield self._stop(include_ignored)
        self.log.info('Stop called (iteration %i)' % attempt)
        yield dsleep(0.1)

    if not all_stopped:
        msg = 'Failed to set all axes to Stop mode!'
        self.log.error(msg)
        return False, msg

    # Step 2: empty the ProgramTrack stack.
    stack_cleared = False
    for attempt in range(1, 7):
        free_stack = self.data['status']['summary']['Free_upload_positions']
        if not free_stack < FULL_STACK:
            self.log.info('Stack cleared')
            stack_cleared = True
            break
        yield self.acu_control.http.Command('DataSets.CmdTimePositionTransfer',
                                            'Clear Stack')
        self.log.info('Clear Stack called (iteration %i)' % attempt)
        yield dsleep(0.1)

    if not stack_cleared:
        msg = 'Failed to clear the ProgramTrack stack!'
        self.log.warn(msg)
        return False, msg

    session.set_status('stopping')
    return True, 'Job completed'
@ocs_agent.param('filename', type=str)
@ocs_agent.param('absolute_times', type=bool, default=False)
@ocs_agent.param('azonly', type=bool, default=True)
@inlineCallbacks
def fromfile_scan(self, session, params=None):
    """fromfile_scan(filename, absolute_times=False, azonly=True)

    **Process** - Upload and execute a scan pattern from a file.

    Parameters:
        filename (str): full path to the track file.
        absolute_times (bool): If True, the track timestamps are
            taken at face value.  Otherwise, the timestamps are
            treated as relative to the track start time, which
            will be a few seconds in the future from when this
            function is called.
        azonly (bool): If True, the elevation part of the track
            will be uploaded but the el axis won't be put in
            ProgramTrack mode.  It might be put in Stop mode
            though.

    Notes:
        The file is loaded with ``sh.from_file``; see that function
        for discussion of the file structure.

        The process refuses to run if the az or el range of the
        track touches or exceeds the configured motion limits.

    """
    scan = sh.from_file(params['filename'])

    def _out_of_range(span, axis_name):
        # A span that merely touches a limit is rejected as well.
        bounds = self.motion_limits[axis_name]
        return span[0] <= bounds['lower'] or span[1] >= bounds['upper']

    if _out_of_range(scan.az_range, 'azimuth'):
        return False, 'Azimuth location out of range!'
    if _out_of_range(scan.el_range, 'elevation'):
        return False, 'Elevation location out of range!'

    # Modify times?  Relative tracks start 5 seconds from now.
    start_offset = 0 if params['absolute_times'] else time.time() + 5.

    # Turn those lines into a generator.
    def line_batcher(ff_scan, t_shift=0., n=10):
        source_points = ff_scan.points
        while True:
            shifted = [sh.track_point_time_shift(p, t_shift)
                       for p in source_points]
            for first in range(0, len(shifted), n):
                yield shifted[first:first + n]
            if ff_scan.loop_time <= 0:
                return
            # Looping track: advance by one loop period and replay
            # everything after the preamble.
            t_shift += ff_scan.loop_time
            source_points = ff_scan.points[ff_scan.preamble_count:]

    point_gen = line_batcher(scan, start_offset)

    track_axes = ['az'] if params['azonly'] else ['az', 'el']

    ok, err = yield self._run_track(
        session,
        point_gen,
        step_time=scan.step_time,
        free_form=scan.free_form,
        track_axes=track_axes)
    return ok, err
+ @ocs_agent.param('el_freq', type=float, default=None) + @ocs_agent.param('el_mode', choices=['stop', 'preset', 'programtrack'], + default=None) + @ocs_agent.param('num_scans', type=float, default=None) + @ocs_agent.param('start_time', type=float, default=None) + @ocs_agent.param('wait_to_start', type=float, default=None) + @ocs_agent.param('step_time', type=float, default=None) + @ocs_agent.param('az_start', default='end', + choices=['end', 'mid', 'az_endpoint1', 'az_endpoint2', + 'mid_inc', 'mid_dec']) + @ocs_agent.param('az_drift', type=float, default=None) + @ocs_agent.param('scan_type', default=1, choices=[1, 2, 3]) + @ocs_agent.param('az_vel_ref', type=float, default=None) + @ocs_agent.param('turnaround_method', default=None, + choices=[None, 'standard', 'standard_gen', + 'three_leg', 'two_leg']) + @ocs_agent.param('scan_upload_length', type=float, default=None) + @ocs_agent.param('type', default=None, choices=[1, 2, 3]) + @inlineCallbacks + def generate_scan(self, session, params): + """generate_scan(az_endpoint1, az_endpoint2, \ + az_speed=None, az_accel=None, \ + el_endpoint1=None, el_endpoint2=None, \ + el_speed=None, el_freq=None, \ + el_mode=None, \ + num_scans=None, start_time=None, \ + wait_to_start=None, step_time=None, \ + az_start='end', az_drift=None, \ + scan_type=1, az_vel_ref=None, \ + turnaround_method=None, \ + scan_upload_length=None) + + **Process** - Scan generator, currently only works for + constant-velocity az scans with fixed elevation. + + Parameters: + az_endpoint1 (float): first endpoint of a linear azimuth scan + az_endpoint2 (float): second endpoint of a linear azimuth scan + az_speed (float): azimuth speed for constant-velocity scan + az_accel (float): turnaround acceleration for a constant-velocity scan + el_endpoint1 (float): first endpoint of elevation motion. + In the present implementation, this will be the + constant elevation declared at every point in the + track. + el_endpoint2 (float): this is ignored. 
+ el_speed (float): this is ignored. + el_freq (float): frequency of the elevation nods for + scan_type=3. + el_mode (str): By default, the elevation axis mode for + type 1 and 2 scans will be left in Preset after the + initial move. To force it instead into Stop mode, + pass "stop" (case-sensitive) here. ("preset" and + "programtrack" are also accepted, and will result in + that mode being set prior to launching the track.) + num_scans (int or None): if not None, limits the scan to + the specified number of constant velocity legs. The + process will exit without error once that has + completed. + start_time (float or None): a unix timestamp giving the + time at which the scan should begin. The default is + None, which means the scan will start immediately (but + taking into account the value of wait_to_start). + wait_to_start (float): number of seconds to wait before + starting a scan, in the case that start_time is None. + The default is to compute a minimum time based on the + scan parameters and the ACU ramp-up algorithm; this is + typically 5-10 seconds. + step_time (float): time, in seconds, between points on the + constant-velocity parts of the motion. The default is + None, which will cause an appropriate value to be + chosen automatically (typically 0.1 to 1.0). + az_start (str): part of the scan to start at. To start at one + of the extremes, use 'az_endpoint1', 'az_endpoint2', or + 'end' (same as 'az_endpoint1'). To start in the midpoint + of the scan use 'mid_inc' (for first half-leg to have + positive az velocity), 'mid_dec' (negative az velocity), + or 'mid' (velocity oriented towards endpoint2). + az_drift (float): if set, this should be a drift velocity + in deg/s. The scan extrema will move accordingly. This + can be used to better follow compact sources as they + rise or set through the focal plane. + scan_type (int): What type of scan to use. Only 1, 2, 3 are valid. + Type 1 is a constant elevation scan. 
+ Type 2 includes a variation in az speed that scales as sin(az). + Type 3 is a Type 2 with an sinusoidal el nod. + az_vel_ref (float or None): azimuth to center the velocity profile at. + If None then the average of the endpoints is used. + turnaround_method (str): The method used for generating turnaround. + Default (None) generates the baseline minimal jerk trajectory. + 'standard' uses the acu standard turnaround generation (same as None). + 'standard_gen' generates a track_point list of points that mimics + the acu standard turnaround generation for use in type2/type3 scans. + 'three_leg' generates a three-leg turnaround which attempts to + minimize the acceleration at the midpoint of the turnaround. + 'two_leg' generates a three-leg turnaround with second_leg_time = 0. + scan_upload_length (float): number of seconds for each set + of uploaded points. If this is not specified, the + track manager will try to use as short a time as is + reasonable. + type (int): Temporary alias for scan_type. Do not + use. Will be removed. + + Notes: + Note that all parameters are optional except for + az_endpoint1 and az_endpoint2. If only those two parameters + are passed, the Process will scan between those endpoints, + with the elevation axis held in Stop, indefinitely (until + Process .stop method is called).. + + """ + init_time = time.time() # for params feed. + + if self._get_sun_policy('motion_blocked'): + return False, "Motion blocked; Sun avoidance in progress." + + if params['type'] is not None: + self.log.warn('Caller passed "type" instead of "scan_type" arg; moving.') + params['scan_type'] = params['type'] + del params['type'] + + self.log.info('User scan params: {params}', params=params) + + az_endpoint1 = params['az_endpoint1'] + az_endpoint2 = params['az_endpoint2'] + el_endpoint1 = params['el_endpoint1'] + el_endpoint2 = params['el_endpoint2'] + az_vel_ref = params['az_vel_ref'] + + # Params with defaults configured ... 
+ az_speed = params['az_speed'] + az_accel = params['az_accel'] + el_freq = params['el_freq'] + turnaround_method = params['turnaround_method'] + el_mode = params['el_mode'] + if az_speed is None: + az_speed = self.scan_params['az_speed'] + if az_accel is None: + az_accel = self.scan_params['az_accel'] + if el_freq is None: + el_freq = self.scan_params['el_freq'] + if turnaround_method is None: + turnaround_method = self.scan_params['turnaround_method'] + if params['scan_type'] in [2, 3] and turnaround_method == 'standard': + turnaround_method = 'standard_gen' + self.log.info('Setting turnaround_method="standard_gen" for type2/3 scan.') + if el_mode is None: + el_mode = self.scan_params['el_mode'] # ... which may also be None. + + # Check if the turnaround method is usable for the called scan type. + # This should never happen with the above turnaround_method setting. + if turnaround_method == "standard" and params['scan_type'] != 1: + raise ValueError("Cannot use standard turnaround method with type 2 or 3 scans!") + + # Do we need to limit the az_accel? This limit comes from a + # maximum jerk parameter; the equation below (without the + # empirical 0.85 adjustment) is stated in the SATP ACU ICD. + min_turnaround_time = (0.85 * az_speed / 9 * 11.616)**.5 + max_turnaround_accel = 2 * az_speed / min_turnaround_time + + # You must also not exceed the platform max accel. + if self.motion_limits['azimuth'].get('accel'): + max_turnaround_accel = min( + max_turnaround_accel, + self.motion_limits['azimuth'].get('accel') / 1.88) + + if az_accel > max_turnaround_accel: + self.log.warn('WARNING: user requested accel=%.2f; limiting to %.2f' % + (az_accel, max_turnaround_accel)) + az_accel = max_turnaround_accel + + # If el is not specified, drop in the current elevation. 
+ if el_endpoint1 is None: + el_endpoint1 = self.data['status']['summary']['Elevation_current_position'] + if el_endpoint2 is None: + el_endpoint2 = el_endpoint1 + + # If requested el is just outside acceptable range, tweak it in. + _f, _ = self._get_limit_func('elevation') + el_endpoint1, _untweaked_el = _f(el_endpoint1), el_endpoint1 + if abs(el_endpoint1 - _untweaked_el) > 0.1: + return False, "Current elevation (%.4f) is well outside limits." % _untweaked_el + init_el = el_endpoint1 + + scan_upload_len = params.get('scan_upload_length') + scan_params = {k: params.get(k) for k in [ + 'num_scans', 'num_batches', 'start_time', + 'wait_to_start', 'step_time', 'batch_size', + 'az_start', 'az_drift'] + if params.get(k) is not None} + if params['scan_type'] in [2, 3]: + scan_params["az_start"] = "mid_dec" + el_speed = params.get('el_speed', 0.0) + az_edge_speed = az_speed + if params['scan_type'] in [2, 3]: + if az_vel_ref is None: + az_vel_ref = (az_endpoint1 + az_endpoint2) / 2. + az_cent = az_vel_ref - 90 + az_edge = np.max(np.abs((az_endpoint1 - az_cent, az_endpoint2 - az_cent))) + az_edge_speed = az_speed / np.sin(az_edge) + + plan = sh.plan_scan(az_endpoint1, az_endpoint2, + el=el_endpoint1, v_az=az_edge_speed, a_az=az_accel, + az_start=scan_params.get('az_start'), + scan_type=params['scan_type']) + + # Use the plan to set scan upload parameters. + if scan_params.get('step_time') is None: + scan_params['step_time'] = plan['step_time'] + if scan_params.get('wait_to_start') is None: + scan_params['wait_to_start'] = plan['wait_to_start'] + + step_time = scan_params['step_time'] + point_batch_count = None + if scan_upload_len: + point_batch_count = scan_upload_len / step_time + + self.log.info('The plan: {plan}', plan=plan) + self.log.info('The scan_params: {scan_params}', scan_params=scan_params) + + # Before any motion, check for sun safety. 
+ ok, msg = self._check_scan_sunsafe(az_endpoint1, az_endpoint2, el_endpoint1, + az_speed, az_accel) + if ok: + self.log.info('Sun safety check: {msg}', msg=msg) + else: + self.log.error('Sun safety check fails: {msg}', msg=msg) + return False, 'Scan is not Sun Safe.' + + # Clear faults. + self.log.info('Clearing faults to prepare for motion.') + yield self.acu_control.clear_faults() + yield dsleep(1) + + # Verify we're good to move + ok, msg = yield self._check_ready_motion(session) + if not ok: + return False, msg + + # Seek to starting position. Note "legs" will always include + # at least 2 points; first point being current (az, el). + self.log.info(f'Moving to start position, az={plan["init_az"]}, el={init_el}') + legs, msg = yield self._get_sunsafe_moves(plan['init_az'], init_el) + if msg is not None: + self.log.error(msg) + return False, msg + hwp_safe, msg = yield self._check_hwpsafe_legs(legs) + if not hwp_safe: + msg = f'Move to start position not permitted: {msg}' + self.log.info('{msg}', msg=msg) + return False, msg + + # Also validate the scan generally -- need to be movable in az. + hwp_safe, msg = yield self._check_hwpsafe(init_el, init_el, axes=['az']) + if not hwp_safe: + msg = f'Const-el scan not permitted: {msg}' + self.log.info(msg) + return False, msg + + for leg_az, leg_el in legs[1:]: + ok, msg = yield self._go_to_axes(session, az=leg_az, el=leg_el) + if not ok: + return False, f'Start position seek failed with message: {msg}' + + # Force elevation axis to stop mode? + if el_mode: + for k in ['Stop', 'Preset', 'ProgramTrack']: + if el_mode.lower() == k.lower(): + yield self._set_modes(el=k) + break + else: + return False, f'User requested invalid el_mode={el_mode}' + + # Prepare the point generator. 
+ free_form = False + if params['scan_type'] == 1: + track_axes = ['az'] + if turnaround_method != 'standard': + free_form = True + + g = sh.generate_constant_velocity_scan(az_endpoint1=az_endpoint1, + az_endpoint2=az_endpoint2, + az_speed=az_speed, acc=az_accel, + turnaround_method=turnaround_method, + el_endpoint1=el_endpoint1, + el_endpoint2=el_endpoint2, + el_speed=el_speed, + az_first_pos=plan['init_az'], + **scan_params) + elif params['scan_type'] == 2: + free_form = True + track_axes = ['az'] + g = sh.generate_type2_scan(az_endpoint1=az_endpoint1, + az_endpoint2=az_endpoint2, + az_speed=az_speed, acc=az_accel, + turnaround_method=turnaround_method, + el_endpoint1=el_endpoint1, + az_vel_ref=az_vel_ref, + az_first_pos=plan['init_az'], + **scan_params) + elif params['scan_type'] == 3: + free_form = True + track_axes = ['az', 'el'] + g = sh.generate_type3_scan(az_endpoint1=az_endpoint1, + az_endpoint2=az_endpoint2, + az_speed=az_speed, acc=az_accel, + turnaround_method=turnaround_method, + el_endpoint1=el_endpoint1, + el_endpoint2=el_endpoint2, + el_freq=el_freq, + az_vel_ref=az_vel_ref, + az_first_pos=plan['init_az'], + **scan_params) + else: + raise ValueError("Scan type must be 1, 2, or 3") + + scan_params_bundle = {'session_id': session.session_id, + 'schema': 1, + 'event': 1, + 'init_time': init_time, + } + scan_params_bundle.update({ + 'az1': az_endpoint1, + 'az2': az_endpoint2, + 'az_vel': az_speed, + 'az_accel': az_accel, + 'el1': el_endpoint1, + 'el2': el_endpoint2, + 'el_freq': el_freq, + 'type': params['scan_type'], + 'turnaround_type': sh.TURNAROUNDS_ENUM[turnaround_method], + 'track_axes': ','.join(track_axes), + }) + + self.agent.publish_to_feed('scan_params', + {'timestamp': time.time(), + 'block_name': 'info', + 'data': scan_params_bundle}) + + ret_val = (yield self._run_track( + session=session, point_gen=g, step_time=step_time, stop_accel=az_accel, + track_axes=track_axes, point_batch_count=point_batch_count, + free_form=free_form, 
@inlineCallbacks
def _run_track(self, session, point_gen, step_time, stop_accel=0.5, track_axes=('az',),
               point_batch_count=None, free_form=False, unabort_failure=False):
    """Run a ProgramTrack track scan, with points provided by a
    generator.

    Args:
        session: session object for the parent operation.
        point_gen: generator that yields points; it is wrapped in an
            ``sh.PointProvider``.
        step_time: the minimum time between track points.  Accepted
            for interface compatibility; this implementation does
            not read it (the upload cadence is driven by
            MIN_STACK_ADVANCE_TIME and MIN_STACK_POP instead).
        stop_accel: float acceleration value passed to the point
            provider when a user stop is requested.  If _run_track
            is called from generate_scan, stop_accel is equal to the
            az_accel for the scan.  Default is 0.5.
        track_axes: sequence of strings indicating which axes ('az',
            'el') should be put in ProgramTrack mode.  Axes not
            included here will not have their mode changed.  If None
            or empty, no mode change is requested.  Any other axis
            name causes an immediate (False, msg) return, before any
            command is sent.
        point_batch_count: accepted for interface compatibility;
            this implementation does not read it.
        free_form: if True, disable ACU linear interpolation and
            turn-around profiling (profiler off, spline
            interpolation).
        unabort_failure: if True, an unexpected exit from
            ProgramTrack that happens while the track is already
            stopping is reported as success.

    Returns:
        Tuple (success, msg) where success is a bool.

    Raises:
        RuntimeError: if a batch of points fails to upload 5 times
            in a row.

    """
    # The approximate loop time
    LOOP_STEP = 0.1  # seconds

    # Time to allow for initial ProgramTrack transition.
    MAX_PROGTRACK_SET_TIME = 5.

    # Minimum number of points to have in the stack.  While the
    # docs strictly require 4, this number should be at least 1
    # more than that to allow for rounding when we are setting the
    # refill threshold.
    MIN_STACK_POP = 6  # points
    MAX_ALLOWABLE_FREE_POSITIONS = FULL_STACK - MIN_STACK_POP

    # Minimum amount of time (seconds), in advance, to populate
    # the trajectory.  In cases where step_time is short, this
    # creates a longer track window to survive agent outages.
    # (The cost is that stopping a scan may take a little longer.)
    MIN_STACK_ADVANCE_TIME = 3.

    # Special error bits to watch here
    PTRACK_FAULT_KEYS = [
        'ProgramTrack_position_failure',
        'Track_start_too_early',
        'Turnaround_accel_too_high',
        'Turnaround_time_too_short',
    ]

    # Validate the requested axes up front.  (The previous in-line
    # "assert [<list>]" was always true for a non-empty list, so bad
    # names were never caught.)
    if track_axes is not None and len(track_axes) > 0:
        bad_axes = [_ax for _ax in track_axes if _ax not in ('az', 'el')]
        if bad_axes:
            return False, f"Invalid track_axes: {bad_axes}; expected 'az' and/or 'el'."

    # Each entry is (command, seconds to sleep after the command).
    if free_form:
        init_cmds = [
            ('Clear Stack', 0.),
            ('Set Profiler Off', 0.),
            ('Set Interpolation Spline', 0.5)
        ]
    else:
        init_cmds = [
            ('Clear Stack', 0.),
            ('Set Profiler On', 0.),
            ('Set Interpolation Linear', 0.5)
        ]

    with self.azel_lock.acquire_timeout(0, job='generate_scan') as acquired:
        if not acquired:
            return False, f"Operation failed: {self.azel_lock.job} is running."
        if session.status not in ['starting', 'running']:
            return False, "Operation aborted before motion began."

        for _c, _d in init_cmds:
            resp = yield self.acu_control.http.Command(
                'DataSets.CmdTimePositionTransfer', _c)
            if resp != b'OK, Command executed.':
                return False, f"Failed to init: {_c}"
            if _d > 0:
                yield dsleep(_d)

        if track_axes is not None and len(track_axes) > 0:
            mode_args = {_ax: 'ProgramTrack' for _ax in track_axes}
            yield self._set_modes(**mode_args)

        yield dsleep(0.1)

        # Values for mode are:
        # - 'go' -- keep uploading points (unless there are no more to upload).
        # - 'stop' -- do not request more points from generator;
        #   finish the ones that are already in "points", let the stack empty,
        #   and wait for settling condition.
        # - 'abort' -- do not upload more points; exit loop with error; wait
        #   a few seconds and clear the stack.
        mode = 'go'

        point_prov = sh.PointProvider(point_gen)
        last_mode = None
        last_upload_az = None
        start_time = time.time()
        got_progtrack = False
        faults = {}
        got_points_in = False
        first_upload_time = None
        last_uploaded_timestamp = 0
        wait_stop_timeout = None

        prog_track_err = False
        stop_message = ""
        while True:
            now = time.time()
            current_modes = {'Az': self.data['status']['summary']['Azimuth_mode'],
                             'El': self.data['status']['summary']['Elevation_mode'],
                             'Remote': self.data['status']['platform_status']['Remote_mode']}
            az_state = {'pos': self.data['status']['summary']['Azimuth_current_position'],
                        'vel': self.data['status']['summary']['Azimuth_current_velocity']}
            free_positions = self.data['status']['summary']['Free_upload_positions']

            # Use this var to detect case where we're uploading
            # points but ACU is quietly dumping them because the
            # vel is too high.
            got_points_in = got_points_in \
                or (got_progtrack and free_positions < FULL_STACK)

            if last_mode != mode:
                self.log.info(f'scan mode={mode}, line_buffer={len(point_prov)}, track_free={free_positions}')
                last_mode = mode

            # Log each watched fault bit once, the first time it is seen.
            for k in PTRACK_FAULT_KEYS:
                if k not in faults and self.data['status']['ACU_failures_errors'].get(k):
                    self.log.info('Fault during track: "{k}"', k=k)
                    faults[k] = True

            if mode != 'abort':
                # Reasons we might decide to abort ...
                if current_modes['Az'] == 'ProgramTrack':
                    got_progtrack = True
                else:
                    if got_progtrack:
                        self.log.warn('Unexpected exit from ProgramTrack mode!')
                        if mode == 'stop':
                            # Remember that the failure happened during
                            # shutdown; see unabort_failure.
                            prog_track_err = True
                        mode = 'abort'
                    elif now - start_time > MAX_PROGTRACK_SET_TIME:
                        self.log.warn('Failed to set ProgramTrack mode in a timely fashion.')
                        mode = 'abort'
                if not got_points_in and (first_upload_time is not None) \
                        and (now - first_upload_time > 10):
                    self.log.warn('ACU seems to be dumping our track. Vel too high?')
                    mode = 'abort'
                if current_modes['Remote'] == 0:
                    self.log.warn('ACU no longer in remote mode!')
                    mode = 'abort'
                if session.status == 'stopping' and mode not in ['stop', 'abort']:
                    mode = 'stop'
                    stop_message = 'User-requested stop.'
                    point_prov.stop(free_form, stop_accel)

            if mode == 'abort':
                point_prov.abort()

            # Is it time to upload more lines?
            # This happens when the current time of uploaded points is less
            # than the MIN_STACK_ADVANCE_TIME.
            # (Meaning we have less than the minimum time worth of points uploaded).
            # Or if the total number of free positions is higher than the MAX_ALLOWABLE_FREE_POSITIONS.
            # (Meaning we haven't uploaded at least the minimum number of points)
            if ((last_uploaded_timestamp - time.time()) <= MIN_STACK_ADVANCE_TIME) \
                    or (free_positions > MAX_ALLOWABLE_FREE_POSITIONS):

                upload_lines = []
                # Grab points from point_prov until our last point is at least
                # 2 * MIN_STACK_ADVANCE_TIME seconds from now.
                # If that isn't enough points to leave at least MIN_STACK_POP
                # points on the stack, keep grabbing points until we have enough.
                while not point_prov.is_empty() and (
                        len(upload_lines) == 0
                        or upload_lines[-1].timestamp - time.time() < (2 * MIN_STACK_ADVANCE_TIME)
                        or (free_positions - len(upload_lines) > MAX_ALLOWABLE_FREE_POSITIONS)):

                    upload_lines.append(point_prov.pop())

                # If the last line has a "group" flag, keep transferring lines.
                while not point_prov.is_empty() and len(upload_lines) and upload_lines[-1].group_flag != 0:
                    upload_lines.append(point_prov.pop())

                if point_prov.is_empty() and mode == 'go':
                    mode = 'stop'
                    stop_message = 'Stop due to end of the planned track.'

                if len(upload_lines):
                    # Discard the group flag and upload all.
                    text = sh.get_track_points_text(
                        upload_lines, timestamp_offset=3, text_block=True)
                    for attempt in range(5):
                        _dt = time.time()
                        try:
                            # This seems to return b'Ok.' no matter ~what,
                            # so not much point checking it.
                            yield self.acu_control.http.UploadPtStack(text)
                            break
                        except Exception as err:
                            _dt = time.time() - _dt
                            self.log.warn(f'Upload {len(upload_lines)} failed (attempt {attempt}) after {_dt:.3f} seconds')
                            self.log.warn('Exception was: {err}', err=err)
                    else:
                        # All 5 attempts raised.
                        raise RuntimeError('Upload fail.')
                    if first_upload_time is None:
                        first_upload_time = time.time()
                    last_upload_az = upload_lines[-1].az

                    # Track the timestamp of the current upload.
                    last_uploaded_timestamp = upload_lines[-1].timestamp

            if point_prov.is_empty() and free_positions >= FULL_STACK - 1:
                if mode == 'stop':
                    if wait_stop_timeout is None:
                        self.log.info('Stack is empty; waiting for settling...')
                        wait_stop_timeout = now + 20.
                    elif now > wait_stop_timeout:
                        self.log.warn('Graceful stop condition not met in a timely fashion.')
                        mode = 'abort'
                    # Await safe exit condition.
                    pos_ok = last_upload_az is None or (
                        abs(az_state['pos'] - last_upload_az) < 0.01)
                    vel_ok = abs(az_state['vel']) < .01
                    if pos_ok and vel_ok:
                        break
                else:
                    self.log.warn('Somehow ran out of points!')
                    break

            yield dsleep(LOOP_STEP)

        # Go to Stop mode?
        # yield self.acu_control.stop()

        # Wait a couple more seconds and clear the stack.
        yield dsleep(2)
        yield self.acu_control.http.Command('DataSets.CmdTimePositionTransfer',
                                            'Clear Stack')

    if mode == 'abort':
        if unabort_failure and prog_track_err:
            return True, 'Problems on shutdown but close enough.'
        return False, 'Problems during scan'
    return True, f'Scan ended. {stop_message}'
+ 'active_avoidance': False, + + # Can be set to a timestamp, in which case Sun Avoidance + # is disabled until that time has passed. + 'disable_until': 0, + + # Flag for indicating normal motions should be blocked + # (Sun Escape is active). + 'block_motion': False, + + # Flag for update_sun to indicate Sun map needs recomputed + 'recompute_req': False, + + # If set, should be a timestamp at which escape_sun_now + # will be initiated. + 'next_drill': None, + + # Parameters for the Sun Safety Map computation. + 'safety_map_kw': { + 'sun_time_shift': 0, + }, + + # Avoidance policy, for use in avoidance decisions. + 'policy': {}, + } + + # Active avoidance? + _p['active_avoidance'] = self.sun_config['enabled'] + + # Avoidance requires platform limits and move policies + _p['policy'].update({ + 'min_az': self.motion_limits['azimuth']['lower'], + 'max_az': self.motion_limits['azimuth']['upper'], + 'min_el': self.motion_limits['elevation']['lower'], + 'max_el': self.motion_limits['elevation']['upper'], + 'axes_sequential': self.motion_limits.get('axes_sequential', False), + }) + + # User parameters defining the danger zone, and escape + # policies. This list should be kept consistent with the + # preamble docs in avoidance.py. 
+ for k in [ + 'exclusion_radius', + 'min_sun_time', + 'response_time', + 'el_horizon', + 'el_dodging', + 'axes_sequential', + ]: + if k in self.sun_config: + _p['policy'][k] = self.sun_config[k] + + self.sun_params = _p + + def _get_sun_policy(self, key): + now = time.time() + p = self.sun_params + active = (p['active_avoidance'] and (now >= p['disable_until'])) + + if key == 'motion_blocked': + return active and p['block_motion'] + elif key == 'sunsafe_moves': + return active + elif key == 'escape_enabled': + return active + elif key == 'map_valid': + return (self.sun is not None + and self.sun.base_time is not None + and self.sun.base_time <= now + and self.sun.base_time >= now - 2 * SUN_MAP_REFRESH) + else: + return p[key] + + @ocs_agent.param('_') + @inlineCallbacks + def monitor_sun(self, session, params): + """monitor_sun() + + **Process** - Monitors and reports the position of the Sun; + maintains a Sun Safety Map for verifying that moves and scans + are Sun-safe; triggers a "Sun escape" if the boresight enters + an unsafe position. + + The monitoring functions are always active (as long as this + process is running). But the escape functionality must be + explicitly enabled (through the default platform + configuration, command line arguments, or the update_sun + task). 
+ + Session data looks like this:: + + { + "timestamp": 1698848292.5579932, + "active_avoidance": false, + "disable_until": 0, + "block_motion": false, + "recompute_req": false, + "next_drill": null, + "safety_map_kw": { + "sun_time_shift": 0 + }, + "policy": { + "exclusion_radius": 20, + "el_horizon": 10, + "min_sun_time": 1800, + "response_time": 7200, + "min_az": -90, + "max_az": 450, + "min_el": 18.5, + "max_el": 90 + }, + "sun_pos": { + "map_exists": true, + "map_is_old": false, + "map_ref_time": 1698848179.1123455, + "platform_azel": [ + 90.0158, + 20.0022 + ], + "sun_radec": [ + 216.50815789438036, + -14.461844389380719 + ], + "sun_azel": [ + 78.24269024936028, + 60.919554369324096 + ], + "sun_down": false, + "sun_dist": 41.75087242151837, + "sun_safe_time": 71760, + "platform_down": false + }, + "avoidance": { + "safety_unknown": false, + "warning_zone": false, + "danger_zone": false, + "escape_triggered": false, + "escape_active": false, + "last_escape_time": 0, + "sun_is_real": true, + "platform_is_moveable": true + } + } + + In debugging, the Sun position might be falsified. In that + case the "sun_pos" subtree will contain an entry like this:: + + "WARNING": "Fake Sun Position is in use!", + + and "avoidance": "sun_is_real" will be set to false. (No + other functionality is changed when using a falsified Sun + position; flags are computed and actions decided based on the + false position.) + + """ + def _get_sun_map(): + # To run in thread ... 
+ start = time.time() + new_sun = avoidance.SunTracker(policy=self.sun_params['policy'], + **self.sun_params['safety_map_kw']) + return new_sun, time.time() - start + + def _notify_recomputed(result): + nonlocal req_out + new_sun, compute_time = result + self.log.info('(Re-)computed Sun Safety Map (took %.1fs)' % + compute_time) + self.sun = new_sun + req_out = False + + def lookup(keys, tree): + if isinstance(keys, str): + keys = [keys] + if len(keys) == 0: + if isinstance(tree, (bool, np.bool_)): + return int(tree) + return tree + return lookup(keys[1:], tree[keys[0]]) + + # Feed -- unpack some elements of session.data + feed_keys = { + 'sun_avoidance': ('active_avoidance', int), + 'sun_az': (('sun_pos', 'sun_azel', 0), float), + 'sun_el': (('sun_pos', 'sun_azel', 1), float), + 'sun_dist': (('sun_pos', 'sun_dist'), float), + 'sun_safe_time': (('sun_pos', 'sun_safe_time'), float), + 'sun_down': (('sun_pos', 'sun_down'), int), + 'platform_down': (('sun_pos', 'platform_down'), int), + } + for k in ['warning_zone', 'danger_zone', + 'escape_triggered', 'escape_active']: + feed_keys[f'sun_{k}'] = (('avoidance', k), int) + feed_pacer = Pacemaker(.1) + + req_out = False + self.sun = None + last_panic = 0 + + session.data = {} + + while session.status in ['starting', 'running']: + new_data = { + 'timestamp': time.time(), + } + new_data.update(self.sun_params) + + try: + az, el = [self.data['status']['summary'][f'{ax}_current_position'] + for ax in ['Azimuth', 'Elevation']] + if az is None or el is None: + raise KeyError + except KeyError: + az, el = None, None + + try: + moveable = [bool(self.data['status']['platform_status'][k]) + for k in ['Safe_mode', 'Remote_mode']] + moveable = (not moveable[0]) and moveable[1] + except KeyError: + moveable = False + + no_map = self.sun is None + old_map = (not no_map + and self.sun._now() - self.sun.base_time > SUN_MAP_REFRESH) + do_recompute = ( + not req_out + and (no_map or old_map or self.sun_params['recompute_req']) + ) + + if 
do_recompute: + req_out = True + self.sun_params['recompute_req'] = False + threads.deferToThread(_get_sun_map).addCallback( + _notify_recomputed) + + new_data.update({ + 'sun_pos': { + 'map_exists': not no_map, + 'map_is_old': old_map, + 'map_ref_time': None if no_map else self.sun.base_time, + 'platform_azel': (az, el), + }, + }) + + # Flags for unsafe position. + safety_known, danger_zone, warning_zone = False, False, False + # Flag for time shift during debugging. + sun_is_real = True + if self.sun is not None: + info = self.sun.get_sun_pos(az, el) + sun_is_real = ('WARNING' not in info) + new_data['sun_pos'].update(info) + if az is not None: + t = self.sun.check_trajectory([az], [el])['sun_time'] + new_data['sun_pos']['sun_safe_time'] = t if t > 0 else 0 + safety_known = True + danger_zone = (t < self.sun_params['policy']['min_sun_time']) + warning_zone = (t < self.sun_params['policy']['response_time']) + + # Has a drill been requested? + drill_req = (self.sun_params['next_drill'] is not None + and self.sun_params['next_drill'] <= time.time()) + + # Should we be doing a escape_sun_now? + panic_for_real = safety_known and danger_zone and self._get_sun_policy('escape_enabled') + panic_for_fun = drill_req + + # Is escape_sun_now task running? + ok, msg, _session = self.agent.status('escape_sun_now') + escape_in_progress = (_session.get('status', 'done') != 'done') + + # Block motion as long as we are not sun-safe. 
+ self.sun_params['block_motion'] = (panic_for_real or escape_in_progress) + + new_data['avoidance'] = { + 'safety_unknown': not safety_known, + 'warning_zone': warning_zone, + 'danger_zone': danger_zone, + 'escape_triggered': panic_for_real, + 'escape_active': escape_in_progress, + 'last_escape_time': last_panic, + 'sun_is_real': sun_is_real, + 'platform_is_moveable': moveable, + } + + if (panic_for_real or panic_for_fun): + now = time.time() + # Different retry conditions for moveable / not moveable + if moveable and (now - last_panic > 60.): + # When moveable, only attempt escape every 1 minute. + self.log.warn('monitor_sun is requesting escape_sun_now.') + self.agent.start('escape_sun_now') + last_panic = now + elif not moveable and (now - last_panic > 600.): + # When not moveable, only print complaint message every 10 minutes. + self.log.warn('monitor_sun cannot request escape_sun_now, ' + 'because platform not moveable by remote!') + last_panic = now + + # Regardless, clear the drill indicator -- we don't + # want that to occur randomly later. + self.sun_params['next_drill'] = None + + # Update session. + session.data.update(new_data) + + # Publish -- only if we have the sun pos though.. + if sun_is_real and safety_known and feed_pacer.next_sample <= time.time(): + feed_pacer.sleep() # should be instantaneous, just update counters + block = {'timestamp': time.time(), + 'block_name': 'sun0', + 'data': {}} + for kshort, (keys, cast) in feed_keys.items(): + block['data'][kshort] = cast(lookup(keys, new_data)) + self.agent.publish_to_feed('sun', block) + + yield dsleep(1) + + return True, 'monitor_sun exited cleanly.' 
+ + @ocs_agent.param('reset', type=bool, default=None) + @ocs_agent.param('enable', type=bool, default=None) + @ocs_agent.param('temporary_disable', type=float, default=None) + @ocs_agent.param('escape', type=bool, default=None) + @ocs_agent.param('exclusion_radius', type=float, default=None) + @ocs_agent.param('shift_sun_hours', type=float, default=None) + def update_sun(self, session, params): + """update_sun(reset=None, enable=None, temporary_disable=None, \ + escape=None, exclusion_radius=None, \ + shift_sun_hours=None) + + **Task** - Update Sun monitoring and avoidance parameters. + + All arguments are optional. + + Args: + reset (bool): If True, reset all sun_params to the platform + defaults. (The "defaults" includes any overrides + specified on Agent command line.) + enable (bool): If True, enable active Sun avoidance. If + avoidance was temporarily disabled it is re-enabled. If + False, disable active Sun avoidance (non-temporarily). + temporary_disable (float): If set, disable Sun avoidance for + this number of seconds. + escape (bool): If True, schedule an escape drill for 10 + seconds from now. + exclusion_radius (float): If set, change the FOV radius + (degrees), for Sun avoidance purposes, to this number. + shift_sun_hours (float): If set, compute the Sun position as + though it were this many hours in the future. This is for + debugging, testing, and work-arounds. Pass zero to + cancel. 
+ + """ + do_recompute = False + now = time.time() + self.log.info('update_sun params: {params}', + params={k: v for k, v in params.items() + if v is not None}) + + if params['reset']: + self._reset_sun_params() + do_recompute = True + if params['enable'] is not None: + self.sun_params['active_avoidance'] = params['enable'] + self.sun_params['disable_until'] = 0 + if params['temporary_disable'] is not None: + self.sun_params['disable_until'] = params['temporary_disable'] + now + if params['escape']: + self.log.warn('Setting sun escape drill to start in 10 seconds.') + self.sun_params['next_drill'] = now + 10 + if params['exclusion_radius'] is not None: + self.sun_params['policy']['exclusion_radius'] = \ + params['exclusion_radius'] + do_recompute = True + if params['shift_sun_hours'] is not None: + self.sun_params['safety_map_kw']['sun_time_shift'] = \ + params['shift_sun_hours'] * 3600 + do_recompute = True + + if do_recompute: + self.sun_params['recompute_req'] = True + + return True, 'Params updated.' + + @ocs_agent.param('_') + @inlineCallbacks + def escape_sun_now(self, session, params): + """escape_sun_now() + + **Task** - Take control of the platform, and move it to a + Sun-Safe position. This will abort/stop any current go_to or + generate_scan, identify the safest possible path to North or + South (without changing elevation, if possible), and perform + the moves to get there. 
+ + """ + state = 'init' + last_state = state + + session.data = {'state': state, + 'timestamp': time.time()} + + while session.status in ['starting', 'running'] and state not in ['escape-done']: + az, el = [self.data['status']['summary'][f'{ax}_current_position'] + for ax in ['Azimuth', 'Elevation']] + + if state == 'init': + state = 'escape-abort' + elif state == 'escape-abort': + # raise stop flags and issue stop on motion ops + for op in ['generate_scan', 'go_to']: + self.agent.stop(op) + self.agent.abort(op) + state = 'escape-wait-idle' + timeout = 30 + elif state == 'escape-wait-idle': + for op in ['generate_scan', 'go_to']: + ok, msg, _session = self.agent.status(op) + if _session.get('status', 'done') != 'done': + break + else: + state = 'escape-move' + last_move = time.time() + timeout -= 1 + if timeout < 0: + state = 'escape-stop' + elif state == 'escape-stop': + yield self._stop() + state = 'escape-move' + last_move = time.time() + elif state == 'escape-move': + self.log.info('Getting escape path for (t, az, el) = ' + '(%.1f, %.3f, %.3f)' % (time.time(), az, el)) + escape_path = self.sun.find_escape_paths(az, el) + if escape_path is None: + self.log.error('Failed to find acceptable path; using ' + 'failsafe (South, low el).') + legs = [(180., max(self.sun_params['policy']['min_el'], 0))] + else: + legs = escape_path['moves'].nodes[1:] + self.log.info('Escaping to (az, el)={pos} ({n} moves)', + pos=legs[-1], n=len(legs)) + state = 'escape-move-legs' + leg_d = None + elif state == 'escape-move-legs': + def _leg_done(result): + nonlocal state, last_move, leg_d + all_ok, msg = result + if not all_ok: + self.log.error('Leg failed.') + # Recompute the escape path. 
+ if time.time() - last_move > 60: + self.log.error('Too many failures -- giving up for now') + state = 'escape-done' + else: + state = 'escape-move' + else: + leg_d = None + last_move = time.time() + if not self._get_sun_policy('escape_enabled'): + state = 'escape-done' + if leg_d is None: + if len(legs) == 0: + state = 'escape-done' + else: + leg_az, leg_el = legs.pop(0) + leg_d = self._go_to_axes(session, az=leg_az, el=leg_el, + clear_faults=True) + leg_d.addCallback(_leg_done) + elif state == 'escape-done': + # This block won't run -- loop will exit. + pass + + session.data['state'] = state + if state != last_state: + self.log.info('escape_sun_now: state is now "{state}"', state=state) + last_state = state + yield dsleep(1) + + return True, "Exited." + + def _check_scan_sunsafe(self, az1, az2, el, v_az, a_az): + """This will return True if active avoidance is disabled. If active + avoidance is enabled, then it will only return true if the + planned scan seems to currently be sun-safe. + + """ + if not self._get_sun_policy('sunsafe_moves'): + return True, 'Sun-safety checking is not enabled.' + + if not self._get_sun_policy('map_valid'): + return False, 'Sun Safety Map not computed or stale; run the monitor_sun process.' + + # Include a bit of buffer for turn-arounds. + az1, az2 = min(az1, az2), max(az1, az2) + turn = v_az**2 / a_az + az1 -= turn + az2 += turn + n = max(2, int(np.ceil((az2 - az1) / 1.))) + azs = np.linspace(az1, az2, n) + + info = self.sun.check_trajectory(azs, azs * 0 + el) + safe = info['sun_time'] >= self.sun_params['policy']['min_sun_time'] + if safe: + msg = 'Scan is safe for %.1f hours' % (info['sun_time'] / 3600) + else: + msg = 'Scan will be unsafe in %.1f hours' % (info['sun_time'] / 3600) + + return safe, msg + + def _get_sunsafe_moves(self, target_az, target_el): + """Given a target position, find a Sun-safe way to get there. This + will either be a direct move, or else an ordered slew in az + before el (or vice versa). 
+ + If target_az or target_el are None, they are taken to be the + current axis position. + + Returns (legs, msg). If legs is None, it indicates that no + Sun-safe path could be found; msg is an error message. If a + path can be found, the legs is a list of intermediate move + targets, ``[(az0, el0), (az1, el1) ...]``, terminating on + ``(target_az, target_el)``. msg is None in that case. The + first position (az0, el0) is the current position of the + platform. + + In the case that the platform is already at the target + position, the returned list will still have 2 entries. + + When Sun avoidance is not enabled, this function returns as + though the direct path to the target is a safe one (though + axes_sequential=True may cause an intermediate step to be + added). + + """ + # Get current position. + (az0, el0), msg = self._current_azel() + if az0 is None: + return None, msg + + if target_az is None: + target_az = az0 + if target_el is None: + target_el = el0 + + if not self._get_sun_policy('sunsafe_moves'): + if self.motion_limits.get('axes_sequential'): + # Move in az first, then el. The leg list must still begin at + # the current position (see Returns above), so that per-leg + # checks also cover the initial az slew. + return [(az0, el0), (target_az, el0), (target_az, target_el)], None + return [(az0, el0), (target_az, target_el)], None + + if not self._get_sun_policy('map_valid'): + return None, 'Sun Safety Map not computed or stale; run the monitor_sun process.' + + # Check the target position and block it outright. + if self.sun.check_trajectory([target_az], [target_el])['sun_time'] <= 0: + return None, 'Requested target position is not Sun-Safe.' + + moves = self.sun.analyze_paths(az0, el0, target_az, target_el) + move, decisions = self.sun.select_move(moves) + if move is None: + return None, 'No Sun-Safe moves could be identified!' + + legs = list(move['moves'].nodes) + if len(legs) == 1: + # Pad to two entries. 
+ return [legs[0], legs[0]], None + return legs, None + + # + # HWP State Safety + # + + @inlineCallbacks + def monitor_hwp(self, session, params): + """monitor_hwp() + + **Process** - Monitors the state of a HWP, by querying a + HWPSupervisor's ``monitor`` Process session data. Assesses + what motions are permitted, given the HWP state. + + session.data example:: + + { + "interlocks_config": { + "configured": true, + "enabled": true, + "instance_id": "hwp-supervisor", + "limit_sun_avoidance": true, + "tolerance": 0.1, + }, + "supervisor_data": { + "timestamp": 1744692973.185377, + "ok": true, + "err_msg": "", + "_grip_brakes": [1, 1, 1], + "_grip_state": "ungripped", + "_is_spinning": true, + "_target_freq": 2.1, + "grip_state": "ungripped", + "spin_state": "spinning", + "request_block_motion": null, + "request_block_motion_timestamp": null + }, + "allowed": { + "el": [true, [40, 70], [ + [40, 70], + ], + "az": [true, [40, 70], [ + [40, 70], + ], + "third": [false, null, []], + } + } + + """ + if not self.hwp_rules.configured: + session.data = { + 'interlocks_config': self.hwp_rules.encoded(basic=True), + } + return False, "HWP Interlocks not configured - monitoring blocked." + + def _update_sun_lims(el_range): + if el_range is None: + el_range = (self.motion_limits['elevation']['lower'], + self.motion_limits['elevation']['upper']) + # Is this a change to sun policy? + if tuple(el_range) != (self.sun_params['policy']['min_el'], + self.sun_params['policy']['max_el']): + self.sun_params['policy']['min_el'] = el_range[0] + self.sun_params['policy']['max_el'] = el_range[1] + self.sun_params['recompute_req'] = True + + pacer = Pacemaker(1.) 
+ last_enabled = False + hwp_supervisor = self.hwp_rules.get_client() + + while session.status == 'running': + new_data = yield threads.deferToThread(hwp_supervisor.update) + new_sd = { + 'interlocks_config': self.hwp_rules.encoded(basic=True), + 'supervisor_data': new_data, + } + (_, el), msg = self._current_azel() + allowed = self.hwp_rules.test_range( + (None if el is None else (el, el)), + new_data.get('grip_state'), + new_data.get('spin_state')) + new_sd['allowed'] = allowed + + session.data = new_sd + self.data['hwp'] = new_data + + if self.hwp_rules.enabled: + if self.hwp_rules.limit_sun_avoidance and el is not None: + tol = self.hwp_rules.tolerance + if allowed['el'][0]: + _update_sun_lims(allowed['el'][1]) + else: + # If we're in a forbidden spot ... just try to + # keep it at this el for now, and hopefully + # HWPSupervisor will improve things soon. + _update_sun_lims((el - tol, el + tol)) + elif last_enabled and self.hwp_rules.limit_sun_avoidance: + # Restore the sun_params default elevation range. + _update_sun_lims(None) + + last_enabled = self.hwp_rules.enabled + yield pacer.dsleep() + return True, "Bye." + + @ocs_agent.param('enable', type=bool, default=None) + def update_hwp(self, session, params): + """update_hwp(enable=None) + + **Task** - Update HWP state monitoring and safety parameters. + + All arguments are optional. + + Args: + enable (bool): If True, enable HWP state checks. If False, + disable HWP state checks (non-temporarily). + + """ + self.log.info('update_hwp params: {params}', + params={k: v for k, v in params.items() + if v is not None}) + + if not self.hwp_rules.configured: + return False, 'HWP interlocks not configured in config file.' + + if params['enable'] is not None: + self.hwp_rules.enabled = params['enable'] + + return True, 'Params updated.' + + def _check_hwpsafe(self, el1, el2, axes, hwp_data=None): + """Checks whether certain axis motions are permitted, over a + certain range of elevations. 
+ + Args: + el1, el2: elevation range over which the checks should be + considered. + axes: list of axes to check for permission on (taken from + 'el', 'az', 'third'). + hwp_data: dict from which to get grip_state and spin_state; + if not provided, uses self.data['hwp']. + + Returns: + motions_permitted (bool): whether all requested axes are + permitted to move, given the HWP state and el range. + message (str): helpful text. + + Note that when hwp_rules are not enabled, motions are + generally permitted by this function. + + See additional helper functions, _check_hwpsafe_here and + _check_hwpsafe_legs. + + """ + if not self.hwp_rules.enabled: + return (True, "HWP monitoring is disabled.") + + # Grab a self-consistent copy... + if hwp_data is None: + hwp_data = self.data['hwp'] + + # Check staleness + if time.time() - hwp_data.get('timestamp', 0) > 10: + return False, "HWP monitoring dataset is stale; cannot validate move." + + # Check it. + state_args = { + 'el_range': [el1, el2], + 'grip_state': hwp_data.get('grip_state'), + 'spin_state': hwp_data.get('spin_state'), + } + axes_ok = self.hwp_rules.test_range(**state_args) + for ax in axes: + if not axes_ok[ax][0]: + return (False, (f"Motion in {ax} not permitted due to HWP rules " + f"for {state_args}.")) + return axes_ok, 'All requested axes pass the HWP rules.' + + def _check_hwpsafe_here(self, axes): + """Check whether motion in ``axes`` is permitted, at the + present elevation and hwp state. + + """ + if not self.hwp_rules.enabled: + return (True, "HWP monitoring is disabled.") + + (_, el), msg = self._current_azel() + if el is None: + return False, f'HWP safety could not be ensured: {msg}' + return self._check_hwpsafe(el, el, axes) + + def _check_hwpsafe_legs(self, legs): + """Check whether motions specified by legs (list of (az, el) + positions) are permitted, given the current HWP state. 
+ + """ + if not self.hwp_rules.enabled: + return (True, "HWP monitoring is disabled.") + + hwp_data = self.data['hwp'] + for (az1, el1), (az2, el2) in zip(legs[:-1], legs[1:]): + axes = [] + if abs(el2 - el1) > self.hwp_rules.tolerance: + axes.append('el') + if abs(az2 - az1) > self.hwp_rules.tolerance: + axes.append('az') + ok, msg = self._check_hwpsafe(el1, el2, axes, hwp_data=hwp_data) + if not ok: + return False, msg + return (True, "All moves passed HWP safety checks.") + + # + # Exercise! + # + + @ocs_agent.param('action', choices=['open', 'close']) + @inlineCallbacks + def set_shutter(self, session, params): + """set_shutter(action) + + **Task** - Request a (LAT) shutter action, wait for it to + complete or fail. + + Args: + action (str): 'open' or 'close' + + Notes: + If the shutter reads out as in the requested state already, + then no action is taken and the task will quickly return as + succeeded. + + """ + def log(msg): + session.add_message(msg) + + log(f'requested action={params["action"]}') + + if self.data['status'].get('shutter', {}).get('Shutter_open') is None: + return False, 'Shutter dataset does not seem to be populating.' + + if params['action'] == 'open': + dset_cmd = 'ShutterOpen' + desired_key, undesired_key = 'Shutter_open', 'Shutter_closed' + else: + dset_cmd = 'ShutterClose' + desired_key, undesired_key = 'Shutter_closed', 'Shutter_open' + + # This just needs to be longer than 1 loop time. + STATE_WAIT = 5. + + # Shutter typically closes in ~45 seconds. But in early tests + # it sometimes takes an additional 45 seconds for moving->0. + MOVING_WAIT = 120. 
+ + state = 'init' + session.data = {'state': state, + 'timestamp': time.time()} + + while (session.status in ['starting', 'running'] + and state not in ['done', 'error']): + last_state = state + now = time.time() + + az, el = [self.data['status']['summary'][f'{ax}_current_position'] + for ax in ['Azimuth', 'Elevation']] + shutter = self.data['status']['shutter'] + + for bad_key in ['Shutter_timeout', 'Shutter_failure']: + if shutter[bad_key]: + state = 'error' + message = f'Detected error state: {bad_key}' + + if state in ['error', 'done']: + pass + + elif state == 'init': + if shutter[desired_key] and not shutter[undesired_key]: + state = 'done' + message = f'Shutter already reporting state={desired_key}' + else: + # Issue the command + result = yield self.acu_control.Command(self.datasets['shutter'], dset_cmd) + if result in OK_RESPONSES: + state = 'wait-moving' + timeout = time.time() + STATE_WAIT + else: + state = 'error' + message = 'Failed to issue shutter command.' + + elif state == 'wait-moving': + if now > timeout: + state = 'error' + message = 'Shutter failed to start moving.' + elif shutter['Shutter_moving']: + state = 'wait-stopped' + timeout = now + MOVING_WAIT + + elif state == 'wait-stopped': + if now > timeout: + state = 'error' + message = 'Shutter will not stop moving.' + elif not shutter['Shutter_moving']: + state = 'wait-final' + timeout = now + STATE_WAIT + + elif state == 'wait-final': + if now > timeout: + state = 'error' + message = 'Shutter failed to reach final expected state.' + elif shutter[desired_key] and not shutter[undesired_key]: + state = 'done' + message = 'Shutter move successful.' 
+ + else: + message = f'invalid state: {state}' + state = 'error' + + session.data['state'] = state + if state != last_state: + log(f'set_shutter: state is now "{state}"') + last_state = state + yield dsleep(1) + + if state == 'done': + return True, message + elif state == 'error': + return False, message + + # Loop exited because the session left starting/running; report the + # state we were interrupted in (f-string so {state} is interpolated). + return False, f'Aborted in state {state}' + + @ocs_agent.param('starting_index', type=int, default=0) + def exercise(self, session, params): + """exercise(starting_index=0) + + **Process** - Run telescope platform through some pre-defined motions. + + For historical reasons, this does not command agent functions + internally, but rather instantiates a *client* and calls the + agent as though it were an external entity. + + """ + # Load the exercise plan; the context manager closes the file handle + # even if parsing fails. + with open(self.exercise_plan, 'rb') as plan_file: + plans = yaml.safe_load(plan_file) + super_plan = exercisor.get_plan(plans[self.acu_config_name]) + + session.data = { + 'timestamp': time.time(), + 'iterations': 0, + 'attempts': 0, + 'errors': 0, + } + + def _publish_activity(activity): + msg = { + 'block_name': 'A', + 'timestamp': time.time(), + 'data': {'activity': activity}, + } + self.agent.publish_to_feed('activity', msg) + + def _publish_error(delta_error=1): + session.data['errors'] += delta_error + msg = { + 'block_name': 'B', + 'timestamp': time.time(), + 'data': {'error_count': session.data['errors']} + } + self.agent.publish_to_feed('activity', msg) + + def _exit_now(ok, msg): + _publish_activity('idle') + self.agent.feeds['activity'].flush_buffer() + return ok, msg + + _publish_activity('idle') + _publish_error(0) + + target_instance_id = self.agent.agent_address.split('.')[-1] + exercisor.set_client(target_instance_id, self.agent.site_args) + settings = super_plan.get('settings', {}) + + plan_idx = 0 + plan_t = None + + for plan in super_plan['steps']: + plan['iter'] = iter(plan['driver']) + + while session.status in ['running']: + time.sleep(1) + session.data['timestamp'] = time.time() + session.data['iterations'] += 1 + + 
# Fault maintenance + faults = exercisor.get_faults() + if faults['safe_lock']: + self.log.info('SAFE lock detected, exiting') + return _exit_now(False, 'Exiting on SAFE lock.') + + if faults['local_mode']: + self.log.info('LOCAL mode detected, exiting') + return _exit_now(False, 'Exiting on LOCAL mode.') + + if faults['az_summary']: + if session.data['attempts'] > 5: + self.log.info('Too many az summary faults, exiting.') + return _exit_now(False, 'Too many az summary faults.') + session.data['attempts'] += 1 + self.log.info('az summary fault -- trying to clear.') + exercisor.clear_faults() + time.sleep(10) + continue + + session.data['attempts'] = 0 + + # Plan execution + active_plan = super_plan['steps'][plan_idx] + if plan_t is None: + plan_t = time.time() + + now = time.time() + if now - plan_t > active_plan['duration']: + plan_idx = (plan_idx + 1) % len(super_plan['steps']) + plan_t = None + continue + + if settings.get('use_boresight'): + bore_target = random.choice(settings['boresight_opts']) + self.log.info(f'Setting boresight={bore_target}...') + _publish_activity('boresight') + exercisor.set_boresight(bore_target) + + plan, info = next(active_plan['iter']) + + self.log.info('Launching next scan. 
plan={plan}', plan=plan) + + _publish_activity(active_plan['driver'].code) + ok = None + if 'targets' in plan: + exercisor.steps(**plan) + else: + exercisor.scan(**plan) + _publish_activity('idle') + + if ok is None: + self.log.info('Scan completed without error.') + else: + self.log.info(f'Scan exited with error: {ok}') + _publish_error() + + return _exit_now(True, "Stopped run process") + + +def add_agent_args(parser_in=None): + if parser_in is None: + parser_in = argparse.ArgumentParser() + pgroup = parser_in.add_argument_group('Agent Options') + pgroup.add_argument("--acu-config") + pgroup.add_argument("--no-processes", action='store_true', + default=False) + pgroup.add_argument("--ignore-axes", choices=['el', 'az', 'third', 'none'], + nargs='+', help="One or more axes to ignore.") + pgroup.add_argument("--disable-idle-reset", action='store_true', + help="Disable idle_reset, even for LAT.") + pgroup.add_argument("--min-el", type=float, + help="Override the minimum el defined in platform config.") + pgroup.add_argument("--max-el", type=float, + help="Override the maximum el defined in platform config.") + pgroup.add_argument("--disable-sun-avoidance", action='store_true', + help="Disable Sun Avoidance before startup.") + pgroup.add_argument("--disable-hwp-interlocks", action='store_true', + help="Disable HWP interlocks before startup.") + + return parser_in + + +def main(args=None): + parser = add_agent_args() + args = site_config.parse_args(agent_class='ACUAgent', + parser=parser, + args=args) + + agent, runner = ocs_agent.init_site_agent(args) + _ = ACUAgent(agent, args.acu_config, + startup=not args.no_processes, + ignore_axes=args.ignore_axes, + disable_idle_reset=args.disable_idle_reset, + disable_sun_avoidance=args.disable_sun_avoidance, + disable_hwp_interlocks=args.disable_hwp_interlocks, + min_el=args.min_el, + max_el=args.max_el) + + runner.run(agent, auto_reconnect=True) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git 
a/pcs/agents/acu_interface/status_keys.py b/pcs/agents/acu_interface/status_keys.py new file mode 100644 index 0000000..aeb7884 --- /dev/null +++ b/pcs/agents/acu_interface/status_keys.py @@ -0,0 +1,163 @@ +# { +# "Elevation axis in stow position": false, +# "Elevation stow pins - status": 0, +# "AT Lock On": false, +# } + +status_fields = { + 'latp' : { + 'status_fields': { + 'summary': { + 'Time': 'Time', + 'Year': 'Year', + 'Azimuth Mode': 'Azimuth_mode', + 'Azimuth current position': 'Azimuth_current_position', + 'Azimuth current velocity': 'Azimuth_current_velocity', + 'Elevation Mode': 'Elevation_mode', + 'Elevation current position': 'Elevation_current_position', + 'Elevation current velocity': 'Elevation_current_velocity', + 'Qty of free program track stack positions': 'Free_upload_positions', + "Elevation axis in stow position": 'Elevation_axis_in_stow_position', + "Elevation stow pins - status": 'Elevation_stow_pins_status', + "AT Lock On": 'AT_Lock_On', + }, + 'position_errors': { + 'Azimuth average position error': 'Azimuth_avg_position_error', + 'Azimuth peak position error': 'Azimuth_peak_position_error', + 'Elevation average position error': 'Elevation_avg_position_error', + 'Elevation peak position error': 'Elevation_peak_position_error', + }, + 'axis_limits': { + # 'Azimuth CCW limit: 2nd emergency': 'AzCCW_HWlimit_2ndEmergency', + # 'Azimuth CCW limit: emergency': 'AzCCW_HWlimit_emergency', + # 'Azimuth CCW limit: operating': 'AzCCW_HWlimit_operating', + # 'Azimuth CCW limit: pre-limit': 'AzCCW_HWprelimit', + 'Azimuth CCW limit': 'AzCCW_SWlimit_operating', + # 'Azimuth CCW limit: pre-limit (ACU software limit)': 'AzCCW_SWprelimit', + # 'Azimuth CW limit: pre-limit (ACU software limit)': 'AzCW_SWprelimit', + 'Azimuth CW limit': 'AzCW_SWlimit_operating', + # 'Azimuth CW limit: pre-limit': 'AzCW_HWprelimit', + # 'Azimuth CW limit: operating': 'AzCW_HWlimit_operating', + # 'Azimuth CW limit: emergency': 'AzCW_HWlimit_emergency', + # 'Azimuth CW limit: 
2nd emergency': 'AzCW_HWlimit_2ndEmergency', + # 'Elevation Down limit: emergency': 'ElDown_HWlimit_emergency', + # 'Elevation Down limit: operating': 'ElDown_HWlimit_operating', + # 'Elevation Down limit: pre-limit': 'ElDown_HWprelimit', + 'Elevation CCW limit': 'ElDown_SWlimit_operating', + # 'Elevation Down limit: pre-limit (ACU software limit)': 'ElDown_SWprelimit', + # 'Elevation Up limit: pre-limit (ACU software limit)': 'ElUp_SWprelimit', + 'Elevation CW limit': 'ElUp_SWlimit_operating', + # 'Elevation Up limit: pre-limit': 'ElUp_HWprelimit', + # 'Elevation Up limit: operating': 'ElUp_HWlimit_operating', + # 'Elevation Up limit: emergency': 'ElUp_HWlimit_emergency', + }, + 'axis_faults_errors_overages': { + 'Azimuth summary fault': 'Azimuth_summary_fault', + # 'Azimuth motion error': 'Azimuth_motion_error', + # 'Azimuth motor 1 overtemperature': 'Azimuth_motor1_overtemp', + # 'Azimuth motor 2 overtemperature': 'Azimuth_motor2_overtemp', + # 'Azimuth overspeed': 'Azimuth_overspeed', + # 'Azimuth regeneration resistor 1 overtemperature': 'Azimuth_resistor1_overtemp', + # 'Azimuth regeneration resistor 2 overtemperature': 'Azimuth_resistor2_overtemp', + # 'Azimuth overcurrent motor 1': 'Azimuth_motor1_overcurrent', + # 'Azimuth overcurrent motor 2': 'Azimuth_motor2_overcurrent', + 'Elevation summary fault': 'Elevation_summary_fault', + # 'Elevation motion error': 'Elevation_motion_error', + # 'Elevation motor 1 overtemp': 'Elevation_motor1_overtemp', + # 'Elevation overspeed': 'Elevation_overspeed', + # 'Elevation regeneration resistor 1 overtemperature': 'Elevation_resistor1_overtemp', + # 'Elevation overcurrent motor 1': 'Elevation_motor1_overcurrent', + }, + # 'axis_warnings': { + # 'Azimuth oscillation warning': 'Azimuth_oscillation_warning', + # 'Elevation oscillation warning': 'Elevation_oscillation_warning', + # }, + # 'axis_failures': { + # 'Azimuth servo failure': 'Azimuth_servo_failure', + # 'Azimuth brake 1 failure': 'Azimuth_brake1_failure', + # 
'Azimuth brake 2 failure': 'Azimuth_brake2_failure', + # 'Azimuth breaker failure': 'Azimuth_breaker_failure', + # 'Azimuth amplifier power cylce interlock': 'Azimuth_power_cycle_interlock', + # 'Azimuth amplifier 1 failure': 'Azimuth_amp1_failure', + # 'Azimuth amplifier 2 failure': 'Azimuth_amp2_failure', + # 'Azimuth CAN bus amplifier 1 communication failure': 'Az_CANbus_amp1_comms_failure', + # 'Azimuth CAN bus amplifier 2 communication failure': 'Az_CANbus_amp2_comms_failure', + # 'Azimuth encoder failure': 'Azimuth_encoder_failure', + # 'Azimuth tacho failure': 'Azimuth_tacho_failure', + # 'Elevation servo failure': 'Elevation_servo_failure', + # 'Elevation brake 1 failure': 'Elevation_brake1_failure', + # 'Elevation breaker failure': 'Elevation_breaker_failure', + # 'Elevation amplifier power cylce interlock': 'Elevation_power_cycle_interlock', + # 'Elevation amplifier 1 failure': 'Elevation_amp1_failure', + # 'Elevation CAN bus amplifier 1 communication failure': 'El_CANbus_amp1_comms_failure', + # 'Elevation encoder failure': 'Elevation_encoder_failure', + # }, + 'axis_state': { + 'Azimuth computer disabled': 'Azimuth_computer_disabled', + 'Azimuth axis disabled': 'Azimuth_disabled', + 'Azimuth axis in stop': 'Azimuth_axis_stop', + 'Azimuth brakes released': 'Azimuth_brakes_released', + 'Azimuth stop at LCP': 'Azimuth_stop_LCP', + 'Azimuth power on': 'Azimuth_power_on', + # 'Azimuth AUX 1 mode selected': 'Azimuth_AUX1_mode_selected', + # 'Azimuth AUX 2 mode selected': 'Azimuth_AUX2_mode_selected', + # 'Azimuth immobile': 'Azimuth_immobile', + 'Elevation computer disabled': 'Elevation_computer_disabled', + 'Elevation axis disabled': 'Elevation_disabled', + 'Elevation axis in stop': 'Elevation_axis_stop', + 'Elevation brakes released': 'Elevation_brakes_released', + 'Elevation stop at LCP': 'Elevation_stop_LCP', + 'Elevation power on': 'Elevation_power_on', + # 'Elevation immobile': 'Elevation_immobile', + }, + # 'osc_alarms': { + # 'Azimuth oscillation 
alarm': 'Azimuth_oscillation_alarm', + # 'Elevation oscillation alarm': 'Elevation_oscillation_alarm', + # }, + 'commands': { + 'Azimuth commanded position': 'Azimuth_commanded_position', + 'Elevation commanded position': 'Elevation_commanded_position', + }, + # 'ACU_failures_errors': { + # 'General summary fault': 'General_summary_fault', + # 'Power failure (latched)': 'Power_failure_Latched', + # '24V power failure': 'Power_failure_24V', + # 'General Breaker failure': 'General_breaker_failure', + # 'Power failure (not latched)': 'Power_failure_NotLatched', + # 'Cabinet Overtemperature': 'Cabinet_overtemp', + # 'Ambient temperature low (operation inhibited)': 'Ambient_temp_TooLow', + # 'PLC-ACU interface error': 'PLC_interface_error', + # 'ACU fan failure': 'ACU_fan_failure', + # 'Cabinet undertemperature': 'Cabinet_undertemp', + # 'Time synchronisation error': 'Time_sync_error', + # 'ACU-PLC communication error': 'PLC_comms_error', + # 'Program Track position failure': 'ProgramTrack_position_failure', + # 'Start of Program Track too early': 'Track_start_too_early', + # 'Turnaround acceleration too high': 'Turnaround_accel_too_high', + # 'Turnaround time too short': 'Turnaround_time_too_short', + # }, + 'platform_status': { + 'PCU Operation': 'PCU_operation', + # 'Safe': 'Safe_mode', + # 'Lightning protection surge arresters': 'Lightning_protection_surge_arresters', + # 'Co-Moving Shield off': 'CoMoving_shield_off', + 'Remote': 'Remote_mode', + }, + # 'ACU_emergency': { + # 'E-Stop servo drive cabinet': 'EStop_servo_drive_cabinet', + # 'E-Stop service pole': 'EStop_service_pole', + # 'E-Stop Az movable': 'EStop_Az_movable', + # 'Key Switch Bypass Emergency Limit': 'Key_switch_bypass_emergency_limit', + # } + } + }, + + } + +def allkeys(platform_type): + all_keys = [] + pfd = status_fields[platform_type]['status_fields'] + for category in pfd.keys(): + for key in pfd[category].keys(): + all_keys.append(key) + return all_keys \ No newline at end of file diff --git 
a/pcs/agents/acu_interface/tests/__init__.py b/pcs/agents/acu_interface/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pcs/agents/acu_interface/tests/mock_tcs.py b/pcs/agents/acu_interface/tests/mock_tcs.py new file mode 100644 index 0000000..65e32a7 --- /dev/null +++ b/pcs/agents/acu_interface/tests/mock_tcs.py @@ -0,0 +1,75 @@ +"""Minimal mock of the FYST Go TCS REST surface for testing. + +A tiny FastAPI app mirroring the four endpoints the typed scan tasks drive (``POST +/api/v1/telescope/path``, ``/move-to``, ``/abort``, ``GET .../acu/status``), shaped +after ``observatory-control-system/api/app/telescope.py``. Each handler records the +JSON it received on ``app.state.recorder`` and returns 200, so a test can assert +what the task POSTed. The ``/api/v1/telescope`` prefix matches the PCS client's +``url_prefix``, so a client at this app's base URL hits the same routes as the real +OCS proxy. Used by the dispatch tests via FastAPI's ``TestClient`` (in-process, no +live server); import is guarded so the suite runs without FastAPI. 
+""" + +from typing import Literal + +from fastapi import FastAPI, Request +from pydantic import BaseModel, conlist + +PREFIX = "/api/v1/telescope" + + +class Recorder: + """Collects the requests the mock received, for test assertions.""" + + def __init__(self) -> None: + self.path_bodies: list[dict] = [] + self.move_to_bodies: list[dict] = [] + self.abort_count: int = 0 + + +class MoveToParameters(BaseModel): + azimuth: float + elevation: float + + +class PathParameters(BaseModel): + start_time: float + coordsys: Literal["Horizon", "ICRS"] + points: list[conlist(float, min_length=5, max_length=5)] + + +def create_mock_tcs() -> FastAPI: + """Build a mock-TCS FastAPI app with a fresh :class:`Recorder` on state.""" + app = FastAPI() + app.state.recorder = Recorder() + + @app.post(f"{PREFIX}/move-to") + async def move_to(param: MoveToParameters, request: Request): + request.app.state.recorder.move_to_bodies.append(param.model_dump()) + return {"status": "ok", "message": "moving"} + + @app.post(f"{PREFIX}/path") + async def path(param: PathParameters, request: Request): + request.app.state.recorder.path_bodies.append(param.model_dump()) + return {"status": "ok", "message": "path accepted"} + + @app.post(f"{PREFIX}/abort") + async def abort(request: Request): + request.app.state.recorder.abort_count += 1 + return {"status": "ok", "message": "aborted"} + + @app.get(f"{PREFIX}/acu/status") + async def acu_status(request: Request): + # Drained StatusGeneral8100-shaped dict (raw ACU aliases) so a + # completion-poll test can assert the normal-exit path: free stack + # at maxFreeProgramTrackStack-1 (9999) with zero axis velocities is the + # "scan complete (stack drained)" signal the constant_el_scan poll uses. 
+ return { + "Qty of free program track stack positions": 9999, + "Azimuth current velocity": 0.0, + "Elevation current velocity": 0.0, + "Azimuth current position": 0.0, + "Elevation current position": 60.0, + } + + return app diff --git a/pcs/agents/acu_interface/tests/test_abort_stops_scan_process.py b/pcs/agents/acu_interface/tests/test_abort_stops_scan_process.py new file mode 100644 index 0000000..50cef19 --- /dev/null +++ b/pcs/agents/acu_interface/tests/test_abort_stops_scan_process.py @@ -0,0 +1,283 @@ +"""Execution coverage for the standalone ``abort`` Task stopping running scans. + +Background, the bug this guards +--------------------------------- +Two abort paths: + +* NORMAL, stopping a running typed-scan **Process** sets its session to + ``'stopping'``; the scan's dispatch loop sends its own Go TCS ``/abort`` and + RETURNS, releasing ``azel_lock``. Already works. +* The standalone ``abort`` **Task** used to ONLY send a bare ``/abort``. Go TCS + ``/abort`` cancel+Stops but does NOT ProgramTrackClear (telescope.go), so on an + early abort the stack-drained latch never fires; absent the ``'stopping'`` + signal the scan Process kept spinning, holding ``azel_lock`` to its scan-end + backstop. + +The fix: the ``abort`` Task ALSO stops every running typed scan Process +(``self.agent.stop`` over ``SCAN_PROCESS_OPS``) so an out-of-band abort cannot +strand the lock. + +What this file proves, by EXECUTING ``ACUAgent.abort`` +------------------------------------------------------- +1. A running scan's session is flipped to ``'stopping'`` AND the bare ``/abort`` + still goes out. +2. NEGATIVE CONTROL: the OLD body (``_safe_abort`` only) leaves the scan + ``'running'``. The lock would be stranded. +3. Safe with no scan running (every ``stop`` returns an error tuple, none raises). +4. Idempotent when a scan is already ``'stopping'`` (stopper not re-invoked). 
+ +Harness: ``OCSAgent.stop`` only touches ``self.tasks`` / ``processes`` / +``sessions`` / ``access_config`` / ``op.{stopper,stopper_blocking,min_privs}``, so +we build a real ``OCSAgent`` via ``__new__`` + the REAL ``register_process`` and a +``'none'`` access policy (``min_privs=0`` + ``password=None`` clears the privilege +gate). ``abort`` calls ``self.agent.stop(op)`` on the reactor (MainThread) thread, +so ``OpSession.set_status`` takes its synchronous in-reactor path. No real +reactor needed; ``sync_reactor`` drives the one off-reactor ``_safe_abort`` call. +``OpSession`` gets a stub ``app`` with a ``.log`` (``app=None`` makes +``add_message`` raise). WSL-only: needs the real ``ocs.ocs_agent`` (shadowed on +Windows), so the module skip-guards like its siblings. +""" + +import importlib.util +import sys +import types + +import pytest + + +# --- Real ocs framework required (skip-guard exactly like the sibling files). +_AGENT = None +_IMPORT_ERR = None +try: + if importlib.util.find_spec("ocs.ocs_agent") is None: + raise ImportError("ocs.ocs_agent not importable (single-file REST client shadows it)") + import ocs + from ocs import access + from ocs.ocs_agent import OpSession + + from twisted.internet import defer + from twisted.internet.defer import Deferred + from twisted.python.failure import Failure + + from pcs.agents.acu_interface import agent as _AGENT +except Exception as exc: # pragma: no cover - environment-dependent + _IMPORT_ERR = exc + +pytestmark = pytest.mark.skipif( + _AGENT is None, reason=f"real ocs framework not importable ({_IMPORT_ERR})" +) + + +class _Log: + def info(self, *a, **k): + pass + + warn = warning = error = debug = critical = info + + +class _App: + """Stub WAMP app: ``OpSession.add_message`` reads ``app.log``.""" + + def __init__(self): + self.log = _Log() + + +class _FakeTCS: + """Records the ``/abort`` so the test can assert the bare abort still fires.""" + + def __init__(self): + self.aborted = False + + def abort(self): + 
self.aborted = True + return {"status": "ok"} + + +def _make_ocs_agent(): + """A minimal real ``OCSAgent`` carrying only what ``stop()`` reads.""" + from ocs.ocs_agent import OCSAgent + + a = OCSAgent.__new__(OCSAgent) + a.log = _Log() + a.tasks = {} + a.processes = {} + a.sessions = {} + a.access_config = access.agent_get_policy_default(None) + return a + + +def _simple_process_stop(session, params): + """Non-blocking stopper: flips a running session to 'stopping', returns a + Deferred (what _stop_helper expects for a non-blocking stopper).""" + if session.status == "running": + session.set_status("stopping") + return Deferred() + + +def _make_acu_agent(ocs_agent, tcs): + """A real ``ACUAgent`` with just enough state to EXECUTE ``abort``.""" + a = _AGENT.ACUAgent.__new__(_AGENT.ACUAgent) + a.log = _Log() + a.agent = ocs_agent + a._make_tcs = lambda: tcs # avoid needing acu_conf/certs + return a + + +def _register_scans(ocs_agent): + """Register the four typed scans as genuine non-blocking Processes.""" + for op in _AGENT.SCAN_PROCESS_OPS: + ocs_agent.register_process( + op, lambda s, p: None, _simple_process_stop, + blocking=False, min_privs=0) + + +@pytest.fixture +def sync_reactor(monkeypatch): + """Run the @inlineCallbacks ``abort`` body synchronously: ``deferToThread`` + runs the fn inline; ``dsleep`` is an already-fired no-op. 
``self.agent.stop`` + runs directly on this (MainThread = reactor-context) thread, so + ``set_status`` takes its synchronous in-reactor path.""" + monkeypatch.setattr( + _AGENT, + "threads", + types.SimpleNamespace( + deferToThread=lambda fn, *a, **k: defer.maybeDeferred(fn, *a, **k) + ), + ) + monkeypatch.setattr(_AGENT, "dsleep", lambda *a, **k: defer.succeed(None)) + + +def _run(d): + out = [] + d.addBoth(out.append) + assert out, "Deferred did not fire synchronously" + res = out[0] + if isinstance(res, Failure): + res.raiseException() + return res + + +# --------------------------------------------------------------------------- +# 1. abort stops a running scan Process (releases the lock) AND sends /abort. +# --------------------------------------------------------------------------- + + +def test_abort_stops_running_scan_releases_lock(sync_reactor): + """A typed scan is running; ``abort`` flips its session to 'stopping' (the + signal its dispatch loop watches to release ``azel_lock``) AND still sends the + bare Go TCS /abort. EXECUTES ``ACUAgent.abort``.""" + ocs_agent = _make_ocs_agent() + _register_scans(ocs_agent) + app = _App() + running = OpSession(0, "source_scan", status="running", app=app) + ocs_agent.sessions["source_scan"] = running + + tcs = _FakeTCS() + agent = _make_acu_agent(ocs_agent, tcs) + session = OpSession(1, "abort", status="running", app=app) + + ok, msg = _run(agent.abort(session, {})) + + assert ok is True + assert running.status == "stopping", \ + "abort did not stop the running scan; azel_lock would be stranded" + assert tcs.aborted, "abort did not also send the bare Go TCS /abort" + + +# --------------------------------------------------------------------------- +# 2. NEGATIVE CONTROL: the OLD body (no stop loop) leaves the scan running. 
+# --------------------------------------------------------------------------- + + +def test_old_abort_leaves_scan_running(sync_reactor): + """Reconstruct the OLD ``abort`` body (only ``_safe_abort``, no stop loop) and + drive it against the same running scan: its session stays 'running', the + lock is stranded. Proves the fix is load-bearing, deterministically.""" + ocs_agent = _make_ocs_agent() + _register_scans(ocs_agent) + app = _App() + running = OpSession(0, "source_scan", status="running", app=app) + ocs_agent.sessions["source_scan"] = running + + tcs = _FakeTCS() + agent = _make_acu_agent(ocs_agent, tcs) + + @defer.inlineCallbacks + def _old_abort_body(): + # The pre-fix body: bare /abort only, no SCAN_PROCESS_OPS stop loop. + ok = yield _AGENT.threads.deferToThread(agent._safe_abort, agent._make_tcs()) + return (True, "old") if ok else (False, "old-fail") + + # _safe_abort needs a real send; give it the recording fake. + agent._safe_abort = lambda t: (t.abort() or True) + + ok, _ = _run(_old_abort_body()) + + assert ok is True + assert tcs.aborted, "control sanity: the bare /abort should still fire" + assert running.status == "running", \ + "OLD body unexpectedly stopped the scan; control is vacuous" + + +# --------------------------------------------------------------------------- +# 3. abort is safe with no scan running. 
+# --------------------------------------------------------------------------- + + +def test_abort_safe_with_no_scan_running(sync_reactor): + """No scan active (all sessions None): ``abort`` returns success, and every + ``self.agent.stop(op)`` returns an error tuple WITHOUT raising.""" + ocs_agent = _make_ocs_agent() + _register_scans(ocs_agent) # sessions all None + tcs = _FakeTCS() + agent = _make_acu_agent(ocs_agent, tcs) + app = _App() + session = OpSession(0, "abort", status="running", app=app) + + ok, msg = _run(agent.abort(session, {})) + + assert ok is True + assert tcs.aborted + # Sanity: stop() on a None-session op returns ERROR, no raise. + status, _, _ = ocs_agent.stop("pong_scan") + assert status == ocs.ERROR + + +# --------------------------------------------------------------------------- +# 4. abort is idempotent when a scan is already stopping. +# --------------------------------------------------------------------------- + + +def test_abort_idempotent_when_already_stopping(sync_reactor): + """A scan already 'stopping': ``abort`` must not re-invoke the stopper or + trip the forward-only status assert. The session stays 'stopping' and the op + returns the 'already stopping' error tuple.""" + ocs_agent = _make_ocs_agent() + _register_scans(ocs_agent) + app = _App() + already = OpSession(0, "daisy_scan", status="running", app=app) + already.set_status("stopping") # pre-set + ocs_agent.sessions["daisy_scan"] = already + + # Spy: ensure the stopper is NOT called again for the already-stopping op. 
+ called = {"daisy_scan": 0} + orig = _simple_process_stop + + def _spy_stop(session, params): + called["daisy_scan"] += 1 + return orig(session, params) + + ocs_agent.processes["daisy_scan"].stopper = _spy_stop + + tcs = _FakeTCS() + agent = _make_acu_agent(ocs_agent, tcs) + session = OpSession(1, "abort", status="running", app=app) + + ok, msg = _run(agent.abort(session, {})) + + assert ok is True + assert already.status == "stopping" + assert called["daisy_scan"] == 0, \ + "stopper was re-invoked on an already-stopping op (not idempotent)" + status, m, _ = ocs_agent.stop("daisy_scan") + assert status == ocs.ERROR and "already" in m.lower() diff --git a/pcs/agents/acu_interface/tests/test_acu_read_concurrency.py b/pcs/agents/acu_interface/tests/test_acu_read_concurrency.py new file mode 100644 index 0000000..8c8a065 --- /dev/null +++ b/pcs/agents/acu_interface/tests/test_acu_read_concurrency.py @@ -0,0 +1,465 @@ +"""Empirical proof that the per-scan-client fix removes concurrent same-Session +access. + +Background, the bug this guards +--------------------------------- +``aculib.observatory_control_system`` holds ONE persistent ``requests.Session``, +which is NOT thread-safe. Before the fix the agent reused the single shared +``self.acu_read`` client from three places that can run at once on DIFFERENT +threads: + +* the always-on ``monitor`` Process (reactor thread; on by default, NOT gated by + ``azel_lock``), ``self.acu_read.get_status()``; +* a running scan's thread-pool calls in ``_dispatch_scan_process`` + (``deferToThread`` worker), ``move_to`` / ``get_status`` / ``scan_pattern``; +* the standalone ``abort`` task's ``/abort`` POST. + +So a scan's pool-thread GET/POST could enter the very Session the monitor's +reactor thread was mid-request on (highest-severity: abort-vs-scan). 
The fix +gives every scan (and the ``abort`` task) its OWN client via +:meth:`ACUAgent._make_tcs`, leaving only the monitor on ``self.acu_read``; the +``_current_encoder_azel`` cold-broadcast fallback reads the per-scan client it is +handed, not ``self.acu_read``. + +What this file proves, deterministically, not by luck +------------------------------------------------------- +1. ``_make_tcs`` returns a DISTINCT client whose ``.session`` differs from + ``self.acu_read.session`` and is fresh per call. +2. Driving the REAL paths (monitor's ``self.acu_read.get_status()`` vs a scan's + ``scan_pattern()`` / ``get_status()`` on a ``_make_tcs()`` client), no single + Session is entered from two threads at once. +3. CONTROL: with the OLD shared pattern (both threads on ``self.acu_read.session``) + the SAME Session IS entered by two threads at once, proving the hazard was + real and distinct Sessions remove it. + +Determinism: not "run two threads and hope they collide". We instrument +``requests.Session.request`` (the method both ``.get`` and ``.post`` funnel +through) with a per-Session re-entrancy counter recording the MAX threads +simultaneously inside that one Session, widening the window with a fixed in-call +sleep + a ``threading.Barrier`` that releases both threads together. Distinct +Sessions -> max concurrency 1 by construction; a forced-shared Session -> +deterministically both threads inside it. Assertions are on those recorded +maxima, not on whether a race corrupted anything. HTTP is answered by a tiny +in-process stdlib loopback server, so ``Session.request`` runs end-to-end (never +mocked, only wrapped). + +Skip-guarded on Windows: the agent imports the SO ``ocs`` framework, unimportable +on a dev box (a single-file OCS REST client named ``ocs`` shadows +``ocs.ocs_agent``). We stub ONLY the three ``ocs`` names the module imports +(matching ``test_dispatch_runner.py``), then import the agent; if that fails the +module skips so Windows stays green (the 2 genuine agent-import skips).
+""" + +import importlib.util +import sys +import threading +import types + +import pytest +import requests + +# --- Import the agent exactly as test_dispatch_runner.py does (stub the three +# ocs names the module needs; skip the whole file if the agent is unimportable). +_AGENT = None +_IMPORT_ERR = None +try: + try: + _real_ocs = importlib.util.find_spec("ocs.ocs_agent") is not None + except (ImportError, ValueError, ModuleNotFoundError): + _real_ocs = False + if not _real_ocs: + import ocs # the single-file REST client + + ocs.ocs_agent = types.SimpleNamespace(param=lambda *a, **k: (lambda f: f)) + ocs.site_config = types.SimpleNamespace() + _otw = types.ModuleType("ocs.ocs_twisted") + _otw.TimeoutLock = type("TimeoutLock", (), {}) + sys.modules.setdefault("ocs.ocs_twisted", _otw) + ocs.ocs_twisted = _otw + + from pcs.agents.acu_interface import agent as _AGENT + from pcs.agents.acu_interface import aculib as _ACULIB +except Exception as exc: # pragma: no cover - environment-dependent + _IMPORT_ERR = exc + +pytestmark = pytest.mark.skipif( + _AGENT is None, reason=f"agent module not importable ({_IMPORT_ERR})" +) + + +class _Log: + def info(self, *a, **k): + pass + + warn = warning = error = debug = info + + +# A cert-less device block: empty cert strings route start_session down its +# cert-less branch (aculib.py:103-107), so _make_tcs builds a real +# requests.Session with no filesystem/network dependency. This is exactly the +# "cert-less device block" case the _make_tcs docstring documents. 
+def _bare_agent(base_url): + """Real ACUAgent with just enough state to call the REAL _make_tcs + + construct the REAL shared self.acu_read, both via the genuine + aculib.observatory_control_system, no __init__, no reactor, no network on + construction.""" + a = _AGENT.ACUAgent.__new__(_AGENT.ACUAgent) + a.log = _Log() + a.acu_conf = {"base_url": base_url, "certs": {"verify": False}} + # The shared client the monitor + _current_encoder_azel use, built the same + # way the real __init__ builds it (aculib.py:82) but cert-less. + a.acu_read = _ACULIB.observatory_control_system(base_url, a.log, verify_cert=False) + # Cold broadcast so _current_encoder_azel falls through to its status read, + # the path the fallback-routing tests exercise. + a.data = {"broadcast": {}} + return a + + +# --------------------------------------------------------------------------- +# 1. _make_tcs returns a DISTINCT, fresh requests.Session each call. +# --------------------------------------------------------------------------- + + +def test_make_tcs_returns_fresh_distinct_session(): + """``_make_tcs()`` builds a fresh client whose ``.session`` is distinct from + ``self.acu_read.session`` AND from any other ``_make_tcs()`` call, the + structural precondition for the no-shared-Session property.""" + agent = _bare_agent("http://127.0.0.1:9") + + c1 = agent._make_tcs() + c2 = agent._make_tcs() + + assert isinstance(c1.session, requests.Session) + assert isinstance(agent.acu_read.session, requests.Session) + # Distinct from the shared monitor client... + assert c1.session is not agent.acu_read.session + assert c2.session is not agent.acu_read.session + # ...and fresh per call (two scans never share a Session). + assert c1.session is not c2.session + # The client objects themselves are distinct too. 
+ assert c1 is not c2 and c1 is not agent.acu_read + + +# --------------------------------------------------------------------------- +# Concurrency harness: a per-Session re-entrancy meter + a tiny loopback HTTP +# server, so requests.Session.request runs end-to-end and we MEASURE the max +# threads simultaneously inside each individual Session. +# --------------------------------------------------------------------------- + + +class _ConcurrencyMeter: + """Wraps ``requests.Session.request`` to record, per Session instance, the max + threads simultaneously inside ``request``. A fixed in-window sleep + a barrier + widen the window so an overlap is observed deterministically iff two threads + share a Session.""" + + def __init__(self, barrier, window_sec=0.15): + self._orig = requests.Session.request + self._barrier = barrier + self._window = window_sec + self._lock = threading.Lock() + self.live = {} # id(session) -> current concurrent count + self.peak = {} # id(session) -> max concurrent count observed + + def __enter__(self): + meter = self + + def _patched(session, method, url, *args, **kwargs): + # Release both worker threads together, then hold the Session for a + # fixed window so a genuine overlap is guaranteed if they share one. 
+ try: + meter._barrier.wait(timeout=5) + except threading.BrokenBarrierError: + pass + sid = id(session) + with meter._lock: + n = meter.live.get(sid, 0) + 1 + meter.live[sid] = n + meter.peak[sid] = max(meter.peak.get(sid, 0), n) + try: + import time + + time.sleep(meter._window) + return meter._orig(session, method, url, *args, **kwargs) + finally: + with meter._lock: + meter.live[sid] -= 1 + + requests.Session.request = _patched + return self + + def __exit__(self, *exc): + requests.Session.request = self._orig + return False + + def peak_for(self, session): + return self.peak.get(id(session), 0) + + +def _loopback_server(): + """A tiny stdlib HTTP server that answers any GET/POST with a JSON body the + aculib client can parse (``.json()`` -> a drained-status dict; POST returns a + parseable body for ``scan_pattern``/``abort``). Returns (base_url, shutdown).""" + from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer + + class _H(BaseHTTPRequestHandler): + def _reply(self): + body = ( + b'{"status": "ok", "message": "ok", ' + b'"Qty of free program track stack positions": 9999, ' + b'"Azimuth current velocity": 0.0, "Elevation current velocity": 0.0}' + ) + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + do_GET = do_POST = lambda self: self._reply() + + def log_message(self, *a, **k): + pass + + srv = ThreadingHTTPServer(("127.0.0.1", 0), _H) + t = threading.Thread(target=srv.serve_forever, daemon=True) + t.start() + host, port = srv.server_address + return f"http://{host}:{port}", srv.shutdown + + +def _drive_two_threads(monitor_call, scan_call, barrier, *, require_clean=True): + """Run ``monitor_call`` and ``scan_call`` on two real threads, surfacing any + exception. The barrier (inside the meter) makes them enter ``request`` + together. 
With ``require_clean`` (default) any worker exception fails the + test; the negative control passes ``require_clean=False`` because a transport + fault from the deliberately-induced shared-Session race is itself proof of + the hazard, not a test failure. Returns the captured worker errors.""" + errors = [] + + def _wrap(fn): + def _run(): + try: + fn() + except BaseException as e: # noqa: BLE001 - surface to the test + errors.append(e) + + return _run + + t_mon = threading.Thread(target=_wrap(monitor_call)) + t_scan = threading.Thread(target=_wrap(scan_call)) + t_mon.start() + t_scan.start() + t_mon.join(timeout=15) + t_scan.join(timeout=15) + assert not t_mon.is_alive() and not t_scan.is_alive(), "worker thread hung" + if require_clean: + assert not errors, f"worker raised: {errors!r}" + return errors + + +# --------------------------------------------------------------------------- +# 2. THE FIX: monitor on self.acu_read vs scan on a _make_tcs() client -> +# no single Session is entered by two threads at once. +# --------------------------------------------------------------------------- + + +def test_fix_no_session_entered_concurrently(): + """Drive the REAL agent paths concurrently: monitor's + ``self.acu_read.get_status()`` (reactor thread) vs a scan's + ``scan_pattern(...)`` + ``get_status()`` on a fresh ``_make_tcs()`` client + (pool thread). The Sessions are distinct, so each peak concurrency is exactly + 1 even though the two HTTP calls genuinely overlap (meter window + barrier).""" + base_url, shutdown = _loopback_server() + try: + agent = _bare_agent(base_url) + scan_tcs = agent._make_tcs() # what every typed scan now uses + # Sanity: distinct Sessions (precondition for the property under test). 
+ assert scan_tcs.session is not agent.acu_read.session + + payload = { + "start_time": 1.0, + "coordsys": "Horizon", + "points": [[0.0, 1.0, 2.0, 0.0, 0.0]], + } + barrier = threading.Barrier(2) + with _ConcurrencyMeter(barrier) as meter: + _drive_two_threads( + monitor_call=agent.acu_read.get_status, # reactor-thread path + scan_call=lambda: ( + scan_tcs.scan_pattern(payload), # POST (Session.request) + scan_tcs.get_status(), # GET (Session.request) + ), + barrier=barrier, + ) + + # The decisive assertion: NO Session was entered by two threads at once. + assert meter.peak_for(agent.acu_read.session) == 1, ( + "monitor's shared Session was entered concurrently; " + "fix did not isolate it" + ) + assert meter.peak_for(scan_tcs.session) == 1, ( + "scan's per-scan Session was entered concurrently" + ) + # And they really are different Session objects. + assert id(scan_tcs.session) != id(agent.acu_read.session) + finally: + shutdown() + + +# --------------------------------------------------------------------------- +# 3. CONTROL: force BOTH paths back onto self.acu_read.session (the OLD shared +# pattern) -> that ONE Session IS entered by two threads at once. +# --------------------------------------------------------------------------- + + +def test_control_shared_session_is_entered_concurrently(): + """Negative control proving the hazard was real. Reconstruct the OLD pattern: + both the monitor path and the scan path drive ``self.acu_read`` (one shared + Session). Through the same harness that single Session's peak concurrency + reaches 2, exactly the bug. 
If this did NOT reach 2, the fix test would be + vacuous.""" + base_url, shutdown = _loopback_server() + try: + agent = _bare_agent(base_url) + shared = agent.acu_read # the OLD shared client both paths reused + + payload = { + "start_time": 1.0, + "coordsys": "Horizon", + "points": [[0.0, 1.0, 2.0, 0.0, 0.0]], + } + barrier = threading.Barrier(2) + with _ConcurrencyMeter(barrier) as meter: + errors = _drive_two_threads( + monitor_call=shared.get_status, # monitor on shared + scan_call=lambda: shared.scan_pattern(payload), # scan ALSO on shared + barrier=barrier, + require_clean=False, # a transport fault on the shared Session is ALSO the hazard + ) + + # The hazard manifests EITHER as two threads co-occupying the one shared + # Session (peak == 2) OR as a transport fault raised by that unsafe + # concurrent use (a requests/urllib3 connection reset, or aculib's + # SystemExit on a RequestException). Both prove the shared Session is + # unsafe under concurrency. Accepting either keeps the control robust + # under heavy full-suite load (where the race can fault before peak is + # sampled) WITHOUT weakening it: an unrelated error type would not satisfy + # this, and the fix test above still requires the clean peak == 1. + transport_fault = any( + isinstance(e, (requests.exceptions.RequestException, ConnectionError, OSError, SystemExit)) + for e in errors + ) + assert meter.peak_for(shared.session) == 2 or transport_fault, ( + "control did not reproduce the hazard: expected two threads inside the " + "shared Session (peak == 2) or a transport fault from the shared-Session " + f"race; got peak={meter.peak_for(shared.session)}, errors={errors!r}" + ) + finally: + shutdown() + + +# --------------------------------------------------------------------------- +# 4. COUPLING: after the fix the monitor's status read (off-reactor on +# self.acu_read) and _current_encoder_azel's cold-broadcast fallback (now +# reading the PER-SCAN tcs) must NOT share a Session. 
The naive monitor-only fix +# would leave the fallback on self.acu_read -> a SECOND thread on the monitor's +# Session -> the very race the per-scan-client fix removed. These drive the REAL +# post-fix bodies. +# --------------------------------------------------------------------------- + + +def test_monitor_offreactor_and_fallback_no_shared_session(): + """EXECUTES the changed ``_current_encoder_azel``: with a COLD broadcast it + falls through to ``tcs.get_status()`` on the PER-SCAN client. Driven against + the monitor's ``self.acu_read.get_status()``, the two Sessions are distinct so + each peak is <= 1, the post-fix actor map (fallback reads ``tcs``, never + ``self.acu_read``).""" + base_url, shutdown = _loopback_server() + try: + agent = _bare_agent(base_url) # cold broadcast (a.data = {'broadcast': {}}) + scan_tcs = agent._make_tcs() # the per-scan client the runner hands the fallback + assert scan_tcs.session is not agent.acu_read.session + + barrier = threading.Barrier(2) + with _ConcurrencyMeter(barrier) as meter: + _drive_two_threads( + monitor_call=agent.acu_read.get_status, # Session A (reactor path) + # Cold broadcast -> _current_encoder_azel hits tcs.get_status() + # on the per-scan client (Session B), the post-fix fallback. + scan_call=lambda: agent._current_encoder_azel(scan_tcs), + barrier=barrier, + ) + + # Sanity: the cold-broadcast fallback DID issue its status GET on the + # per-scan Session (peak >= 1 means that Session was entered end-to-end), + # i.e. the fallback read ``tcs``, NOT ``self.acu_read``. (The loopback body + # carries no position keys, so the helper's RETURN is None, irrelevant + # to the Session-isolation property under test.) + assert meter.peak_for(scan_tcs.session) >= 1, \ + "fallback did not issue its status GET on the per-scan Session" + # The decisive assertions: neither Session was entered by two threads. 
+ assert meter.peak_for(agent.acu_read.session) == 1, \ + "monitor's shared Session was entered concurrently by the fallback" + assert meter.peak_for(scan_tcs.session) == 1, \ + "per-scan Session was entered concurrently" + finally: + shutdown() + + +def test_control_fallback_on_acu_read_races(): + """NEGATIVE CONTROL: reconstruct the OLD coupling, the fallback reading + ``self.acu_read`` (the naive monitor-only fix) instead of the per-scan client. + Driven against the monitor's ``self.acu_read.get_status()``, that ONE shared + Session reaches peak 2 (or faults), proving the coupling was real and that + routing the fallback onto the per-scan Session is what removes it.""" + base_url, shutdown = _loopback_server() + try: + agent = _bare_agent(base_url) + shared = agent.acu_read + + # The pre-fix fallback shape: cold broadcast -> read self.acu_read, NOT + # a per-scan tcs. (A faithful copy of the old body's status branch.) + def _old_fallback_reads_acu_read(): + bcast = agent.data.get("broadcast", {}) + if "Azimuth" in bcast and "Elevation" in bcast: + return float(bcast["Azimuth"]), float(bcast["Elevation"]), "broadcast" + status = shared.get_status() # <-- the shared Session, the hazard + return (status.get("Azimuth current position"), + status.get("Elevation current position"), "status") + + barrier = threading.Barrier(2) + with _ConcurrencyMeter(barrier) as meter: + errors = _drive_two_threads( + monitor_call=shared.get_status, + scan_call=_old_fallback_reads_acu_read, + barrier=barrier, + require_clean=False, # a transport fault on the shared Session is ALSO the hazard + ) + + transport_fault = any( + isinstance(e, (requests.exceptions.RequestException, ConnectionError, OSError, SystemExit)) + for e in errors + ) + assert meter.peak_for(shared.session) == 2 or transport_fault, ( + "control did not reproduce the coupling: expected two threads inside " + "the shared self.acu_read Session (peak == 2) or a transport fault; got " + 
f"peak={meter.peak_for(shared.session)}, errors={errors!r}") + finally: + shutdown() + + +def test_current_encoder_azel_prefers_broadcast_no_session_touch(): + """When the broadcast is WARM, ``_current_encoder_azel`` returns from it and + must NOT touch the client at all, guarding the fast path stays Session-free + (so the pool-thread read of ``self.data['broadcast']`` is the only access, + and the per-scan Session is never entered). A tcs whose ``get_status`` raises + proves the fallback was not taken.""" + agent = _bare_agent("http://127.0.0.1:9") + agent.data = {"broadcast": {"Azimuth": 12.0, "Elevation": 45.0}} + + class _Boom: + def get_status(self): + raise AssertionError("fast path must not call tcs.get_status()") + + pos = agent._current_encoder_azel(_Boom()) + assert pos == (12.0, 45.0, "broadcast") diff --git a/pcs/agents/acu_interface/tests/test_aculib_fromfile.py b/pcs/agents/acu_interface/tests/test_aculib_fromfile.py new file mode 100644 index 0000000..b302262 --- /dev/null +++ b/pcs/agents/acu_interface/tests/test_aculib_fromfile.py @@ -0,0 +1,70 @@ +"""Regression test: ``aculib.scan_pattern_from_file`` must return its response. + +The method previously ended in a bare ``return`` (None), so the legacy +``fromfile_scan`` Task's ``msg.status_code`` log crashed with ``AttributeError`` +on EVERY call (and a real 503 -> ``{}`` would crash too). It now returns whatever +``scan_pattern`` returns (the Response on success, ``{}`` on a 503 +short-circuit), so the caller can read the status via ``tcs_response_status``. + +ocs-free (``aculib`` imports no OCS/twisted), so this runs in the minimal env and +is date-independent. 
+""" + +import logging + +from pcs.agents.acu_interface import aculib + + +def _client(): + """Build a client with empty certs (plain session, no network at init).""" + return aculib.observatory_control_system( + url="http://localhost:0", + log=logging.getLogger("test_aculib_fromfile"), + server_cert="", + client_cert="", + client_key="", + verify_cert=False, + ) + + +def _write_points(tmp_path): + """A minimal two-point Horizon path file (az el per line).""" + p = tmp_path / "path.txt" + p.write_text("120.0 45.0\n121.0 45.0\n", encoding="utf-8") + return str(p) + + +def test_scan_pattern_from_file_returns_response(tmp_path, monkeypatch): + """On a real Response it returns that Response (not None).""" + + class _Resp: + status_code = 200 + text = "ok" + + resp = _Resp() + client = _client() + # scan_pattern is the only thing that would touch the network; stub it. + monkeypatch.setattr(client, "scan_pattern", lambda data: resp) + result = client.scan_pattern_from_file(_write_points(tmp_path)) + assert result is resp + + +def test_scan_pattern_from_file_503_returns_empty_dict(tmp_path, monkeypatch): + """A 503 (``scan_pattern`` -> ``{}``) propagates ``{}``, never None.""" + client = _client() + monkeypatch.setattr(client, "scan_pattern", lambda data: {}) + result = client.scan_pattern_from_file(_write_points(tmp_path)) + assert result == {} + # The old bug: a bare ``return`` yielded None, crashing ``None.status_code``. 
+ assert result is not None + + +def test_scan_pattern_from_file_passes_parsed_points(tmp_path, monkeypatch): + """The file is parsed into float rows and handed to scan_pattern as 'points'.""" + captured = {} + client = _client() + monkeypatch.setattr( + client, "scan_pattern", lambda data: captured.update(data) or {}) + client.scan_pattern_from_file(_write_points(tmp_path)) + assert captured["coordsys"] == "Horizon" + assert captured["points"] == [[120.0, 45.0], [121.0, 45.0]] diff --git a/pcs/agents/acu_interface/tests/test_aculib_move_to.py b/pcs/agents/acu_interface/tests/test_aculib_move_to.py new file mode 100644 index 0000000..87a3eca --- /dev/null +++ b/pcs/agents/acu_interface/tests/test_aculib_move_to.py @@ -0,0 +1,48 @@ +"""Regression test: ``aculib.move_to`` must not crash on HTTP 503. + +Go TCS can return 503 for ``/move-to`` (e.g. a prior command still running); +``observatory_control_system.post()`` short-circuits a 503 to ``{}`` (not a +Response), so ``move_to``'s diagnostic ``.json()`` log must be guarded or it +raises ``AttributeError`` inside the client and tears down the calling Process. +ocs-free: ``aculib`` imports no OCS/twisted, so this runs in +the minimal env and is date-independent. 
+""" + +import logging + +from pcs.agents.acu_interface import aculib + + +def _client(): + """Build a client with empty certs (plain session, no network at init).""" + return aculib.observatory_control_system( + url="http://localhost:0", + log=logging.getLogger("test_aculib_move_to"), + server_cert="", + client_cert="", + client_key="", + verify_cert=False, + ) + + +def test_move_to_503_returns_empty_dict_without_crashing(monkeypatch): + """A 503 (``post()`` -> ``{}``) returns ``{}`` from move_to, not AttributeError.""" + client = _client() + monkeypatch.setattr(client, "post", lambda cmd, data: {}) + result = client.move_to(120.0, 45.0) + assert result == {} + + +def test_move_to_200_still_returns_response(monkeypatch): + """On a real Response the 200-path is unchanged (move_to returns it).""" + + class _Resp: + status_code = 200 + + def json(self): + return {"status": "ok"} + + resp = _Resp() + client = _client() + monkeypatch.setattr(client, "post", lambda cmd, data: resp) + assert client.move_to(120.0, 45.0) is resp diff --git a/pcs/agents/acu_interface/tests/test_constant_el_scan.py b/pcs/agents/acu_interface/tests/test_constant_el_scan.py new file mode 100644 index 0000000..bae1b24 --- /dev/null +++ b/pcs/agents/acu_interface/tests/test_constant_el_scan.py @@ -0,0 +1,778 @@ +"""Tests for the FYST constant-elevation scan dispatch core + Go TCS contract. 
+ +PRIMARY (no server, no ocs): drive +:func:`pcs.agents.acu_interface.trajectory.build_constant_el_payload` directly and +assert the returned ``/path`` body satisfies *every* rule the Go TCS enforces in +``commands.go`` (``pathCmd.Check()`` + ``checkAzEl``): + +- exactly the three keys ``{start_time, coordsys, points}``; ``coordsys == + "Horizon"``; +- ``start_time`` absolute Unix and >= now + 9.8 s (and >= now + 10 s, the + :data:`SCAN_DISPATCH_BUFFER_SEC` floor); +- ``points`` is N x 5; every consecutive ``dt >= 0.05`` s; +- the first 100 points satisfy az in [-180, 360], el in [el_min, el_max], + ``|vaz| <= 3.0``, ``|vel| <= 1.5``, velocities present (cols 3/4 non-null); +- the slew-target az is in range and ``points[0][1] == encoder_az`` (wrap align); +- a too-fast velocity raises (hardware-dynamics escalation). + +Imports ``trajectory.py`` directly, never ``agent.py``, because the agent needs +the ``ocs`` / ``twisted`` framework, the whole reason the core is factored ocs-free. + +SECONDARY (mock TCS, in-process): POST a built body through FastAPI's +``TestClient`` against :mod:`mock_tcs` and assert it is recorded + returns 200. +Without FastAPI the module-level ``importorskip`` skips this WHOLE module (PRIMARY tests too); no live server (context-managed transport). + +Dispatch-core guards covered here: + +- acceleration: the quintic turnaround's peak az accel is ``1.5 * az_accel`` by + design; a breach of the Go TCS hardware ceiling (6.0 deg/s^2) escalates to + ``TrajectoryValidationError``. +- dispatch delay: a below-horizon field resolves a crossing many hours out; + ``build_constant_el_payload`` rejects a resolved start more than + ``max_dispatch_delay_sec`` out with ``DispatchDelayError`` (date-stable fixed + ``Time``). The contract tests pass ``max_dispatch_delay_sec=float("inf")`` so + the guard does not fire on REPRESENTATIVE_PARAMS (which resolve ~hours out).
+ +Process-level contracts (NOT here, they need the ocs/twisted framework; the +DECISION logic is unit-tested ocs-free above and the runner is exercised in +``test_dispatch_runner.py``): + +- completion: ``/path`` is fire-and-forget (HTTP 200 == accepted, not done; Go + never calls back). The Process detects completion via the stack-drained signal + (free == 9999 + near-zero velocities), backstopped by the absolute scan end + time, so it releases ``azel_lock`` instead of spinning. ``mock_tcs`` exposes a + drained ``GET /acu/status`` for a Process-level harness. +- position-unknown refusal: refuses to dispatch (False, RETRYABLE) when no live + encoder position is available rather than guessing a wrong-wrap ``current_az``. + The ``get_status()`` ``SystemExit`` (aculib raises it on ConnectionError) is + trapped. +- slew-arrival gate: polls the broadcast until the dish reaches the slew target + before POSTing ``/path`` (a premature POST returns 503), and fails on a non-200 + ``/path`` response. +""" + +import time +import warnings + +import numpy as np +import pytest +from astropy.time import Time + +from fyst_trajectories import get_fyst_site +from fyst_trajectories.exceptions import ElevationBoundsError + +from pcs.agents.acu_interface.trajectory import ( + MAX_REFLOOR_DRIFT_SEC, + SCAN_DISPATCH_BUFFER_SEC, + TCS_PROGRAM_TRACK_DRAINED, + DispatchDelayError, + ScanCompletionLatch, + TrajectoryValidationError, + build_constant_el_payload, + refloor_drift_seconds, + refloor_payload_start_time, + tcs_response_status, +) + +# --------------------------------------------------------------------------- +# Go TCS /path contract constants, transcribed from +# telescope-control-system/commands.go (checkAzEl + pathCmd.Check()). 
+# --------------------------------------------------------------------------- +AZ_MIN_TCS = -180.0 # commands.go:18 azimuthMin +AZ_MAX_TCS = 360.0 # commands.go:19 azimuthMax +EL_MIN_TCS = -90.0 # commands.go:24 elevationMin +EL_MAX_TCS = 180.0 # commands.go:25 elevationMax +AZ_SPEED_MAX_TCS = 3.0 # commands.go:20 azimuthSpeedMax +EL_SPEED_MAX_TCS = 1.5 # commands.go:26 elevationSpeedMax +MIN_DT_TCS = 0.05 # commands.go:261 minimum sample interval +MIN_LEAD_TCS = 9.8 # commands.go:253 program track starts too soon (< 9.8 s) +FIRST_N_CHECKED = 100 # commands.go:269 first 100 coordinates checked + + +# A representative rising constant-el scan well inside the FYST limits. +REPRESENTATIVE_PARAMS = dict( + ra_center=80.0, + dec_center=-40.0, + width=3.0, + height=3.0, + elevation=50.0, + velocity=0.5, # azimuth-coordinate deg/s (mount frame) + rising=True, +) + +# DATE-STABILITY: the contract builds all plan REPRESENTATIVE_PARAMS (or +# distinct-geometry fields) whose el=50 crossings exist only at certain wall-clock +# times; on a date with no crossing in the 12 h search ``plan_constant_el_scan`` +# raises ValueError before any assertion runs. Thread ONE fixed epoch through every +# build so the suite is deterministic regardless of run date. +# +# 2026-06-15T02:00 UTC is the single epoch at which *all* the contract builds +# resolve: REPRESENTATIVE_PARAMS (rising), the setting RA=0/Dec=-60 field in +# ``test_wrap_alignment_applies_nonzero_shift``, AND the ``now+3600`` anchor in +# ``test_scheduled_t0_is_a_search_anchor...`` (a near-term-crossing epoch like +# 13:00 breaks the +3600 case: consecutive rising crossings are ~a sidereal day +# apart). Contract tests pass ``max_dispatch_delay_sec=float("inf")`` so the +# crossing being hours out here is irrelevant; they assert the ``/path`` body, +# not timeliness; the dispatch-delay tests keep their own near/far epochs. 
+FIXED_EPOCH = Time("2026-06-15T02:00:00", scale="utc") +FIXED_NOW = FIXED_EPOCH.unix + + +@pytest.fixture +def site(): + # Sun avoidance disabled so the wrap choice is purely geometric and the + # contract assertions are deterministic regardless of run date. The Go TCS + # contract (bounds + velocity + timing) is independent of sun avoidance. + return get_fyst_site(sun_avoidance_enabled=False) + + +@pytest.fixture +def built(site): + """A built payload from representative params and a current az/el. + + Silences the advisory acceleration ``PointingWarning`` at the turnaround (the + quintic peak accel is ``1.5 * az_accel`` by design, a real commanded value, + not a sampling artifact); velocity escalation and the analytic accel guard are + tested separately. Uses :data:`FIXED_NOW` for date-stability, with + ``max_dispatch_delay_sec=float("inf")`` so the ``/path``-contract assertions do + not couple to whether the crossing falls inside the default window. + """ + now = FIXED_NOW + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = build_constant_el_payload( + scan_params=REPRESENTATIVE_PARAMS, + current_az=200.0, + current_el=60.0, + site=site, + now_unix=now, + max_dispatch_delay_sec=float("inf"), + ) + return result, now + + +# --------------------------------------------------------------------------- +# PRIMARY: Go TCS /path contract (pathCmd.Check + checkAzEl) +# --------------------------------------------------------------------------- + + +def test_payload_has_exactly_three_keys(built): + result, _ = built + payload = result["payload"] + # DisallowUnknownFields on the Go TCS receiver: exactly these three keys. 
+ assert set(payload.keys()) == {"start_time", "coordsys", "points"} + + +def test_coordsys_is_horizon(built): + result, _ = built + assert result["payload"]["coordsys"] == "Horizon" + + +def test_start_time_absolute_and_at_least_9p8s_out(built): + result, now = built + start_time = result["payload"]["start_time"] + # Absolute Unix seconds (not relative): jsontime() in commands.go treats + # values < 100000 as relative-to-now, so an absolute stamp must be large. + assert start_time > 1e9 + # commands.go:253: rejected if < 9.8 s in the future. + assert start_time >= now + MIN_LEAD_TCS + + +def test_start_time_respects_buffer_floor(built): + result, now = built + # SCAN_DISPATCH_BUFFER_SEC (10 s) floor, stronger than the 9.8 s reject. + assert result["payload"]["start_time"] >= now + SCAN_DISPATCH_BUFFER_SEC - 1e-6 + + +def test_points_is_n_by_5(built): + result, _ = built + points = result["payload"]["points"] + assert len(points) > 0 # commands.go:246 "no points in path" + assert all(len(p) == 5 for p in points) + + +def test_consecutive_dt_at_least_50ms(built): + result, _ = built + times = np.array([p[0] for p in result["payload"]["points"]]) + dt = np.diff(times) + # commands.go:261: any pair closer than 0.05 s is rejected. + assert dt.min() >= MIN_DT_TCS + + +def test_first_100_points_satisfy_checkAzEl(built, site): + result, _ = built + points = result["payload"]["points"] + el_min = site.telescope_limits.elevation.min + el_max = site.telescope_limits.elevation.max + for i, p in enumerate(points[:FIRST_N_CHECKED]): + t, az, el, vaz, vel = p + # Position: Go TCS hardware bounds (checkAzEl). + assert AZ_MIN_TCS <= az <= AZ_MAX_TCS, f"point {i}: az {az} out of TCS range" + assert EL_MIN_TCS <= el <= EL_MAX_TCS, f"point {i}: el {el} out of TCS range" + # Planning elevation also sits inside the (tighter) site limits. + assert el_min <= el <= el_max, f"point {i}: el {el} out of site range" + # Velocity: the quantity checkAzEl enforces. 
+ assert abs(vaz) <= AZ_SPEED_MAX_TCS, f"point {i}: |vaz| {vaz} > {AZ_SPEED_MAX_TCS}" + assert abs(vel) <= EL_SPEED_MAX_TCS, f"point {i}: |vel| {vel} > {EL_SPEED_MAX_TCS}" + + +def test_velocities_present_and_non_null(built): + result, _ = built + points = result["payload"]["points"] + for i, p in enumerate(points[:FIRST_N_CHECKED]): + # Cols 3/4 must exist and be real numbers (the Go TCS path iterator + # reads AzVel/ElVel from these columns). + assert p[3] is not None and np.isfinite(p[3]), f"point {i}: az_vel null/non-finite" + assert p[4] is not None and np.isfinite(p[4]), f"point {i}: el_vel null/non-finite" + + +def test_slew_target_in_range(built, site): + result, _ = built + az_limits = site.telescope_limits.azimuth + el_limits = site.telescope_limits.elevation + assert az_limits.is_in_range(result["encoder_az"]) + assert el_limits.is_in_range(result["encoder_el"]) + + +def test_wrap_alignment_first_point_equals_encoder_az(built): + result, _ = built + # The posted trajectory and the slew target must share one az wrap. + assert result["payload"]["points"][0][1] == pytest.approx(result["encoder_az"], abs=1e-9) + + +def test_wrap_alignment_applies_nonzero_shift(site): + """Wrap alignment must hold even when the chosen wrap differs from the planner's. + + A setting scan of RA=0, Dec=-60 deg starts near az 190 deg, which has two + in-range encoder images in [-180, 360]: 190 and -170. With the dish near + -170, ``choose_encoder_solution`` picks the -170 wrap (nearer slew), so the + whole trajectory must be shifted by -360 deg to start there. This exercises + the non-trivial branch of the alignment that the representative fixture + (shift == 0) does not. + + Date-stable: the setting RA=0, Dec=-60 crossing resolves at the shared + :data:`FIXED_NOW` epoch. 
+ """ + now = FIXED_NOW + params = dict( + ra_center=0.0, dec_center=-60.0, width=3.0, height=3.0, + elevation=50.0, velocity=0.5, rising=False, + ) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = build_constant_el_payload( + scan_params=params, current_az=-170.0, current_el=60.0, + site=site, now_unix=now, max_dispatch_delay_sec=float("inf"), + ) + enc_az = result["encoder_az"] + points = result["payload"]["points"] + # The chosen wrap is the negative one (nearer the dish at -170). + assert enc_az < 0.0 + # First point is aligned to it ... + assert points[0][1] == pytest.approx(enc_az, abs=1e-9) + # ... and the *whole* shifted trajectory still satisfies the TCS az bounds. + az = np.array([p[1] for p in points]) + assert az.min() >= AZ_MIN_TCS + assert az.max() <= AZ_MAX_TCS + + +def test_too_fast_velocity_raises_h3(site): + fast = dict(REPRESENTATIVE_PARAMS, velocity=4.0) # > az speed limit 3.0 + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with pytest.raises(TrajectoryValidationError, match="velocity"): + build_constant_el_payload( + scan_params=fast, + current_az=200.0, + current_el=60.0, + site=site, + now_unix=FIXED_NOW, + max_dispatch_delay_sec=float("inf"), + ) + + +def test_too_high_accel_raises(site): + """An az_accel whose quintic peak (1.5x) exceeds the HW ceiling raises. + + az_accel=5.0 -> peak 7.5 deg/s^2 > the Go TCS hardware ceiling of 6.0 + (commands.go:21). The guard is analytic (1.5 * az_accel), not np.gradient. + """ + bad = dict(REPRESENTATIVE_PARAMS, az_accel=5.0) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with pytest.raises(TrajectoryValidationError, match=r"accel"): + build_constant_el_payload( + scan_params=bad, + current_az=200.0, + current_el=60.0, + site=site, + now_unix=FIXED_NOW, + max_dispatch_delay_sec=float("inf"), + ) + + +def test_accel_at_hardware_boundary_passes(site): + """az_accel=4.0 -> peak exactly 6.0 deg/s^2 passes (strict ``>`` threshold). 
+ + Mirrors checkAzEl's ``> max`` convention for velocity: a value exactly at + the ceiling is accepted. + """ + ok = dict(REPRESENTATIVE_PARAMS, az_accel=4.0) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = build_constant_el_payload( + scan_params=ok, + current_az=200.0, + current_el=60.0, + site=site, + now_unix=FIXED_NOW, + max_dispatch_delay_sec=float("inf"), + ) + assert set(result["payload"].keys()) == {"start_time", "coordsys", "points"} + + +def test_scheduled_t0_in_past_is_floored(site): + now = FIXED_NOW + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = build_constant_el_payload( + scan_params=REPRESENTATIVE_PARAMS, + current_az=200.0, + current_el=60.0, + site=site, + scheduled_t0_unix=now - 1000.0, + now_unix=now, + max_dispatch_delay_sec=float("inf"), + ) + assert result["payload"]["start_time"] >= now + SCAN_DISPATCH_BUFFER_SEC - 1e-6 + + +def test_scheduled_t0_is_a_search_anchor_not_a_literal_start(site): + """``scheduled_t0_unix`` is the planner's *search anchor*, not the literal start. + + ``plan_constant_el_scan`` searches forward from ``start_time`` for the + field's elevation crossing and uses that crossing as the trajectory start + (matching the library's documented ``start_time`` contract). So the posted + ``start_time`` is the resolved crossing, always at or after the anchor, + never before it, and a later anchor must not yield an earlier start + (monotonicity). The Go-TCS-relevant invariant (start >= now + buffer) is + asserted by the buffer-floor test and holds because crossing >= anchor >= + now + buffer. + + Date-stable: both anchors (now+30, now+3600) resolve crossings at the shared + :data:`FIXED_NOW` epoch. 
+ """ + now = FIXED_NOW + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + early = build_constant_el_payload( + scan_params=REPRESENTATIVE_PARAMS, current_az=200.0, current_el=60.0, + site=site, scheduled_t0_unix=now + 30.0, now_unix=now, + max_dispatch_delay_sec=float("inf"), + )["payload"]["start_time"] + later = build_constant_el_payload( + scan_params=REPRESENTATIVE_PARAMS, current_az=200.0, current_el=60.0, + site=site, scheduled_t0_unix=now + 3600.0, now_unix=now, + max_dispatch_delay_sec=float("inf"), + )["payload"]["start_time"] + # Resolved crossing is at/after the anchor, and a later anchor never moves + # the start earlier. + assert early >= now + 30.0 - 1e-6 + assert later >= early - 1e-6 + + +def test_dispatch_delay_far_out_field_raises(site): + """A field whose elevation crossing resolves hours out raises DispatchDelayError. + + Uses a FIXED ``now`` (not ``time.time()``) so the test is date-stable. At + 2026-06-15T02:00 UTC, REPRESENTATIVE_PARAMS (RA=80, Dec=-40, el=50, rising) + resolve a crossing ~11 h out (far past the 30-min default), so the guard + refuses to slew and hold ``azel_lock``. Default ``max_dispatch_delay_sec`` + (no override) is exercised on purpose. + """ + now = Time("2026-06-15T02:00:00", scale="utc").unix + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with pytest.raises(DispatchDelayError, match=r"after dispatch"): + build_constant_el_payload( + scan_params=REPRESENTATIVE_PARAMS, + current_az=200.0, + current_el=60.0, + site=site, + now_unix=now, + ) + + +def test_dispatch_delay_prompt_field_passes(site): + """A promptly-reachable crossing passes the default dispatch-delay guard. + + Date-stable fixed ``now``: at 2026-06-15T13:00 UTC the same field resolves a + crossing ~16 min out (inside the 30-min default), so the build succeeds + with no ``max_dispatch_delay_sec`` override. 
+ """ + now = Time("2026-06-15T13:00:00", scale="utc").unix + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = build_constant_el_payload( + scan_params=REPRESENTATIVE_PARAMS, + current_az=200.0, + current_el=60.0, + site=site, + now_unix=now, + ) + # Resolved start is at/after now and within the 30-min default window. + delay = result["payload"]["start_time"] - now + assert 0.0 < delay <= 1800.0 + + +def test_velocity_passed_through_mount_frame_not_cos_el(site): + """The commanded az velocity is the scan velocity, not cos(el)-scaled. + + At el=50 deg, cos(el)~0.64. A cos(el)-scaled value would read ~0.32 deg/s; + the mount-frame pass-through must read ~0.50 (the requested velocity). + """ + now = FIXED_NOW + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = build_constant_el_payload( + scan_params=REPRESENTATIVE_PARAMS, + current_az=200.0, + current_el=60.0, + site=site, + now_unix=now, + max_dispatch_delay_sec=float("inf"), + ) + vaz = np.array([abs(p[3]) for p in result["payload"]["points"]]) + assert vaz.max() == pytest.approx(REPRESENTATIVE_PARAMS["velocity"], abs=1e-6) + + +def test_goal_elevation_out_of_range_raises(site): + """An el below the FYST el_min (20) is rejected as a bounds breach. + + Pinned to the FIXED_NOW epoch (rising). On an *unfavourable* date the el=10 + field has no crossing in the 12 h search and ``plan_constant_el_scan`` would + raise a transient ``ValueError``, a DIFFERENT failure that would let a real + below-el_min regression slip through. At FIXED_NOW the el=10 crossing exists, + so the build proceeds to ``validate_trajectory`` and the trajectory is + rejected for leaving the elevation limits. Assert that specific path + (``ElevationBoundsError``, a ``PointingError``, with a limits message), + not merely "something raised". 
+ """ + bad = dict(REPRESENTATIVE_PARAMS, elevation=10.0) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with pytest.raises(ElevationBoundsError, match=r"exceeds limits"): + build_constant_el_payload( + scan_params=bad, + current_az=200.0, + current_el=60.0, + site=site, + now_unix=FIXED_NOW, + ) + + +# --------------------------------------------------------------------------- +# ScanCompletionLatch: the stack-drained completion decision (driven ocs-free). +# The headline test reproduces the OLD false-positive (an empty/drained stack read +# on the first poll after POST, before the scan ran) and confirms the latch +# suppresses it. +# --------------------------------------------------------------------------- + + +def test_latch_drained_from_start_does_not_false_complete(): + """Regression: a drained reading on the first poll must NOT report complete. + + Reproduces the bug the old ``free >= 9999`` check had: right after POST the + Go TCS stack can read drained (the empty PRE-upload stack is free == 10000, + and a transient free == 9999 can be seen before the upload goroutine pushes + points), with both axes still stopped. The old check would have fired + "complete" on poll #1 and released ``azel_lock`` for a scan that never ran. + The latch refuses any drained reading until the scan is observed RUNNING. + """ + start = 10_000.0 # scan starts well in the future relative to the polls + latch = ScanCompletionLatch(start) + # Poll #1: empty pre-upload stack (10000), axes stopped, well before start. + assert latch.update(free=10_000, vaz=0.0, vel=0.0, now_unix=start - 100.0) is False + # Poll #2: even a strict-9999 reading is not honored pre-arm. 
+ assert latch.update(free=9999, vaz=0.0, vel=0.0, now_unix=start - 99.0) is False + assert latch.running_observed is False + + +def test_latch_arms_on_running_then_completes_on_drain(): + """Once the scan is observed running (non-empty stack), a later drain completes.""" + start = 10_000.0 + latch = ScanCompletionLatch(start) + # Non-empty stack with az moving -> arms the latch, not yet complete. + assert latch.update(free=5000, vaz=0.4, vel=0.0, now_unix=start - 50.0) is False + assert latch.running_observed is True + # Now strictly drained + axes stopped -> complete. + done = latch.update( + free=TCS_PROGRAM_TRACK_DRAINED, vaz=0.0, vel=0.0, now_unix=start - 40.0 + ) + assert done is True + + +def test_latch_arms_on_program_track_mode(): + """ProgramTrack mode (any axis) arms the latch even if the stack never reads non-empty.""" + start = 10_000.0 + latch = ScanCompletionLatch(start) + # Drained read but ProgramTrack reported -> armed, and (drained + stopped) completes. + done = latch.update( + free=9999, + vaz=0.0, + vel=0.0, + now_unix=start - 10.0, + az_mode="ProgramTrack", + el_mode="Stop", + ) + assert done is True + assert latch.running_observed is True + + +def test_latch_arms_on_wallclock_past_start(): + """Reaching the scan start_time arms the latch (covers a silent status stream).""" + start = 10_000.0 + latch = ScanCompletionLatch(start) + # Drained read at/after start_time -> armed via wall-clock, completes. + assert latch.update(free=9999, vaz=0.0, vel=0.0, now_unix=start + 1.0) is True + + +def test_latch_drained_but_axis_moving_not_complete(): + """An armed, drained stack with an axis still slewing is NOT complete.""" + start = 10_000.0 + latch = ScanCompletionLatch(start) + latch.update(free=3000, vaz=0.5, vel=0.0, now_unix=start - 20.0) # arm + # Stack drained but az still above the speed tolerance -> keep waiting. 
+ assert latch.update(free=9999, vaz=0.5, vel=0.0, now_unix=start - 10.0) is False + + +def test_latch_missing_fields_not_complete(): + """A status read missing the free-stack key never reports complete.""" + start = 10_000.0 + latch = ScanCompletionLatch(start) + latch.update(free=3000, vaz=0.4, vel=0.0, now_unix=start + 1.0) # arm + assert latch.update(free=None, vaz=0.0, vel=0.0, now_unix=start + 2.0) is False + + +# --------------------------------------------------------------------------- +# tcs_response_status: 503-safe HTTP status read. aculib.post() returns {} (not +# a Response) on a 503; this maps it to a graceful "scan not launched" instead of +# an AttributeError on {}.status_code. Driven directly (ocs-free). +# --------------------------------------------------------------------------- + + +class _FakeResponse: + def __init__(self, status_code): + self.status_code = status_code + + +def test_tcs_response_status_503_returns_none(): + """A 503 short-circuits to {} in aculib.post(); the helper maps it to None != 200.""" + assert tcs_response_status({}) is None # the literal {} aculib returns on 503 + assert tcs_response_status(None) is None + # The Process guard is ``code != 200``; None is correctly treated as rejected. + assert tcs_response_status({}) != 200 + + +def test_tcs_response_status_reads_real_response(): + assert tcs_response_status(_FakeResponse(200)) == 200 + assert tcs_response_status(_FakeResponse(503)) == 503 + + +# --------------------------------------------------------------------------- +# refloor_payload_start_time: re-apply the dispatch floor after the slew. +# --------------------------------------------------------------------------- + + +def test_refloor_advances_a_stale_start_time(): + """A near-now start that the slew ate into is advanced to now + buffer.""" + # start_time only just above the original floor; a long slew has since run. 
+ payload = { + "start_time": 1000.0 + SCAN_DISPATCH_BUFFER_SEC, + "coordsys": "Horizon", + "points": [[0.0, 100.0, 50.0, 0.5, 0.0]], + } + now_after_slew = 1000.0 + 150.0 # 150 s of slewing elapsed + refloor_payload_start_time(payload, now_after_slew) + assert payload["start_time"] == pytest.approx(now_after_slew + SCAN_DISPATCH_BUFFER_SEC) + + +def test_refloor_never_moves_a_future_start_earlier(): + """A start comfortably in the future is left untouched (floor never regresses it).""" + payload = { + "start_time": 5000.0, + "coordsys": "Horizon", + "points": [[0.0, 100.0, 50.0, 0.5, 0.0]], + } + refloor_payload_start_time(payload, 1000.0) # now + buffer == 1010 << 5000 + assert payload["start_time"] == 5000.0 + + +# --------------------------------------------------------------------------- +# refloor_drift_seconds: how far the re-floor advanced start_time (= the +# sidereal staleness of the baked az/el track). The runner refuses to POST when +# this exceeds MAX_REFLOOR_DRIFT_SEC. +# --------------------------------------------------------------------------- + + +def test_refloor_drift_seconds_reports_a_stale_advance(): + """A near-floor start that a long slew ate into reports the full advance. + + Mirrors ``test_refloor_advances_a_stale_start_time``: build a start just + above the original floor, re-floor with a now that ran 150 s later, and check + the drift equals how far start_time moved (150 s).""" + original = 1000.0 + SCAN_DISPATCH_BUFFER_SEC + payload = { + "start_time": original, + "coordsys": "Horizon", + "points": [[0.0, 100.0, 50.0, 0.5, 0.0]], + } + now_after_slew = 1000.0 + 150.0 # 150 s of slewing elapsed + refloor_payload_start_time(payload, now_after_slew) + drift = refloor_drift_seconds(original, payload) + # start_time went from (1000+buffer) to (now_after_slew+buffer); the delta is + # exactly now_after_slew - 1000 == 150 s.
+ assert drift == pytest.approx(150.0) + + +def test_refloor_drift_seconds_zero_for_a_future_start(): + """A comfortably-future start is untouched by the floor, so drift is 0.""" + original = 5000.0 + payload = { + "start_time": original, + "coordsys": "Horizon", + "points": [[0.0, 100.0, 50.0, 0.5, 0.0]], + } + refloor_payload_start_time(payload, 1000.0) # now + buffer << 5000, no-op + assert refloor_drift_seconds(original, payload) == 0.0 + + +def test_refloor_drift_seconds_never_negative(): + """The re-floor never regresses start_time, so drift is clamped at >= 0 even + if asked about an original that is somehow already past the (untouched) start.""" + payload = {"start_time": 5000.0, "coordsys": "Horizon", "points": [[0.0, 1.0, 2.0, 0.0, 0.0]]} + # original AFTER the (unchanged) start: a degenerate caller; result floors at 0. + assert refloor_drift_seconds(6000.0, payload) == 0.0 + + +def test_refloor_drift_seconds_boundary_around_the_cap(): + """Drift straddling MAX_REFLOOR_DRIFT_SEC: just-under is tolerated, just-over + is what the runner rejects. Pins that the helper measures the quantity the + cap is compared against (drift > cap -> refuse).""" + eps = 0.5 + base = 1000.0 + # A payload whose start sits cap-eps above the original -> drift == cap-eps. 
+ under = {"start_time": base + MAX_REFLOOR_DRIFT_SEC - eps, "coordsys": "Horizon", + "points": [[0.0, 1.0, 2.0, 0.0, 0.0]]} + assert refloor_drift_seconds(base, under) == pytest.approx(MAX_REFLOOR_DRIFT_SEC - eps) + assert refloor_drift_seconds(base, under) <= MAX_REFLOOR_DRIFT_SEC # NOT refused + over = {"start_time": base + MAX_REFLOOR_DRIFT_SEC + eps, "coordsys": "Horizon", + "points": [[0.0, 1.0, 2.0, 0.0, 0.0]]} + assert refloor_drift_seconds(base, over) == pytest.approx(MAX_REFLOOR_DRIFT_SEC + eps) + assert refloor_drift_seconds(base, over) > MAX_REFLOOR_DRIFT_SEC # refused + + +# --------------------------------------------------------------------------- +# SECONDARY: drive the body through a mock Go TCS (in-process FastAPI client). +# --------------------------------------------------------------------------- + +fastapi = pytest.importorskip("fastapi", reason="FastAPI not available for mock-TCS test") +from fastapi.testclient import TestClient # noqa: E402 + +from pcs.agents.acu_interface.tests.mock_tcs import PREFIX, create_mock_tcs # noqa: E402 + + +def test_mock_tcs_records_contract_valid_path(built): + """POST the built body to the mock TCS and assert it is recorded + 200. + + Uses FastAPI's in-process TestClient, no live server, no open port. The + mock's ``PathParameters`` model (Literal["Horizon","ICRS"], 5-wide rows) + mirrors the real OCS proxy, so a 200 means the body is schema-valid. + """ + result, _ = built + payload = result["payload"] + app = create_mock_tcs() + with TestClient(app) as client: + # Slew first (move_to), then POST the path: the task's order. 
+ move_resp = client.post( + f"{PREFIX}/move-to", + json={"azimuth": result["encoder_az"], "elevation": result["encoder_el"]}, + ) + path_resp = client.post(f"{PREFIX}/path", json=payload) + + assert move_resp.status_code == 200 + assert path_resp.status_code == 200 + + recorder = app.state.recorder + assert len(recorder.move_to_bodies) == 1 + assert len(recorder.path_bodies) == 1 + recorded = recorder.path_bodies[0] + assert set(recorded.keys()) == {"start_time", "coordsys", "points"} + assert recorded["coordsys"] == "Horizon" + assert all(len(p) == 5 for p in recorded["points"]) + assert recorded["points"][0][1] == pytest.approx(result["encoder_az"], abs=1e-9) + + +def test_mock_tcs_rejects_wrong_coordsys(): + """A non-Horizon/ICRS coordsys is a schema violation (HTTP 422). + + Confirms the mock actually validates the contract rather than rubber-stamping. + """ + app = create_mock_tcs() + bad_body = {"start_time": time.time() + 100.0, "coordsys": "Galactic", + "points": [[0.0, 100.0, 50.0, 0.5, 0.0]]} + with TestClient(app) as client: + resp = client.post(f"{PREFIX}/path", json=bad_body) + assert resp.status_code == 422 + + +def test_mock_tcs_abort_recorded(): + app = create_mock_tcs() + with TestClient(app) as client: + resp = client.post(f"{PREFIX}/abort") + assert resp.status_code == 200 + assert app.state.recorder.abort_count == 1 + + +# --------------------------------------------------------------------------- +# Process-level (constant_el_scan): SKIP-GUARDED. The Process body needs the +# ocs/twisted framework, unimportable here; each DECISION is unit-tested ocs-free +# above, so this just confirms the agent imports and wires the four helpers where +# ocs IS available (CI/Linux). SKIPPED here. +# --------------------------------------------------------------------------- + +# NB: a module named ``ocs`` IS importable here, the OCS REST *client*, NOT the +# SO operations *framework* the agent needs, so ``importorskip("ocs")`` would +# not skip. 
Guard per-test on the submodule the agent imports (``ocs.ocs_agent``) +# so the skip is correct and local. +def _ocs_framework_available() -> bool: + import importlib.util + + try: + return importlib.util.find_spec("ocs.ocs_agent") is not None + except (ImportError, ValueError, ModuleNotFoundError): + return False + + +@pytest.mark.skipif( + not _ocs_framework_available(), + reason="ocs operations framework not importable; Process-level test skipped", +) +def test_agent_module_imports_and_wires_helpers(): + """The agent module imports and references the four dispatch helpers. + + Skipped where ocs is unavailable. This does not exercise the Process loop + (that needs a live reactor + session); it guards against the agent drifting + out of sync with the ocs-free helpers it now depends on. + """ + import inspect + + from pcs.agents.acu_interface import agent as agent_mod + + src = inspect.getsource(agent_mod) + # The Process delegates completion to the latch and reads HTTP status + the + # re-floor through the ocs-free helpers; _safe_abort wraps the abort calls. + assert "ScanCompletionLatch(" in src + assert "tcs_response_status(" in src + assert "refloor_payload_start_time(" in src + assert "_safe_abort(" in src diff --git a/pcs/agents/acu_interface/tests/test_daisy_scan.py b/pcs/agents/acu_interface/tests/test_daisy_scan.py new file mode 100644 index 0000000..e040408 --- /dev/null +++ b/pcs/agents/acu_interface/tests/test_daisy_scan.py @@ -0,0 +1,294 @@ +"""Tests for the FYST Daisy-scan dispatch core + Go TCS contract. 
+ +PRIMARY (no server, no ocs): drive +:func:`pcs.agents.acu_interface.trajectory.build_daisy_payload` directly and assert +the returned ``/path`` body satisfies every rule the Go TCS enforces in +``commands.go`` (``pathCmd.Check()`` + ``checkAzEl``), mirroring +``test_constant_el_scan.py``: + +- exactly the three keys ``{start_time, coordsys, points}``; ``coordsys == + "Horizon"``; +- ``start_time`` absolute Unix and >= now + 10 s (the + :data:`SCAN_DISPATCH_BUFFER_SEC` floor, stronger than the 9.8 s reject); +- ``points`` is N x 5; every consecutive ``dt >= 0.05`` s; +- the first 100 points satisfy az in [-180, 360], el in [-90, 180], + ``|vaz| <= 3.0``, ``|vel| <= 1.5`` (velocities present); +- the slew-target az is in range and ``points[0][1] == encoder_az`` (wrap align). + +Plus the dispatch-core guards: + +- velocity frame: ``plan_daisy_scan``'s ``velocity`` is ON-SKY (tangent-plane) + deg/s, the SAME frame as Pong, DIFFERENT from constant-el/source; +- hardware-dynamics escalation: a velocity/acceleration breach anywhere raises + ``TrajectoryValidationError`` (a high-velocity / tight-turn Daisy drives accel + over the hardware ceiling); +- dispatch-delay: Daisy takes ``start_time`` literally, so its resolved start + equals the floored anchor and the guard passes trivially; the shared + :func:`enforce_dispatch_delay` raising-when-far behaviour is covered in + ``test_pong_scan.py``, and the buffer floor is verified here. + +DATE-STABILITY: Daisy uses ``start_time`` literally, so the only date dependence is +whether the source is observable (sun-safety disabled in the fixture). Every build +threads a FIXED ``Time`` epoch. At 2026-06-15T13:00 UTC the RA=80, Dec=-40 source +sits at el~45, comfortably inside the FYST limits. + +Imports ``trajectory.py`` directly, never ``agent.py``. 
+""" + +import warnings + +import numpy as np +import pytest +from astropy.time import Time + +from fyst_trajectories import get_fyst_site + +from pcs.agents.acu_interface.trajectory import ( + MAX_DISPATCH_DELAY_SEC, + SCAN_DISPATCH_BUFFER_SEC, + TrajectoryValidationError, + build_daisy_payload, +) + +# Go TCS /path contract constants (commands.go checkAzEl + pathCmd.Check()). +AZ_MIN_TCS = -180.0 # commands.go:18 +AZ_MAX_TCS = 360.0 # commands.go:19 +EL_MIN_TCS = -90.0 # commands.go:24 +EL_MAX_TCS = 180.0 # commands.go:25 +AZ_SPEED_MAX_TCS = 3.0 # commands.go:20 +EL_SPEED_MAX_TCS = 1.5 # commands.go:26 +MIN_DT_TCS = 0.05 # commands.go:261 +MIN_LEAD_TCS = 9.8 # commands.go:253 +FIRST_N_CHECKED = 100 # commands.go:269 + +# A representative Daisy on a source well inside the FYST limits at the epoch. +REPRESENTATIVE_PARAMS = dict( + ra=80.0, + dec=-40.0, + radius=0.5, + velocity=0.3, # ON-SKY deg/s (tangent-plane) + turn_radius=0.2, + avoidance_radius=0.0, + start_acceleration=0.5, + duration=120.0, +) + +# Epoch at which the source is observable (el~45). Daisy uses start_time +# literally so there is no forward-search dependence. See module docstring. +FIXED_EPOCH = Time("2026-06-15T13:00:00", scale="utc") +FIXED_NOW = FIXED_EPOCH.unix + + +@pytest.fixture +def site(): + # Sun avoidance disabled so the wrap choice is purely geometric. The Go TCS + # contract is independent of sun avoidance. 
+ return get_fyst_site(sun_avoidance_enabled=False) + + +@pytest.fixture +def built(site): + """A built Daisy payload from representative params at the fixed epoch.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = build_daisy_payload( + scan_params=REPRESENTATIVE_PARAMS, + current_az=200.0, + current_el=60.0, + site=site, + now_unix=FIXED_NOW, + max_dispatch_delay_sec=float("inf"), + ) + return result, FIXED_NOW + + +# --------------------------------------------------------------------------- +# PRIMARY: Go TCS /path contract (pathCmd.Check + checkAzEl) +# --------------------------------------------------------------------------- + + +def test_payload_has_exactly_three_keys(built): + result, _ = built + assert set(result["payload"].keys()) == {"start_time", "coordsys", "points"} + + +def test_coordsys_is_horizon(built): + result, _ = built + assert result["payload"]["coordsys"] == "Horizon" + + +def test_start_time_absolute_and_respects_buffer_floor(built): + result, now = built + start_time = result["payload"]["start_time"] + assert start_time > 1e9 + assert start_time >= now + MIN_LEAD_TCS # commands.go:253 + assert start_time >= now + SCAN_DISPATCH_BUFFER_SEC - 1e-6 # the 10 s floor + + +def test_points_is_n_by_5(built): + result, _ = built + points = result["payload"]["points"] + assert len(points) > 0 # commands.go:246 "no points in path" + assert all(len(p) == 5 for p in points) + + +def test_consecutive_dt_at_least_50ms(built): + result, _ = built + times = np.array([p[0] for p in result["payload"]["points"]]) + assert np.diff(times).min() >= MIN_DT_TCS # commands.go:261 + + +def test_first_100_points_satisfy_checkAzEl(built, site): + result, _ = built + points = result["payload"]["points"] + el_min = site.telescope_limits.elevation.min + el_max = site.telescope_limits.elevation.max + for i, p in enumerate(points[:FIRST_N_CHECKED]): + t, az, el, vaz, vel = p + assert AZ_MIN_TCS <= az <= AZ_MAX_TCS, f"point {i}: az {az} out 
of TCS range" + assert EL_MIN_TCS <= el <= EL_MAX_TCS, f"point {i}: el {el} out of TCS range" + assert el_min <= el <= el_max, f"point {i}: el {el} out of site range" + assert abs(vaz) <= AZ_SPEED_MAX_TCS, f"point {i}: |vaz| {vaz} > {AZ_SPEED_MAX_TCS}" + assert abs(vel) <= EL_SPEED_MAX_TCS, f"point {i}: |vel| {vel} > {EL_SPEED_MAX_TCS}" + + +def test_velocities_present_and_non_null(built): + result, _ = built + for i, p in enumerate(result["payload"]["points"][:FIRST_N_CHECKED]): + assert p[3] is not None and np.isfinite(p[3]), f"point {i}: az_vel null/non-finite" + assert p[4] is not None and np.isfinite(p[4]), f"point {i}: el_vel null/non-finite" + + +def test_slew_target_in_range(built, site): + result, _ = built + assert site.telescope_limits.azimuth.is_in_range(result["encoder_az"]) + assert site.telescope_limits.elevation.is_in_range(result["encoder_el"]) + + +def test_wrap_alignment_first_point_equals_encoder_az(built): + result, _ = built + assert result["payload"]["points"][0][1] == pytest.approx(result["encoder_az"], abs=1e-9) + + +# --------------------------------------------------------------------------- +# ON-SKY velocity (same frame as Pong, not the mount-frame source/CE one). +# --------------------------------------------------------------------------- + + +def test_velocity_is_on_sky_frame(built): + """Daisy velocity is on-sky; the realised mount dynamics derive from it. + + Like Pong (and unlike constant-el/source), ``plan_daisy_scan`` takes an + ON-SKY scan speed and maps it to the mount frame. The realised mount-frame + az/el velocities are bounded by the Go TCS ceilings (asserted in the + contract test); here we simply confirm the petal moves on BOTH axes (an + on-sky Daisy sweeps az and el together), which a degenerate mount-frame + az-only interpretation would not produce. 
+ """ + result, _ = built + vaz = np.array([abs(p[3]) for p in result["payload"]["points"]]) + vel = np.array([abs(p[4]) for p in result["payload"]["points"]]) + assert vaz.max() > 0.0 + assert vel.max() > 0.0 # el genuinely moves -> on-sky petal, not az-only + + +# --------------------------------------------------------------------------- +# hardware-dynamics escalation (velocity / acceleration, whole trajectory). +# --------------------------------------------------------------------------- + + +def test_too_aggressive_daisy_escalates_h3(site): + """A high-velocity / tight-turn Daisy breaches a hardware ceiling and raises. + + velocity=3.0 with turn_radius=0.05 produces centripetal acceleration far + above the Go TCS hardware ceilings (commands.go:21/27, 6.0/1.5 deg/s^2), + which checkAzEl does NOT validate, so the agent must reject it BEFORE the + POST (and over the whole trajectory, not just the first 100 points). Date- + stable fixed ``now``. + """ + bad = dict( + REPRESENTATIVE_PARAMS, velocity=3.0, turn_radius=0.05, start_acceleration=2.0 + ) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with pytest.raises(TrajectoryValidationError, match=r"velocity|acceleration"): + build_daisy_payload( + scan_params=bad, + current_az=200.0, + current_el=60.0, + site=site, + now_unix=FIXED_NOW, + max_dispatch_delay_sec=float("inf"), + ) + + +# --------------------------------------------------------------------------- +# Dispatch-delay guard. Daisy's start == the floored anchor, so it passes +# the default guard trivially (the shared helper's raising path is covered in +# test_pong_scan.py). The buffer floor is verified here. 
+# --------------------------------------------------------------------------- + + +def test_daisy_passes_default_dispatch_delay_guard(site): + """Daisy's literal start sits just past the buffer, inside the 30-min default.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = build_daisy_payload( + scan_params=REPRESENTATIVE_PARAMS, + current_az=200.0, + current_el=60.0, + site=site, + now_unix=FIXED_NOW, + ) # default max_dispatch_delay_sec + delay = result["payload"]["start_time"] - FIXED_NOW + assert 0.0 < delay <= MAX_DISPATCH_DELAY_SEC + + +def test_scheduled_t0_in_past_is_floored(site): + """The buffer floor advances a past scheduled_t0 to now + buffer.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = build_daisy_payload( + scan_params=REPRESENTATIVE_PARAMS, + current_az=200.0, + current_el=60.0, + site=site, + scheduled_t0_unix=FIXED_NOW - 1000.0, + now_unix=FIXED_NOW, + max_dispatch_delay_sec=float("inf"), + ) + assert result["payload"]["start_time"] >= FIXED_NOW + SCAN_DISPATCH_BUFFER_SEC - 1e-6 + + +# --------------------------------------------------------------------------- +# SECONDARY: drive the body through a mock Go TCS (in-process FastAPI client). 
+# --------------------------------------------------------------------------- + +fastapi = pytest.importorskip("fastapi", reason="FastAPI not available for mock-TCS test") +from fastapi.testclient import TestClient # noqa: E402 + +from pcs.agents.acu_interface.tests.mock_tcs import PREFIX, create_mock_tcs # noqa: E402 + + +def test_mock_tcs_records_contract_valid_path(built): + """POST the built Daisy body to the mock TCS: recorded + 200 (schema-valid).""" + result, _ = built + payload = result["payload"] + app = create_mock_tcs() + with TestClient(app) as client: + move_resp = client.post( + f"{PREFIX}/move-to", + json={"azimuth": result["encoder_az"], "elevation": result["encoder_el"]}, + ) + path_resp = client.post(f"{PREFIX}/path", json=payload) + + assert move_resp.status_code == 200 + assert path_resp.status_code == 200 + + recorder = app.state.recorder + assert len(recorder.path_bodies) == 1 + recorded = recorder.path_bodies[0] + assert set(recorded.keys()) == {"start_time", "coordsys", "points"} + assert recorded["coordsys"] == "Horizon" + assert all(len(p) == 5 for p in recorded["points"]) + assert recorded["points"][0][1] == pytest.approx(result["encoder_az"], abs=1e-9) diff --git a/pcs/agents/acu_interface/tests/test_dispatch_runner.py b/pcs/agents/acu_interface/tests/test_dispatch_runner.py new file mode 100644 index 0000000..eb3e958 --- /dev/null +++ b/pcs/agents/acu_interface/tests/test_dispatch_runner.py @@ -0,0 +1,463 @@ +"""Drive-the-runner test for the shared scan body. + +ACTUALLY EXECUTES ``ACUAgent._dispatch_scan_process``, the single runtime path +for ``source_scan`` / ``pong_scan`` / ``daisy_scan``, rather than only +inspecting its source. The sibling files unit-test the ocs-free primitives; this +covers the COMPOSITION the runner wires: the ``latch.update(...)`` call, the +slew-arrival comparison, the post-slew re-floor, and the ``_safe_abort`` routing. 
+ +The agent imports the SO ``ocs`` framework, absent on a dev box (``ocs`` resolves +to the single-file OCS REST client, so ``ocs.ocs_agent`` is unimportable). +``twisted`` IS present, so we stub ONLY the three ``ocs`` names the module +imports, then drive the ``@inlineCallbacks`` body synchronously (``deferToThread`` +-> ``maybeDeferred``, ``dsleep`` -> fired no-op). On Linux/CI the stub is skipped +and the same tests run against the genuine module. The stub gives ``ocs`` no +package ``__path__``, so ``find_spec('ocs.ocs_agent')`` still fails and the +source-inspection skip-guards elsewhere stay skipped. +""" + +import importlib.util +import sys +import time +import types +from contextlib import contextmanager + +import pytest + +_AGENT = None +_IMPORT_ERR = None +try: + try: + _real_ocs = importlib.util.find_spec("ocs.ocs_agent") is not None + except (ImportError, ValueError, ModuleNotFoundError): + _real_ocs = False + if not _real_ocs: + import ocs # the single-file REST client + + ocs.ocs_agent = types.SimpleNamespace(param=lambda *a, **k: (lambda f: f)) + ocs.site_config = types.SimpleNamespace() + _otw = types.ModuleType("ocs.ocs_twisted") + _otw.TimeoutLock = type("TimeoutLock", (), {}) + sys.modules.setdefault("ocs.ocs_twisted", _otw) + ocs.ocs_twisted = _otw + from twisted.internet import defer + from twisted.python.failure import Failure + + from pcs.agents.acu_interface import agent as _AGENT +except Exception as exc: # pragma: no cover - environment-dependent + _IMPORT_ERR = exc + +pytestmark = pytest.mark.skipif( + _AGENT is None, reason=f"agent module not importable ({_IMPORT_ERR})" +) + +ENC_AZ, ENC_EL = 120.0, 45.0 + + +class _Log: + def info(self, *a, **k): + pass + + warn = error = debug = info + + +class _Resp: + def __init__(self, code=200, text="ok"): + self.status_code = code + self.text = text + + +class _FakeSession: + def __init__(self, status="running"): + self.status = status + + +class _FakeLock: + def __init__(self): + self.job = None 
+ + @contextmanager + def acquire_timeout(self, timeout, job=None): + self.job = job + yield True + + +class _FakeTCS: + """Records calls. ``get_status`` returns a 'running' status (free < 9999, + which arms the latch) then a 'drained' one (free == 9999 + zero velocity, + which completes it). ``move_to`` optionally trips an injected callback so a + test can flip the session to 'stopping' mid-slew.""" + + def __init__(self, on_move_to=None): + self.calls = [] + self._poll = 0 + self.on_move_to = on_move_to + + def move_to(self, az, el): + self.calls.append(("move_to", az, el)) + if self.on_move_to: + self.on_move_to() + return _Resp(200) + + def scan_pattern(self, payload): + self.calls.append(("scan_pattern", dict(payload))) + return _Resp(200) + + def abort(self): + self.calls.append(("abort",)) + return {"status": "ok"} + + def get_status(self): + self._poll += 1 + running = self._poll == 1 + return { + "Qty of free program track stack positions": 5000 if running else 9999, + "Azimuth current velocity": 0.4 if running else 0.0, + "Elevation current velocity": 0.0, + "Azimuth mode": "ProgramTrack", + "Elevation mode": "ProgramTrack", + } + + +def _canned_build(**kwargs): + # Default to a near-now start so the post-slew re-floor is a ~no-op + # (drift ~ 0) for the happy/gate POST paths. The stale-refusal test + # overrides start_time into the deep past to drive drift > the cap. + now = kwargs.get("now_unix", time.time()) + start = kwargs.pop("_start_time", now + 5.0) + return { + "encoder_az": ENC_AZ, + "encoder_el": ENC_EL, + "payload": { + "start_time": start, + "coordsys": "Horizon", + "points": [[0.0, ENC_AZ, ENC_EL, 0.0, 0.0], + [1.0, ENC_AZ + 0.1, ENC_EL, 0.1, 0.0]], + }, + } + + +def _make_agent(tcs): + a = _AGENT.ACUAgent.__new__(_AGENT.ACUAgent) + a.log = _Log() + a.acu_read = tcs + a.azel_lock = _FakeLock() + # Converge the slew-arrival loop on the first poll: current == slew target. 
+ # The runner now calls this off-reactor with the per-scan ``tcs`` + # (deferToThread -> maybeDeferred(fn, tcs)), so the stub must accept it. + a._current_encoder_azel = lambda *a, **k: (ENC_AZ, ENC_EL, "broadcast") + return a + + +@pytest.fixture +def sync_reactor(monkeypatch): + """Run the @inlineCallbacks body synchronously, no real reactor: deferToThread + runs the fn inline (maybeDeferred captures exceptions like the real one); + dsleep is an already-fired no-op.""" + monkeypatch.setattr( + _AGENT, + "threads", + types.SimpleNamespace( + deferToThread=lambda fn, *a, **k: defer.maybeDeferred(fn, *a, **k) + ), + ) + monkeypatch.setattr(_AGENT, "dsleep", lambda *a, **k: defer.succeed(None)) + + +def _run(d): + out = [] + d.addBoth(out.append) + assert out, "Deferred did not fire synchronously" + res = out[0] + if isinstance(res, Failure): + res.raiseException() + return res + + +def test_dispatch_runner_happy_path(sync_reactor): + """End-to-end through the REAL runner: slew arrives, POST 200, latch + completes. Asserts move_to + scan_pattern were called and the post-slew + re-floor left a near-now scan POSTable (drift below the cap).""" + tcs = _FakeTCS() + agent = _make_agent(tcs) + session = _FakeSession(status="running") + params = {"scan_params": {"body": "x"}, "scheduled_t0_unix": None} + + ok, msg = _run( + agent._dispatch_scan_process( + session, params, build_fn=_canned_build, job="source_scan", tcs=tcs + ) + ) + + assert ok is True + kinds = [c[0] for c in tcs.calls] + assert "move_to" in kinds and "scan_pattern" in kinds + # The POSTed body carries an absolute Unix start_time (the build's near-now + # start, re-floored a no-op): the runner wires the re-floor and the small + # drift is under MAX_REFLOOR_DRIFT_SEC, so the scan is NOT refused. + # (The stale-refusal path, drift over the cap, is its own test below.) 
+ posted = next(c[1] for c in tcs.calls if c[0] == "scan_pattern") + assert posted["start_time"] >= 1e9 + assert tcs._poll >= 2 # latch needed the running->drained transition + + +def test_dispatch_runner_drives_constant_el(sync_reactor): + """constant_el_scan now shares the runner: driving _dispatch_scan_process + with build_constant_el_payload's job label exercises the SAME slew/gate/ + re-floor/POST/latch path. This is the net-new execution coverage the fold + unlocks: the CES Process body had zero execution before. The canned build + stands in for the real core (date-stable; the core itself is covered in + test_constant_el_scan.py).""" + tcs = _FakeTCS() + agent = _make_agent(tcs) + session = _FakeSession(status="running") + params = {"scan_params": {"elevation": 45.0}, "scheduled_t0_unix": None} + + ok, msg = _run( + agent._dispatch_scan_process( + session, params, build_fn=_canned_build, + job="constant_el_scan", tcs=tcs + ) + ) + + assert ok is True + kinds = [c[0] for c in tcs.calls] + assert "move_to" in kinds and "scan_pattern" in kinds + posted = next(c[1] for c in tcs.calls if c[0] == "scan_pattern") + assert posted["start_time"] >= 1e9 # post-slew re-floor fired + + +def test_dispatch_runner_aborts_mid_slew(sync_reactor): + """If the session goes 'stopping' during the slew, the runner routes through + _safe_abort -> tcs.abort() and never POSTs.""" + session = _FakeSession(status="running") + # move_to is between the pre-slew check and the slew loop, so tripping + # 'stopping' there drives the runner into the slew-loop abort branch. 
+ tcs = _FakeTCS(on_move_to=lambda: setattr(session, "status", "stopping")) + agent = _make_agent(tcs) + params = {"scan_params": {"body": "x"}, "scheduled_t0_unix": None} + + ok, msg = _run( + agent._dispatch_scan_process( + session, params, build_fn=_canned_build, job="pong_scan", tcs=tcs + ) + ) + + assert ok is True + assert "abort" in msg.lower() + kinds = [c[0] for c in tcs.calls] + assert "abort" in kinds, "did not route through _safe_abort -> tcs.abort()" + assert "scan_pattern" not in kinds, "must not POST after a mid-slew abort" + + +# --------------------------------------------------------------------------- +# Slew-arrival gate: the dish must be position-arrived AND velocity-settled before +# /path is POSTed. Go TCS dequeues the next command only once its move is done +# (position within tol AND |velocity| < speedTol, commands.go:127-130); a /path +# POST while the mount is still settling returns 503 and the scan is dropped. +# These pin that the gate waits for the axes to stop. The broadcast carries no +# velocity, so the gate's velocity half is a status read; _current_encoder_azel is +# stubbed to report the dish already at the target, isolating the velocity gate. +# --------------------------------------------------------------------------- + + +class _VelGateTCS(_FakeTCS): + """``get_status`` reports a non-zero azimuth velocity for the first + ``settle_after`` polls (mount still moving) then a stopped + drained status. 
+ Position is reported arrived from poll one (via the stubbed + ``_current_encoder_azel``), so only the velocity condition gates.""" + + def __init__(self, settle_after=2): + super().__init__() + self.settle_after = settle_after + + def get_status(self): + self._poll += 1 + moving = self._poll <= self.settle_after + return { + "Qty of free program track stack positions": 9999, + "Azimuth current velocity": 0.5 if moving else 0.0, + "Elevation current velocity": 0.0, + "Azimuth mode": "ProgramTrack", + "Elevation mode": "ProgramTrack", + } + + +def test_slew_gate_waits_for_velocity_to_settle(sync_reactor): + """Position is arrived from the first poll, but the azimuth axis is still + moving for two status reads. The gate must NOT POST until the velocity drops + below tolerance, so /path is POSTed only after >= settle_after+1 status + polls (the extra polls are the gate spinning on the still-moving axis).""" + tcs = _VelGateTCS(settle_after=2) + agent = _make_agent(tcs) + session = _FakeSession(status="running") + params = {"scan_params": {"body": "x"}, "scheduled_t0_unix": None} + + ok, msg = _run( + agent._dispatch_scan_process( + session, params, build_fn=_canned_build, job="daisy_scan", tcs=tcs + ) + ) + + assert ok is True + kinds = [c[0] for c in tcs.calls] + assert "scan_pattern" in kinds, "gate never released; /path not POSTed" + # The first two status reads saw a moving azimuth axis (velocity 0.5); the + # gate could only break on the third (velocity 0.0). A position-only gate + # would have POSTed after zero status reads. + assert tcs._poll >= tcs.settle_after + 1 + + +def test_slew_gate_times_out_if_axes_never_stop(sync_reactor, monkeypatch): + """If the mount is position-arrived but its axes never stop, the gate must + NOT POST: it fails the Process on the slew timeout instead of POSTing into + a still-settling mount (which Go TCS would reject with 503). 
Drive the + deadline into the past so the bounded wait expires deterministically.""" + monkeypatch.setattr(_AGENT, "SLEW_TIMEOUT_SEC", -1.0) + + # Position arrived, but azimuth velocity stays high forever. + tcs = _VelGateTCS(settle_after=10**9) + agent = _make_agent(tcs) + session = _FakeSession(status="running") + params = {"scan_params": {"body": "x"}, "scheduled_t0_unix": None} + + ok, msg = _run( + agent._dispatch_scan_process( + session, params, build_fn=_canned_build, job="source_scan", tcs=tcs + ) + ) + + assert ok is False + assert "timed out" in msg.lower() + kinds = [c[0] for c in tcs.calls] + assert "scan_pattern" not in kinds, "must not POST while the axes are moving" + + +def test_axes_stopped_requires_both_velocities_present_and_below_tol(): + """Unit-check the helper the gate uses: both axis velocities must be present + AND below TCS_SPEED_TOL. A missing velocity is 'not known to be stopped'.""" + agent = _make_agent(_FakeTCS()) + tol = _AGENT.TCS_SPEED_TOL + assert agent._axes_stopped( + {"Azimuth current velocity": 0.0, "Elevation current velocity": 0.0}) + assert agent._axes_stopped( + {"Azimuth current velocity": tol / 2, "Elevation current velocity": -tol / 2}) + # One axis still moving. + assert not agent._axes_stopped( + {"Azimuth current velocity": 0.5, "Elevation current velocity": 0.0}) + # Velocity absent (e.g. the broadcast, which carries no velocity). + assert not agent._axes_stopped({"Azimuth current velocity": 0.0}) + assert not agent._axes_stopped({}) + + +# --------------------------------------------------------------------------- +# The runner reads the current encoder position OFF the reactor and +# hands the position helper the PER-SCAN ``tcs``, so the fallback status read uses +# the scan's own Session, not the shared ``self.acu_read`` (which the monitor +# owns). Proves the wiring: ``_current_encoder_azel`` is invoked with the per-scan +# client. 
+# --------------------------------------------------------------------------- + + +def test_runner_calls_current_encoder_azel_off_reactor_with_tcs(sync_reactor): + """The runner passes the per-scan ``tcs`` to ``_current_encoder_azel``. + + A spy records its positional args; asserting the helper was called with the + SAME per-scan client the runner was handed is the structural guarantee that the + fallback status read can never touch the monitor's shared ``self.acu_read`` + Session.""" + tcs = _FakeTCS() + agent = _make_agent(tcs) + seen = [] + + def _spy(*a): + seen.append(a) + return (ENC_AZ, ENC_EL, "broadcast") + + agent._current_encoder_azel = _spy + session = _FakeSession(status="running") + params = {"scan_params": {"body": "x"}, "scheduled_t0_unix": None} + + ok, _ = _run( + agent._dispatch_scan_process( + session, params, build_fn=_canned_build, job="source_scan", tcs=tcs + ) + ) + + assert ok is True + assert seen, "_current_encoder_azel was never called" + # Every call received exactly the per-scan tcs as its single positional arg. + assert all(a == (tcs,) for a in seen), ( + f"expected _current_encoder_azel(tcs); got calls {seen!r}") + + +def test_dispatch_runner_refuses_on_stale_refloor(sync_reactor): + """If the post-slew re-floor advances start_time past + MAX_REFLOOR_DRIFT_SEC the runner REFUSES to POST: the baked az/el track is + stale (tracks the source's old sky position) and the re-floor only shifts WHEN + the scan plays, not WHERE. A deep-past build start forces drift past the cap; + assert ``ok is False``, the message names the staleness, and ``scan_pattern`` + was NEVER called. The happy-path test (drift ~ 0 -> POST) is the control.""" + tcs = _FakeTCS() + agent = _make_agent(tcs) + session = _FakeSession(status="running") + params = {"scan_params": {"body": "x"}, "scheduled_t0_unix": None} + + # start_time = now - 200 s: the re-floor bumps it to now+buffer, a drift of + # ~ 200 + buffer s >> MAX_REFLOOR_DRIFT_SEC (30 s). 
+ stale_build = lambda **k: _canned_build(**k, _start_time=k["now_unix"] - 200) + + ok, msg = _run( + agent._dispatch_scan_process( + session, params, build_fn=stale_build, job="source_scan", tcs=tcs + ) + ) + + assert ok is False + low = msg.lower() + assert "stale" in low or "re-floor" in low, f"unexpected refusal message: {msg!r}" + kinds = [c[0] for c in tcs.calls] + assert "scan_pattern" not in kinds, "must not POST a stale (drift > cap) trajectory" + + +def test_dispatch_runner_aborts_in_slew_settle_window(sync_reactor): + """Regression (HIGH): an abort landing AFTER the slew-arrival gate breaks but + BEFORE the /path POST must not POST the scan. The gate's final velocity read is + off-reactor, on which the session can flip to 'stopping'; without the re-check + the runner would POST the cancelled scan (then the completion loop sends + /abort, racing /path at the ACU). Assert /path is NEVER POSTed and the runner + routes through _safe_abort.""" + session = _FakeSession(status="running") + + class _AbortAtSettleTCS(_FakeTCS): + # First status read reports the axes already stopped (so the gate breaks) + # AND flips the session to 'stopping', landing the abort exactly in the + # post-break, pre-POST window. 
+ def get_status(self): + self._poll += 1 + if self._poll == 1: + session.status = "stopping" + return { + "Qty of free program track stack positions": 9999, + "Azimuth current velocity": 0.0, + "Elevation current velocity": 0.0, + "Azimuth mode": "ProgramTrack", + "Elevation mode": "ProgramTrack", + } + + tcs = _AbortAtSettleTCS() + agent = _make_agent(tcs) + params = {"scan_params": {"body": "x"}, "scheduled_t0_unix": None} + + ok, msg = _run( + agent._dispatch_scan_process( + session, params, build_fn=_canned_build, job="source_scan", tcs=tcs + ) + ) + + assert ok is True + assert "abort" in msg.lower() + kinds = [c[0] for c in tcs.calls] + assert "scan_pattern" not in kinds, ( + "POSTed a scan that was aborted in the slew-settle -> POST window") + assert "abort" in kinds, "did not route through _safe_abort -> tcs.abort()" diff --git a/pcs/agents/acu_interface/tests/test_monitor_loop.py b/pcs/agents/acu_interface/tests/test_monitor_loop.py new file mode 100644 index 0000000..d992580 --- /dev/null +++ b/pcs/agents/acu_interface/tests/test_monitor_loop.py @@ -0,0 +1,232 @@ +"""Execution coverage for the always-on ``monitor`` Process's status read. + +Background, the bug this guards +--------------------------------- +The ``monitor`` Process is on by default (NOT gated by ``azel_lock``) and polls +the ACU. Before the fix it read ``self.acu_read.get_status()`` with a bare +``yield``, ``get_status`` returns a plain dict, so the synchronous ``requests`` +GET ran ON THE REACTOR THREAD. With the Pacemaker throttle commented out, a +slow-but-alive TCS froze the reactor up to the 30 s HTTP read timeout per poll, +stalling the 200 Hz broadcast and every slew/completion gate. + +The fix moves that GET off the reactor (``deferToThread``). 
The handlers +(priming guard + in-loop) trap ``SystemExit`` (``aculib`` does ``sys.exit(-1)`` on +``ConnectionError``) so a hard ACU drop logs + continues instead of tearing down +the monitor (a bare ``except Exception`` does NOT catch ``SystemExit``). + +What this file proves, by EXECUTING ``ACUAgent.monitor`` +--------------------------------------------------------- +``_get_status`` is an un-invokable nested closure, so we drive ``monitor()`` +(priming ``_get_status()`` executes the off-reactor GET, one loop turn executes +the in-loop handler): + +1. The status read is dispatched via ``deferToThread``, not inline on the + reactor thread (executed not source-inspected). +2. A ``SystemExit`` from ``get_status`` is CAUGHT (priming guard + in-loop + handler): the monitor publishes ``acu_error`` / marks disconnected and exits + cleanly. Negative control: a bare ``except Exception`` handler lets the + ``SystemExit`` escape, documenting why the handler widens to + ``(Exception, SystemExit)``. + +Harness, same stub as ``test_dispatch_runner`` (runs on Windows + WSL): stub the +three ``ocs`` names, drive ``@inlineCallbacks`` synchronously (``deferToThread`` -> +a RECORDING pass-through via ``maybeDeferred``, which captures ``SystemExit`` as a +``Failure`` like the real one; ``dsleep`` -> fired no-op). A fake session flips +``running -> done`` so the loop turns once. 
+""" + +import importlib.util +import sys +import types + +import pytest + + +_AGENT = None +_IMPORT_ERR = None +try: + try: + _real_ocs = importlib.util.find_spec("ocs.ocs_agent") is not None + except (ImportError, ValueError, ModuleNotFoundError): + _real_ocs = False + if not _real_ocs: + import ocs # the single-file REST client + + ocs.ocs_agent = types.SimpleNamespace(param=lambda *a, **k: (lambda f: f)) + ocs.site_config = types.SimpleNamespace() + _otw = types.ModuleType("ocs.ocs_twisted") + _otw.TimeoutLock = type("TimeoutLock", (), {}) + sys.modules.setdefault("ocs.ocs_twisted", _otw) + ocs.ocs_twisted = _otw + from twisted.internet import defer + from twisted.python.failure import Failure + + from pcs.agents.acu_interface import agent as _AGENT +except Exception as exc: # pragma: no cover - environment-dependent + _IMPORT_ERR = exc + +pytestmark = pytest.mark.skipif( + _AGENT is None, reason=f"agent module not importable ({_IMPORT_ERR})" +) + + +class _Log: + def info(self, *a, **k): + pass + + warn = warning = error = debug = critical = info + + +class _FakeSession: + """A monitor session whose ``status`` the test controls. ``monitor`` reassigns + ``.data`` itself (agent.py:387), so a plain settable attribute is enough.""" + + def __init__(self, status="running"): + self.status = status + self.data = {} + + +class _RecordingThreads: + """Stand-in for ``agent.threads``: records every ``deferToThread`` call and + runs the fn via ``maybeDeferred`` (which captures ``SystemExit`` as a Failure, + matching the real ``deferToThread``).""" + + def __init__(self): + self.deferred_calls = [] + + def deferToThread(self, fn, *a, **k): + self.deferred_calls.append(fn) + return defer.maybeDeferred(fn, *a, **k) + + +def _make_monitor_agent(get_status): + """A real ``ACUAgent`` carrying only what ``monitor`` touches up to / through + its status read. 
``self.acu_read.get_status`` is the supplied callable.""" + a = _AGENT.ACUAgent.__new__(_AGENT.ACUAgent) + a.log = _Log() + a.platform_type = "latp" + a.scan_params = {} + a.acu_read = types.SimpleNamespace(get_status=get_status) + a.agent = types.SimpleNamespace(publish_to_feed=lambda *a, **k: None) + return a + + +@pytest.fixture +def sync_reactor(monkeypatch): + """Drive ``monitor`` synchronously and hand it the recording ``threads``.""" + rec = _RecordingThreads() + monkeypatch.setattr(_AGENT, "threads", rec) + monkeypatch.setattr(_AGENT, "dsleep", lambda *a, **k: defer.succeed(None)) + return rec + + +def _drain(d): + """Collect a synchronously-fired Deferred's result/Failure (do not re-raise, + the monitor is expected to RETURN cleanly even on the SystemExit path).""" + out = [] + d.addBoth(out.append) + assert out, "monitor Deferred did not fire synchronously" + return out[0] + + +# --------------------------------------------------------------------------- +# 1. The status read runs OFF the reactor (via deferToThread). +# --------------------------------------------------------------------------- + + +def test_monitor_get_status_runs_off_reactor(sync_reactor): + """Drive ``monitor`` with a recorder ``get_status`` and a session that is + already 'done' (so the priming ``_get_status()`` runs the off-reactor GET but + the loop turns zero times). Assert the GET was dispatched THROUGH + ``deferToThread``, i.e. it did not run inline on the reactor thread (executed).""" + polls = [] + + def _get_status(): + polls.append(1) + return {"Azimuth current position": 1.0} + + agent = _make_monitor_agent(_get_status) + # 'done' from the start: the priming call still runs (executing the GET), then + # ``while session.status in ['running']`` is immediately false -> no loop + # iteration -> we never reach the brittle field-mapping block. 
+ session = _FakeSession(status="done") + + result = _drain(agent.monitor(session, {})) + + assert not isinstance(result, Failure), f"monitor raised: {result}" + assert polls, "get_status was never called" + # The decisive assertion: the recorder saw the bound get_status go through + # deferToThread. A bare ``yield self.acu_read.get_status()`` would have + # called it inline and the recorder would be empty. + assert agent.acu_read.get_status in sync_reactor.deferred_calls, \ + "monitor's get_status did NOT run via deferToThread (reactor-block regression)" + + +# --------------------------------------------------------------------------- +# 2. A SystemExit from get_status is caught, not propagated. +# --------------------------------------------------------------------------- + + +def test_monitor_survives_systemexit_from_get_status(sync_reactor): + """``aculib.get_status`` does ``sys.exit(-1)`` on a ConnectionError. The + monitor must CATCH that ``SystemExit`` (priming guard + in-loop handler), + mark disconnected / publish ``acu_error``, and exit cleanly, never + let it tear the Process down. Drive: get_status raises SystemExit on the + priming call AND the first in-loop poll, then the session flips to 'done'.""" + published = [] + poll = {"n": 0} + session = _FakeSession(status="running") + + def _get_status(): + poll["n"] += 1 + # 1st call = priming; 2nd = in-loop. After the in-loop raise + # is handled, flip to 'done' so the loop exits after exactly one turn. + if poll["n"] >= 2: + session.status = "done" + raise SystemExit("aculib sys.exit(-1) on ConnectionError") + + agent = _make_monitor_agent(_get_status) + agent.agent = types.SimpleNamespace( + publish_to_feed=lambda feed, block: published.append((feed, block))) + + result = _drain(agent.monitor(session, {})) + + assert not isinstance(result, Failure), \ + f"SystemExit escaped the monitor (the handlers did not catch it): {result}" + # Got past the priming guard AND through one in-loop handler. 
+ assert poll["n"] >= 2, "monitor did not reach the in-loop SystemExit handler" + assert session.data.get("connected") is False, \ + "monitor did not mark the session disconnected on the SystemExit path" + assert any(feed == "acu_error" for feed, _ in published), \ + "in-loop handler did not publish an acu_error block" + + +def test_control_bare_except_lets_systemexit_escape(sync_reactor): + """NEGATIVE CONTROL: a monitor-like loop whose in-loop handler is a bare + ``except Exception`` (the old narrow form) lets a ``SystemExit`` ESCAPE, this + is exactly what the handler's widening to ``(Exception, SystemExit)`` prevents. We + reproduce the minimal loop shape locally (driving the real ``monitor`` with a + narrowed handler would require editing the agent), so the control documents + the necessity of the widening deterministically.""" + + @defer.inlineCallbacks + def _bare_except_loop(session, get_status): + # Mirror the monitor's in-loop try/except, but with the OLD narrow handler. + while session.status in ["running"]: + try: + yield sync_reactor.deferToThread(get_status) + except Exception: # noqa: BLE001 - deliberately NOT (Exception, SystemExit) + session.status = "done" + continue + return True + + session = _FakeSession(status="running") + + def _se(): + raise SystemExit("boom") + + result = _drain(_bare_except_loop(session, _se)) + + assert isinstance(result, Failure) and result.type is SystemExit, ( + "control did not reproduce the escape: a bare ``except Exception`` should " + f"let SystemExit propagate, got {result!r}") diff --git a/pcs/agents/acu_interface/tests/test_phase3_agent_wiring.py b/pcs/agents/acu_interface/tests/test_phase3_agent_wiring.py new file mode 100644 index 0000000..6602779 --- /dev/null +++ b/pcs/agents/acu_interface/tests/test_phase3_agent_wiring.py @@ -0,0 +1,256 @@ +"""Wiring guards: the agent + the dispatch-window centralization. + +Two light-weight, date-independent concerns: + +1. 
**Dispatch-window centralization (ocs-free).** Confirm + ``build_constant_el_payload`` and all three scan cores call the shared + :func:`floor_dispatch_start` + :func:`enforce_dispatch_delay` helpers (no + divergent inline copy), by source inspection of ``trajectory.py``. Catches a + future edit that re-inlines the buffer/delay logic and lets the four cores drift + apart. + +2. **Agent wiring (skip-guarded).** Where the ocs framework is importable, confirm + the agent references the three new ``build_*`` cores, defines the three Process + methods + the ``abort`` task, and registers them. Skipped where ocs is + unavailable; the per-task logic is unit-tested ocs-free in the sibling files. + +Imports ``trajectory.py`` directly; the agent import is guarded. +""" + +import importlib.util +import inspect +import re +from pathlib import Path + +import pytest + +from pcs.agents.acu_interface import trajectory as traj_mod + + +# --------------------------------------------------------------------------- +# 1. Dispatch-window centralization (ocs-free): the shared helpers exist and +# every core calls them, no divergent inline copy. +# --------------------------------------------------------------------------- + + +def test_shared_dispatch_helpers_exist(): + """The three shared dispatch-window helpers are public on trajectory.py.""" + assert callable(traj_mod.floor_dispatch_start) + assert callable(traj_mod.enforce_dispatch_delay) + assert callable(traj_mod.refloor_payload_start_time) + + +@pytest.mark.parametrize( + "fn_name", + [ + "build_constant_el_payload", + "build_source_payload", + "build_pong_payload", + "build_daisy_payload", + ], +) +def test_every_core_calls_the_centralized_f4_helpers(fn_name): + """All four payload cores call the shared floor + delay helpers. 
+ + Source-level check: each ``build_*_payload`` must reference + ``floor_dispatch_start`` (the buffer floor) and ``enforce_dispatch_delay`` + (the too-far-out guard), so the dispatch-window logic lives in ONE place and + the four cores cannot diverge. ``build_constant_el_payload`` in particular + must NOT carry a re-inlined copy of the old ``max(scheduled_t0 or 0, now + + buffer)`` expression. + """ + src = inspect.getsource(getattr(traj_mod, fn_name)) + assert "floor_dispatch_start(" in src, f"{fn_name} does not call floor_dispatch_start" + assert "enforce_dispatch_delay(" in src, f"{fn_name} does not call enforce_dispatch_delay" + + +def test_constant_el_has_no_divergent_inline_floor(): + """The CE core was refactored: it floors via the shared helper, not inline. + + Guards the refactor. If someone re-inlines the buffer floor in the CE core, + the centralization regresses. The old inline form built ``Time(actual_t0 + _unix, ...)`` from a local ``max(...)``; the refactored core gets its anchor + from ``floor_dispatch_start`` instead. + """ + src = inspect.getsource(traj_mod.build_constant_el_payload) + assert "actual_t0 = floor_dispatch_start(" in src + assert "actual_t0_unix = max(" not in src + + +def test_shared_dispatch_runner_wires_the_process_guards(): + """The shared ``_dispatch_scan_process`` body references every Process-level + guard, caught even where ocs is unavailable. + + All four typed scans now run through ``agent._dispatch_scan_process``; no + ocs-free test can EXECUTE it (the agent needs the operations framework). Read + ``agent.py`` as text (no import) and assert every Process-level guard lives + in the ``_dispatch_scan_process`` body specifically (incl. + ``threads.deferToThread``, the off-reactor wrap), so a regression that + strips a guard from the SHARED runner is caught here. 
+ """ + agent_src = Path(traj_mod.__file__).with_name("agent.py").read_text(encoding="utf-8") + start = agent_src.index("def _dispatch_scan_process(") + nxt = re.search(r"\n (?:@|def )", agent_src[start + 1:]) + body = agent_src[start: start + 1 + nxt.start()] if nxt else agent_src[start:] + for guard in ( + "ScanCompletionLatch(", + "tcs_response_status(", + "refloor_payload_start_time(", + "_safe_abort", + "threads.deferToThread", + ): + assert guard in body, f"_dispatch_scan_process is missing {guard!r}" + + +def test_typed_scans_build_fresh_clients_not_shared_acu_read(): + """Each typed scan + the standalone abort task builds a FRESH per-scan Go + TCS client via _make_tcs(); none reuses self.acu_read. requests.Session is + not thread-safe. The monitor's reactor-thread status reads must not share + a Session with a scan's thread-pool POSTs, and the abort task's /abort POST + must not collide with a running scan's status reads. Source-grep (no import) + so it runs on a dev box without the ocs operations framework. + """ + src = Path(traj_mod.__file__).with_name("agent.py").read_text(encoding="utf-8") + # GAP 1: all four scan wrappers hand the runner a fresh client. + assert src.count("tcs=self._make_tcs()") >= 4, \ + "expected all four typed scans to pass tcs=self._make_tcs()" + # GAP 2: the standalone abort task moved off the shared client (EDIT 9). + assert "self._safe_abort, self._make_tcs()" in src, \ + "abort task must use a fresh _make_tcs() client, not self.acu_read" + # GAP 3: constant_el_scan actually DELEGATES to the runner (not re-inlined). + assert "build_fn=build_constant_el_payload" in src, \ + "constant_el_scan must delegate to _dispatch_scan_process" + # The shared runner must NOT re-introduce the self.acu_read reuse. 
+ start = src.index("def _dispatch_scan_process(") + nxt = re.search(r"\n (?:@|def )", src[start + 1:]) + runner = src[start: start + 1 + nxt.start()] if nxt else src[start:] + assert "self.acu_read" not in runner, \ + "runner reuses shared self.acu_read (concurrency hazard reintroduced)" + + +def _method_body(src: str, name: str) -> str: + """Slice one agent method body: real ``def name(`` -> next def/decorator. + + Anchors on a real 4-space-indented ``def`` (``\\n def name(``) so the + commented-out stubs (``#def az_scan():`` etc.) that precede some methods are + not matched. + """ + start = src.index(f"\n def {name}(") + 1 + nxt = re.search(r"\n (?:@|def )", src[start + 1:]) + return src[start: start + 1 + nxt.start()] if nxt else src[start:] + + +@pytest.mark.parametrize("op", ["go_to", "az_scan", "fromfile_scan"]) +def test_legacy_ops_read_status_503_safe(op): + """The three legacy Tasks read the HTTP status via ``tcs_response_status``, + not by dereferencing ``msg.status_code`` / ``msg.text`` directly. + + aculib ``post()`` returns ``{}`` on a 503 (and ``scan_pattern_from_file`` + returns ``{}`` on a 503 too, it formerly returned None on EVERY call), so + ``msg.status_code`` / ``msg.text`` on that non-Response raised + ``AttributeError`` and tore the Task down. The behavioural decision + (``{}``/``None`` -> not-200 -> graceful failure) is executed and covered in + ``test_constant_el_scan`` via ``tcs_response_status`` directly; this is the + WIRING regression guard that the legacy ops actually route through it. + Source-grep (no import) so it runs without the ocs operations framework. + """ + src = Path(traj_mod.__file__).with_name("agent.py").read_text(encoding="utf-8") + body = _method_body(src, op) + assert "tcs_response_status(" in body, \ + f"{op} does not read its status via tcs_response_status" + # The crash-prone raw dereferences must be gone from the body. 
+ assert "msg.status_code" not in body, \ + f"{op} still reads msg.status_code (crashes on a {{}}/None 503 return)" + assert "msg.text" not in body, \ + f"{op} still reads msg.text (crashes on a {{}}/None 503 return)" + + +def test_abort_task_stops_scan_processes_and_excludes_infra(): + """The standalone ``abort`` Task stops any running typed scan Process so an + out-of-band abort cannot strand ``azel_lock``. + + Source-grep (no import, Windows-safe). Asserts the ``abort`` body iterates + ``SCAN_PROCESS_OPS`` and calls ``self.agent.stop(...)`` on each (in addition + to the bare ``/abort`` it already sent), and that ``SCAN_PROCESS_OPS`` lists + the four typed scans but NOT the always-on infrastructure Processes + (``broadcast`` / ``monitor`` must keep running across an abort). Execution + coverage of the actual stop behaviour lives in + ``test_abort_stops_scan_process.py`` (needs the real ocs framework).""" + src = Path(traj_mod.__file__).with_name("agent.py").read_text(encoding="utf-8") + body = _method_body(src, "abort") + assert "SCAN_PROCESS_OPS" in body, \ + "abort does not iterate SCAN_PROCESS_OPS to stop running scans" + assert "self.agent.stop(" in body, \ + "abort does not call self.agent.stop() to release a stranded azel_lock" + + # SCAN_PROCESS_OPS is the four typed scans, and explicitly NOT the always-on + # broadcast/monitor Processes (stopping those would kill the position feed). + m = re.search(r"SCAN_PROCESS_OPS\s*=\s*\(([^)]*)\)", src) + assert m, "SCAN_PROCESS_OPS tuple not found" + ops = m.group(1) + for scan in ("constant_el_scan", "source_scan", "pong_scan", "daisy_scan"): + assert scan in ops, f"SCAN_PROCESS_OPS missing {scan!r}" + assert "broadcast" not in ops and "monitor" not in ops, \ + "SCAN_PROCESS_OPS must exclude the always-on broadcast/monitor Processes" + + +# --------------------------------------------------------------------------- +# 2. Agent wiring (skip-guarded on the ocs operations framework). 
+# +# NB: a module literally named ``ocs`` IS importable here (the OCS REST client), +# but it is NOT the SO operations framework the agent imports. Guard on the +# submodule the agent actually needs (``ocs.ocs_agent``) so the skip is correct. +# --------------------------------------------------------------------------- + + +def _ocs_framework_available() -> bool: + try: + return importlib.util.find_spec("ocs.ocs_agent") is not None + except (ImportError, ValueError, ModuleNotFoundError): + return False + + +@pytest.mark.skipif( + not _ocs_framework_available(), + reason="ocs operations framework not importable; agent-wiring test skipped", +) +def test_agent_wires_phase3_tasks_and_cores(): + """The agent imports the three new cores and wires the four new ops. + + Skipped where ocs is unavailable. Does not exercise the Process loops (that + needs a live reactor + session); guards against the agent drifting out of + sync with the scan cores / registrations it depends on. + """ + from pcs.agents.acu_interface import agent as agent_mod + + src = inspect.getsource(agent_mod) + # The three new ocs-free cores are imported and used. + for core in ("build_constant_el_payload", "build_source_payload", + "build_pong_payload", "build_daisy_payload"): + assert core in src, f"agent does not reference {core}" + # The three new Process methods + the standalone abort task are defined. + for method in ("def constant_el_scan(", "def source_scan(", "def pong_scan(", + "def daisy_scan(", "def abort("): + assert method in src, f"agent does not define {method!r}" + # ... and registered next to constant_el_scan. + for reg in ( + "register_process('constant_el_scan'", + "register_process('source_scan'", + "register_process('pong_scan'", + "register_process('daisy_scan'", + "register_task('abort'", + ): + assert reg in src, f"agent does not register {reg!r}" + # ... as non-blocking (blocking=False) so they stay abortable mid-scan. 
A + # blocking=True Process would hold the reactor thread and defeat the + # cooperative stop the Process model is chosen for. + for op, kind in (("constant_el_scan", "process"), ("source_scan", "process"), + ("pong_scan", "process"), ("daisy_scan", "process"), + ("abort", "task")): + pat = rf"register_{kind}\(\s*'{op}'[^)]*blocking=False" + assert re.search(pat, src), f"{op} is not registered blocking=False" + # The three scan Processes wire the abortable stop handler. + assert "_simple_process_stop" in src + # The shared abort path is reused by both the standalone task and the + # in-Process stop handlers. + assert "_safe_abort(" in src diff --git a/pcs/agents/acu_interface/tests/test_pong_scan.py b/pcs/agents/acu_interface/tests/test_pong_scan.py new file mode 100644 index 0000000..c5c2a96 --- /dev/null +++ b/pcs/agents/acu_interface/tests/test_pong_scan.py @@ -0,0 +1,307 @@ +"""Tests for the FYST Pong-scan dispatch core + Go TCS contract. + +PRIMARY (no server, no ocs): drive +:func:`pcs.agents.acu_interface.trajectory.build_pong_payload` directly and assert +the returned ``/path`` body satisfies every rule the Go TCS enforces in +``commands.go`` (``pathCmd.Check()`` + ``checkAzEl``), mirroring +``test_constant_el_scan.py``: + +- exactly the three keys ``{start_time, coordsys, points}``; ``coordsys == + "Horizon"``; +- ``start_time`` absolute Unix and >= now + 10 s (the + :data:`SCAN_DISPATCH_BUFFER_SEC` floor, stronger than the 9.8 s reject); +- ``points`` is N x 5; every consecutive ``dt >= 0.05`` s; +- the first 100 points satisfy az in [-180, 360], el in [-90, 180], + ``|vaz| <= 3.0``, ``|vel| <= 1.5`` (velocities present); +- the slew-target az is in range and ``points[0][1] == encoder_az`` (wrap align). + +Plus the dispatch-core guards: + +- Velocity frame: ``plan_pong_scan``'s ``velocity`` is ON-SKY (tangent-plane) + deg/s, NOT mount-frame. 
The realised mount az velocity is ``~velocity / + cos(el)``, *larger* than the requested on-sky value (opposite of the + source/constant-el convention); +- Hardware-dynamics escalation: a velocity/acceleration breach anywhere raises + ``TrajectoryValidationError``; +- Dispatch-delay: Pong takes ``start_time`` literally, so its resolved start + equals the floored anchor and the guard passes trivially; the shared + :func:`enforce_dispatch_delay` is tested directly for raising-when-far, and the + buffer floor is verified. + +DATE-STABILITY: Pong uses ``start_time`` literally, so the only date dependence is +whether the field is observable (sun-safety disabled in the fixture). Every build +threads a FIXED ``Time`` epoch. At 2026-06-15T13:00 UTC the RA=80, Dec=-40 field +sits at el~45, comfortably inside the FYST limits. + +Imports ``trajectory.py`` directly, never ``agent.py``. +""" + +import warnings + +import numpy as np +import pytest +from astropy.time import Time + +from fyst_trajectories import get_fyst_site + +from pcs.agents.acu_interface.trajectory import ( + MAX_DISPATCH_DELAY_SEC, + SCAN_DISPATCH_BUFFER_SEC, + DispatchDelayError, + TrajectoryValidationError, + build_pong_payload, + enforce_dispatch_delay, +) + +# Go TCS /path contract constants (commands.go checkAzEl + pathCmd.Check()). +AZ_MIN_TCS = -180.0 # commands.go:18 +AZ_MAX_TCS = 360.0 # commands.go:19 +EL_MIN_TCS = -90.0 # commands.go:24 +EL_MAX_TCS = 180.0 # commands.go:25 +AZ_SPEED_MAX_TCS = 3.0 # commands.go:20 +EL_SPEED_MAX_TCS = 1.5 # commands.go:26 +MIN_DT_TCS = 0.05 # commands.go:261 +MIN_LEAD_TCS = 9.8 # commands.go:253 +FIRST_N_CHECKED = 100 # commands.go:269 + +# A representative Pong over a field well inside the FYST limits at the epoch. +REPRESENTATIVE_PARAMS = dict( + ra_center=80.0, + dec_center=-40.0, + width=2.0, + height=2.0, + velocity=0.5, # ON-SKY deg/s (tangent-plane) + spacing=0.1, + num_terms=4, +) + +# Epoch at which the field is observable (el~45). 
# Pong uses start_time literally, so there is no forward-search dependence:
# the only date dependence is observability + sun. See the module docstring.
FIXED_EPOCH = Time("2026-06-15T13:00:00", scale="utc")
FIXED_NOW = FIXED_EPOCH.unix


@pytest.fixture
def site():
    """Return the FYST site with sun avoidance switched off.

    With sun avoidance disabled the wrap choice is purely geometric; the Go TCS
    contract is independent of sun avoidance.
    """
    return get_fyst_site(sun_avoidance_enabled=False)


@pytest.fixture
def built(site):
    """Build the representative Pong payload at the fixed epoch.

    Returns ``(result, now_unix)``. Warnings raised during the build are
    ignored, and the dispatch-delay limit is passed as ``float("inf")``.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        plan = build_pong_payload(
            scan_params=REPRESENTATIVE_PARAMS,
            current_az=200.0,
            current_el=60.0,
            site=site,
            now_unix=FIXED_NOW,
            max_dispatch_delay_sec=float("inf"),
        )
        return plan, FIXED_NOW


# ---------------------------------------------------------------------------
# PRIMARY: Go TCS /path contract (pathCmd.Check + checkAzEl)
# ---------------------------------------------------------------------------


def test_payload_has_exactly_three_keys(built):
    """The body carries ``start_time``, ``coordsys`` and ``points``, nothing else."""
    body = built[0]["payload"]
    assert set(body.keys()) == {"start_time", "coordsys", "points"}


def test_coordsys_is_horizon(built):
    """The body's ``coordsys`` is ``"Horizon"``."""
    body = built[0]["payload"]
    assert body["coordsys"] == "Horizon"


def test_start_time_absolute_and_respects_buffer_floor(built):
    """``start_time`` is absolute and clears both the TCS lead and the buffer floor."""
    plan, dispatched_at = built
    t_start = plan["payload"]["start_time"]
    assert t_start > 1e9
    # commands.go:253
    assert t_start >= dispatched_at + MIN_LEAD_TCS
    # the 10 s floor
    assert t_start >= dispatched_at + SCAN_DISPATCH_BUFFER_SEC - 1e-6


def test_points_is_n_by_5(built):
    """``points`` is non-empty and every row has five entries."""
    rows = built[0]["payload"]["points"]
    # commands.go:246 "no points in path"
    assert len(rows) > 0
    assert all(len(row) == 5 for row in rows)


def test_consecutive_dt_at_least_50ms(built):
    """The smallest gap between consecutive point times is at least ``MIN_DT_TCS``."""
    stamps = np.array([row[0] for row in built[0]["payload"]["points"]])
    smallest_step = np.diff(stamps).min()
    # commands.go:261
    assert smallest_step >= MIN_DT_TCS


def test_first_100_points_satisfy_checkAzEl(built, site):
    """The first ``FIRST_N_CHECKED`` points respect the TCS and site bounds."""
    head = built[0]["payload"]["points"][:FIRST_N_CHECKED]
    el_limits = site.telescope_limits.elevation
    el_min, el_max = el_limits.min, el_limits.max
    for i, (_t, az, el, vaz, vel) in enumerate(head):
        assert AZ_MIN_TCS <= az <= AZ_MAX_TCS, f"point {i}: az {az} out of TCS range"
        assert EL_MIN_TCS <= el <= EL_MAX_TCS, f"point {i}: el {el} out of TCS range"
        assert el_min <= el <= el_max, f"point {i}: el {el} out of site range"
        assert abs(vaz) <= AZ_SPEED_MAX_TCS, f"point {i}: |vaz| {vaz} > {AZ_SPEED_MAX_TCS}"
        assert abs(vel) <= EL_SPEED_MAX_TCS, f"point {i}: |vel| {vel} > {EL_SPEED_MAX_TCS}"


def test_velocities_present_and_non_null(built):
    """Both velocity columns of the first ``FIRST_N_CHECKED`` points are finite."""
    head = built[0]["payload"]["points"][:FIRST_N_CHECKED]
    for i, row in enumerate(head):
        az_vel = row[3]
        assert az_vel is not None and np.isfinite(az_vel), f"point {i}: az_vel null/non-finite"
        el_vel = row[4]
        assert el_vel is not None and np.isfinite(el_vel), f"point {i}: el_vel null/non-finite"


def test_slew_target_in_range(built, site):
    """The slew target (``encoder_az`` / ``encoder_el``) lies inside the site limits."""
    plan = built[0]
    limits = site.telescope_limits
    assert limits.azimuth.is_in_range(plan["encoder_az"])
    assert limits.elevation.is_in_range(plan["encoder_el"])


def test_wrap_alignment_first_point_equals_encoder_az(built):
    """The first point's azimuth matches ``encoder_az`` to within 1e-9."""
    plan = built[0]
    first_az = plan["payload"]["points"][0][1]
    assert first_az == pytest.approx(plan["encoder_az"], abs=1e-9)


# ---------------------------------------------------------------------------
# ON-SKY velocity (the realised mount az rate is larger, not equal).
# ---------------------------------------------------------------------------


def test_velocity_is_on_sky_not_mount_frame(built):
    """The peak mount az rate in the body exceeds the requested on-sky velocity.

    Constant-el/source pass the velocity through in the mount frame, whereas
    ``plan_pong_scan`` takes an ON-SKY scan speed and maps it to the mount
    frame through the field geometry. The realised azimuth-coordinate rate is
    ``~velocity / cos(el)``; at el~45 (cos ~0.71) the requested 0.5 deg/s
    becomes ~0.7 deg/s. A naive mount-frame pass-through would peak at ~0.5,
    which this assertion rejects.
    """
    az_speeds = np.array([abs(row[3]) for row in built[0]["payload"]["points"]])
    requested = REPRESENTATIVE_PARAMS["velocity"]
    assert az_speeds.max() > requested + 1e-3


# ---------------------------------------------------------------------------
# Hardware-dynamics escalation (velocity / acceleration, whole trajectory).
# ---------------------------------------------------------------------------


def test_too_fast_velocity_escalates_h3(site):
    """A too-fast on-sky velocity raises ``TrajectoryValidationError``.

    On-sky velocity 4.0 at el~45 gives a mount az rate of ~5.6 deg/s, well
    over the Go TCS hardware ceiling (3.0, commands.go:20). The build must
    reject it BEFORE the POST, because the Go TCS only validates the first 100
    points. ``now`` is fixed, so the test is date-stable.
    """
    too_fast = {**REPRESENTATIVE_PARAMS, "velocity": 4.0, "spacing": 0.5}
    expected_failure = pytest.raises(
        TrajectoryValidationError, match=r"velocity|acceleration"
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        with expected_failure:
            build_pong_payload(
                scan_params=too_fast,
                current_az=200.0,
                current_el=60.0,
                site=site,
                now_unix=FIXED_NOW,
                max_dispatch_delay_sec=float("inf"),
            )


# ---------------------------------------------------------------------------
# Dispatch-delay guard. Pong's start == the floored anchor, so the guard
# passes trivially on a real scan; the shared helper is tested directly for the
# raising-when-far behaviour.
# ---------------------------------------------------------------------------


def test_pong_passes_default_dispatch_delay_guard(site):
    """With the default delay limit, Pong's literal start passes the guard.

    The start sits just past the buffer: later than ``now`` and no more than
    ``MAX_DISPATCH_DELAY_SEC`` (the 30-min default) after it.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # max_dispatch_delay_sec is omitted on purpose: the default is under test.
        plan = build_pong_payload(
            scan_params=REPRESENTATIVE_PARAMS,
            current_az=200.0,
            current_el=60.0,
            site=site,
            now_unix=FIXED_NOW,
        )
    lead = plan["payload"]["start_time"] - FIXED_NOW
    assert 0.0 < lead
    assert lead <= MAX_DISPATCH_DELAY_SEC


def test_shared_dispatch_delay_helper_raises_when_far():
    """The shared dispatch-delay guard raises for a start that is too far out.

    Pure arithmetic on supplied Unix times, so it is date-independent. A start
    two hours past ``now`` exceeds the 30-min default and raises
    ``DispatchDelayError``; a start 60 s out does not raise. Every typed task
    shares this helper.
    """
    now = 1_000_000_000.0
    two_hours_out = now + 7200.0
    one_minute_out = now + 60.0
    with pytest.raises(DispatchDelayError, match=r"after dispatch"):
        enforce_dispatch_delay(two_hours_out, now, MAX_DISPATCH_DELAY_SEC, context="pong scan")
    # Inside the bound the helper must not raise.
    enforce_dispatch_delay(one_minute_out, now, MAX_DISPATCH_DELAY_SEC, context="pong scan")


def test_scheduled_t0_in_past_is_floored(site):
    """A ``scheduled_t0_unix`` in the past is advanced to at least now + buffer."""
    stale_t0 = FIXED_NOW - 1000.0
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        plan = build_pong_payload(
            scan_params=REPRESENTATIVE_PARAMS,
            current_az=200.0,
            current_el=60.0,
            site=site,
            scheduled_t0_unix=stale_t0,
            now_unix=FIXED_NOW,
            max_dispatch_delay_sec=float("inf"),
        )
    floored_start = plan["payload"]["start_time"]
    assert floored_start >= FIXED_NOW + SCAN_DISPATCH_BUFFER_SEC - 1e-6


# ---------------------------------------------------------------------------
# SECONDARY: drive the body through a mock Go TCS (in-process FastAPI client).
+# --------------------------------------------------------------------------- + +fastapi = pytest.importorskip("fastapi", reason="FastAPI not available for mock-TCS test") +from fastapi.testclient import TestClient # noqa: E402 + +from pcs.agents.acu_interface.tests.mock_tcs import PREFIX, create_mock_tcs # noqa: E402 + + +def test_mock_tcs_records_contract_valid_path(built): + """POST the built Pong body to the mock TCS: recorded + 200 (schema-valid).""" + result, _ = built + payload = result["payload"] + app = create_mock_tcs() + with TestClient(app) as client: + move_resp = client.post( + f"{PREFIX}/move-to", + json={"azimuth": result["encoder_az"], "elevation": result["encoder_el"]}, + ) + path_resp = client.post(f"{PREFIX}/path", json=payload) + + assert move_resp.status_code == 200 + assert path_resp.status_code == 200 + + recorder = app.state.recorder + assert len(recorder.path_bodies) == 1 + recorded = recorder.path_bodies[0] + assert set(recorded.keys()) == {"start_time", "coordsys", "points"} + assert recorded["coordsys"] == "Horizon" + assert all(len(p) == 5 for p in recorded["points"]) + assert recorded["points"][0][1] == pytest.approx(result["encoder_az"], abs=1e-9) diff --git a/pcs/agents/acu_interface/tests/test_source_scan.py b/pcs/agents/acu_interface/tests/test_source_scan.py new file mode 100644 index 0000000..111294e --- /dev/null +++ b/pcs/agents/acu_interface/tests/test_source_scan.py @@ -0,0 +1,462 @@ +"""Tests for the FYST source-tracking CES dispatch core + Go TCS contract. 
+ +PRIMARY (no server, no ocs): drive +:func:`pcs.agents.acu_interface.trajectory.build_source_payload` directly and +assert the returned ``/path`` body satisfies every rule the Go TCS enforces in +``commands.go`` (``pathCmd.Check()`` + ``checkAzEl``), mirroring +``test_constant_el_scan.py``: + +- exactly the three keys ``{start_time, coordsys, points}``; ``coordsys == + "Horizon"``; +- ``start_time`` absolute Unix and >= now + 10 s (the + :data:`SCAN_DISPATCH_BUFFER_SEC` floor, stronger than the 9.8 s reject); +- ``points`` is N x 5; every consecutive ``dt >= 0.05`` s; +- the first 100 points satisfy az in [-180, 360], el in [-90, 180], + ``|vaz| <= 3.0``, ``|vel| <= 1.5`` (velocities present); +- the slew-target az is in range and ``points[0][1] == encoder_az`` (wrap align). + +Plus the source_scan-specific guards: + +- the **centred gate**: an off-centre ``footprint`` or non-``None`` + ``boresight_rot`` raises ``ValueError`` (Nasmyth sign + boresight_rot are + UNCONFIRMED, so the off-centre path is gated off); +- velocity frame: the commanded az velocity is the planner's solved MOUNT-frame + drift (small), NOT an on-sky-scaled rate; +- hardware-dynamics escalation: a velocity/acceleration breach anywhere raises + ``TrajectoryValidationError``; +- dispatch-delay: a far ``el_bore`` crossing raises ``DispatchDelayError``. + +DATE-STABILITY: ``plan_source_ces`` searches forward for the ``el_bore`` crossing, +so the resolved start depends on wall-clock. Every build threads a FIXED ``Time`` +epoch. At 2026-06-15T16:30 UTC Jupiter rising reaches el=35 ~2.4 min out (PROMPT, +inside the 30-min default); at 13:00 UTC it is ~hours out (FAR, trips the +dispatch-delay guard). Contract tests pass ``max_dispatch_delay_sec=float("inf")``. + +Imports ``trajectory.py`` directly, never ``agent.py``, the agent needs the +ocs/twisted framework, the whole reason the core is factored ocs-free. The +Process-level contracts are covered ocs-free in ``test_constant_el_scan.py``. 
@pytest.fixture
def site():
    """Return the FYST site with sun avoidance switched off.

    With sun avoidance disabled the azimuth-wrap choice is purely geometric,
    which keeps the contract assertions deterministic. The Go TCS contract
    (bounds, velocity, timing) does not depend on sun avoidance.
    """
    fyst_site = get_fyst_site(sun_avoidance_enabled=False)
    return fyst_site
def test_first_100_points_satisfy_checkAzEl(built, site):
    """The first ``FIRST_N_CHECKED`` points sit inside the TCS and site limits.

    Each checked point must have az/el within the Go TCS ranges, el within the
    site elevation limits, and |az velocity| / |el velocity| at or below the
    Go TCS speed ceilings.
    """
    built_result, _ = built
    site_el = site.telescope_limits.elevation
    site_el_lo, site_el_hi = site_el.min, site_el.max
    checked = built_result["payload"]["points"][:FIRST_N_CHECKED]

    # Each point is (time, az, el, az_vel, el_vel); the timestamp is unused here.
    for i, (_t, az, el, vaz, vel) in enumerate(checked):
        assert AZ_MIN_TCS <= az <= AZ_MAX_TCS, f"point {i}: az {az} out of TCS range"
        assert EL_MIN_TCS <= el <= EL_MAX_TCS, f"point {i}: el {el} out of TCS range"
        assert site_el_lo <= el <= site_el_hi, f"point {i}: el {el} out of site range"
        assert abs(vaz) <= AZ_SPEED_MAX_TCS, f"point {i}: |vaz| {vaz} > {AZ_SPEED_MAX_TCS}"
        assert abs(vel) <= EL_SPEED_MAX_TCS, f"point {i}: |vel| {vel} > {EL_SPEED_MAX_TCS}"
def test_velocity_is_mount_frame_drift_not_on_sky(built, site):
    """The commanded az velocity has the magnitude of a small mount-frame drift.

    This is a magnitude sanity check on the posted body, not an exact
    comparison against the planner's solved drift: the peak |az velocity| over
    all points must be non-zero (a drift IS commanded) and below 0.3 deg/s,
    i.e. far under the azimuth speed ceiling. A rate inflated by a spurious
    on-sky -> mount conversion would be expected to come out larger.
    """
    built_result, _ = built
    az_rates = np.array([p[3] for p in built_result["payload"]["points"]])
    peak_speed = np.abs(az_rates).max()

    # Small for a slowly-moving planet, but strictly positive.
    assert peak_speed < 0.3, f"peak |vaz| {peak_speed} unexpectedly large for a planet drift"
    assert peak_speed > 0.0  # there IS a commanded drift
def test_hardware_dynamics_helper_escalates_velocity_breach():
    """Unit: ``_escalate_hardware_dynamics`` raises on too-fast synthetic tracks.

    Two constant-velocity tracks are fed straight to the helper: azimuth at
    4.0 deg/s (above the 3.0 deg/s ceiling) with elevation fixed, then
    elevation at 2.0 deg/s (above the 1.5 deg/s ceiling) with azimuth fixed.
    Each must raise ``TrajectoryValidationError`` naming the offending axis.
    The arrays are synthetic, so the test does not depend on the date.
    """
    import dataclasses

    from astropy.time import Time as _Time

    from fyst_trajectories.trajectory import Trajectory

    from pcs.agents.acu_interface.trajectory import _escalate_hardware_dynamics

    samples = np.arange(0.0, 5.0, 0.1)
    epoch = _Time("2026-06-15T16:30:00", scale="utc")

    # Azimuth ramps at 4.0 deg/s while elevation is held at 50 deg.
    az_breach = Trajectory(
        times=samples,
        az=100.0 + 4.0 * samples,
        el=np.full_like(samples, 50.0),
        az_vel=np.full_like(samples, 4.0),
        el_vel=np.zeros_like(samples),
        start_time=epoch,
    )
    with pytest.raises(TrajectoryValidationError, match=r"az velocity"):
        _escalate_hardware_dynamics(az_breach, "Synthetic")

    # Elevation ramps at 2.0 deg/s while azimuth is held at 100 deg.
    el_overrides = dict(
        az=np.full_like(samples, 100.0),
        el=50.0 + 2.0 * samples,
        az_vel=np.zeros_like(samples),
        el_vel=np.full_like(samples, 2.0),
    )
    el_breach = dataclasses.replace(az_breach, **el_overrides)
    with pytest.raises(TrajectoryValidationError, match=r"el velocity"):
        _escalate_hardware_dynamics(el_breach, "Synthetic")
def test_scheduled_t0_in_past_is_floored(site):
    """A ``scheduled_t0`` in the past is advanced to at least now + buffer."""
    stale_t0 = FIXED_PROMPT_NOW - 1000.0
    build_kwargs = dict(
        scan_params=REPRESENTATIVE_PARAMS,
        current_az=200.0,
        current_el=60.0,
        site=site,
        scheduled_t0_unix=stale_t0,
        now_unix=FIXED_PROMPT_NOW,
        # Timeliness is not under test here, so the delay guard is disabled.
        max_dispatch_delay_sec=float("inf"),
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        built_result = build_source_payload(**build_kwargs)

    posted_start = built_result["payload"]["start_time"]
    assert posted_start >= FIXED_PROMPT_NOW + SCAN_DISPATCH_BUFFER_SEC - 1e-6
# ---------------------------------------------------------------------------


def test_mock_tcs_records_contract_valid_path(built):
    """POST the built source body to the mock TCS: recorded + 200 (schema-valid).

    Slews via ``/move-to`` to the chosen encoder target, then POSTs the built
    ``/path`` body, and checks that both calls return HTTP 200 and that the
    recorder captured exactly one schema-valid body whose first azimuth equals
    the slew target.
    """
    # FastAPI is optional. Guard it INSIDE this test: a module-level
    # ``pytest.importorskip`` skips the whole module at collection time, which
    # would also skip the PRIMARY contract tests that need no server at all.
    pytest.importorskip("fastapi", reason="FastAPI not available for mock-TCS test")
    from fastapi.testclient import TestClient

    # Imported after the guard: the mock TCS presumably needs FastAPI itself.
    from pcs.agents.acu_interface.tests.mock_tcs import PREFIX, create_mock_tcs

    result, _ = built
    payload = result["payload"]
    app = create_mock_tcs()
    with TestClient(app) as client:
        move_resp = client.post(
            f"{PREFIX}/move-to",
            json={"azimuth": result["encoder_az"], "elevation": result["encoder_el"]},
        )
        path_resp = client.post(f"{PREFIX}/path", json=payload)

    assert move_resp.status_code == 200
    assert path_resp.status_code == 200

    recorder = app.state.recorder
    assert len(recorder.path_bodies) == 1
    recorded = recorder.path_bodies[0]
    assert set(recorded.keys()) == {"start_time", "coordsys", "points"}
    assert recorded["coordsys"] == "Horizon"
    assert all(len(p) == 5 for p in recorded["points"])
    assert recorded["points"][0][1] == pytest.approx(result["encoder_az"], abs=1e-9)
The Go TCS owns refraction and the hardware bounds; this +layer produces a body the Go TCS ``pathCmd.Check()`` will accept plus a sun-safe +encoder slew target to reach the trajectory's start. + +Four typed tasks share this core, one ocs-free ``build_*_payload`` each +(:func:`build_constant_el_payload`, :func:`build_source_payload`, +:func:`build_pong_payload`, :func:`build_daisy_payload`). They differ only in +which planner they call and which velocity frame it expects: constant-el and +source take MOUNT-frame azimuth-coordinate deg/s (``cos(el)`` already applied +upstream, never re-applied); pong and daisy take ON-SKY deg/s. + +Shared across all four: the dispatch-window helpers (:func:`floor_dispatch_start` +buffer floor, :func:`enforce_dispatch_delay` too-far-out guard, +:func:`refloor_payload_start_time` post-slew re-floor); the sun-safe slew target ++ 360-deg wrap alignment (:func:`_choose_slew_and_align`); and the dynamics +escalation against the Go TCS hardware ceilings (:func:`_escalate_velocity_warning` +for constant-el, :func:`_escalate_hardware_dynamics` for the multi-axis tasks), +with constant-el and source adding an analytic ``1.5 * az_accel`` quintic +turnaround guard. +""" + +import dataclasses +import warnings + +import numpy as np +from astropy.time import Time + +from fyst_trajectories.dispatch import choose_encoder_solution +from fyst_trajectories.exceptions import VelocityLimitWarning +from fyst_trajectories.planning import ( + FieldRegion, + plan_constant_el_scan, + plan_daisy_scan, + plan_pong_scan, + plan_source_ces, +) +from fyst_trajectories.site import Site +from fyst_trajectories.trajectory_utils import to_path_payload, validate_trajectory + +#: Minimum lead time (seconds) applied to a scan's start time at dispatch. 
+#: The Go TCS ``/path`` receiver hard-rejects a ``start_time`` less than 9.8 s +#: in the future (``commands.go:253``), and the Vertex ProgramTrack ICD wants +#: the track to start ~5-10 s ahead to pre-fill its stack; 10 s clears both. The +#: task computes ``actual_t0 = max(scheduled_t0, now + SCAN_DISPATCH_BUFFER_SEC)``. +SCAN_DISPATCH_BUFFER_SEC: float = 10.0 + +#: Go TCS *hardware* acceleration ceilings (deg/s^2). These are the HARDWARE +#: bounds, NOT the conservative operational limits in fyst_trajectories.site +#: (1.0/0.5); the dispatch accel guard checks hardware so nominal scans pass. +TCS_AZ_MAX_ACCELERATION: float = 6.0 # commands.go:21 azimuthAccelMax +TCS_EL_MAX_ACCELERATION: float = 1.5 # commands.go:27 elevationAccelMax + +#: Go TCS *hardware* velocity ceilings (deg/s), the quantity ``checkAzEl`` +#: actually enforces (strict ``> max``). Used by the multi-axis pong/daisy/source +#: dynamics escalation, whose elevation MOVES: the site el-velocity limit (1.0) +#: is TIGHTER than this hardware ceiling (1.5), so escalating the library +#: ``VelocityLimitWarning`` would wrongly reject an el rate in (1.0, 1.5] the Go +#: TCS would accept. The guard instead compares realised az/el velocity against +#: THESE ceilings, over the WHOLE trajectory (Go TCS validates only the first +#: 100 points). ``build_constant_el_payload`` escalates the library warning +#: directly instead, because a CE scan holds el fixed (el_vel == 0) and the site +#: az-velocity limit (3.0) already equals this hardware az ceiling. +TCS_AZ_MAX_VELOCITY: float = 3.0 # commands.go:20 azimuthSpeedMax +TCS_EL_MAX_VELOCITY: float = 1.5 # commands.go:26 elevationSpeedMax + +#: Free-program-track-stack count that signals the scan has DRAINED. +#: ``maxFreeProgramTrackStack`` is 10000; ``startPattern`` calls +#: ``ProgramTrackClear()`` (free -> 10000) and only THEN spawns the upload +#: goroutine (``telescope.go:164-172``), so a freshly cleared, not-yet-uploaded +#: stack reads 10000. 
A played-out track drains to exactly one point left, i.e. +#: free == 9999 (Go's own ``isDone`` keys on strict ``== 9999``, +#: ``commands.go:195``). The completion check MUST use strict equality to 9999, +#: not ``>= 9999``: the empty pre-upload stack (10000) satisfies ``>= 9999`` and +#: would be a false "complete" on the first poll after POST, before the scan ran. +TCS_PROGRAM_TRACK_DRAINED: int = 9999 # maxFreeProgramTrackStack-1 (commands.go:16,195) + +#: Axis-velocity magnitude (deg/s) below which an axis is "stopped". +TCS_SPEED_TOL: float = 1e-4 # commands.go:15 speedTol + +#: Maximum allowed delay (s) between dispatch and the resolved scan start. +#: plan_constant_el_scan treats start_time as a forward-search anchor, so a +#: below-horizon field can resolve a crossing many hours out (up to +#: max_search_hours, default 12 h). The PCS task slews to the scan start +#: immediately and holds azel_lock until start_time, so an unbounded delay parks +#: the dish on empty sky for hours and blocks all other azel ops. Applied to both +#: dispatch paths. The root-semantics fix (take the elevation from upstream so +#: the resolve is always ~now and this cap rarely binds) is deferred to the +#: survey-to-execution handoff; this is the interim guard, not its substitute. +MAX_DISPATCH_DELAY_SEC: float = 1800.0 # 30 min + + +#: Maximum tolerated post-slew re-floor advance (s) of a sidereal-tracked scan's +#: start_time before its baked az/el track is too stale to POST. The az/el samples +#: are frozen at the build-time start; advancing start_time by ``delta`` replays +#: them ``delta`` s late, so the boresight lags the sky by ~az_rate*delta (az_rate +#: ~3-27 arcsec/s at FYST fixed el). 30 s caps the worst-case lag at ~0.014 deg +#: (~50 arcsec), inside az_padding and a small fraction of a PrimeCam module FOV. 
class TrajectoryValidationError(RuntimeError):
    """A built trajectory would breach a limit the Go TCS enforces.

    Raised by the dispatch-time escalation helpers so that a velocity or
    acceleration breach anywhere along the trajectory is a hard failure before
    the body is POSTed. The escalation exists because the Go TCS validates
    only the first 100 points and ``validate_trajectory`` only warns.
    """


class DispatchDelayError(RuntimeError):
    """The resolved scan ``start_time`` lies too far after dispatch."""
class ScanCompletionLatch:
    """Latch that reports when a POSTed ``/path`` scan has really finished.

    The Go TCS ``/path`` endpoint is fire-and-forget: HTTP 200 only means the
    body was accepted, and nothing calls back when the scan ends. The PCS
    Process therefore polls the ACU status for the stack-drained signal. A
    naive ``free >= 9999`` test is wrong on the first poll after the POST:

    1. **Empty pre-upload stack.** ``startPattern`` clears the program-track
       stack (free -> 10000) before spawning the upload goroutine
       (``telescope.go:164-172``), so right after the POST the stack reads
       10000, which satisfies ``>= 9999``. The genuine drained signal is the
       strict ``free == 9999`` that Go's ``isDone`` uses
       (``commands.go:195``).
    2. **No running-observed latch.** Even a strict 9999 is ambiguous on the
       first poll (a transient read before any point is pushed). A drained
       reading is therefore honoured only after the scan has been seen
       running at least once, via ANY of: a non-empty stack
       (``free < drained_free``), an az/el mode indicating ProgramTrack, or
       wall-clock time reaching ``start_time_unix``.

    Feed every status poll to :meth:`update`; it returns a truthy value only
    when the latch is armed, the stack is strictly drained and both axes are
    stopped. Absolute-time backstops and abort handling are left to the caller.

    ``start_time_unix`` is the POSTed trajectory's absolute start time.
    ``drained_free`` and ``speed_tol`` default to
    :data:`TCS_PROGRAM_TRACK_DRAINED` and :data:`TCS_SPEED_TOL`.
    """

    #: Lower-cased, space-free token looked for inside an ACU az/el mode
    #: string. Matching is by substring after normalisation, so variants such
    #: as "ProgramTrack", "Program Track" and "ProgramTrackTime" all match.
    _PROGRAM_TRACK_MODE_TOKEN = "programtrack"

    def __init__(
        self,
        start_time_unix: float,
        *,
        drained_free: int = TCS_PROGRAM_TRACK_DRAINED,
        speed_tol: float = TCS_SPEED_TOL,
    ) -> None:
        # Coerce once so later comparisons work on plain Python numbers.
        self.start_time_unix = float(start_time_unix)
        self.drained_free = int(drained_free)
        self.speed_tol = float(speed_tol)
        #: True once the scan has been seen running; until then a drained
        #: reading is ignored (guards the first-poll race).
        self.running_observed = False

    def _mode_is_program_track(self, mode) -> bool:
        """Return whether ``mode`` names a ProgramTrack mode (``None`` -> False)."""
        if mode is None:
            return False
        compact = str(mode).strip().lower().replace(" ", "")
        return self._PROGRAM_TRACK_MODE_TOKEN in compact

    def _axis_stopped(self, speed) -> bool:
        """Return whether ``speed`` was read and is below ``speed_tol``."""
        return speed is not None and abs(speed) < self.speed_tol

    def update(
        self,
        *,
        free,
        vaz,
        vel,
        now_unix: float,
        az_mode=None,
        el_mode=None,
    ) -> bool:
        """Fold one status poll into the latch and report completion.

        ``free`` is the free program-track stack count, ``vaz`` / ``vel`` the
        current az / el velocity (deg/s); any of them may be ``None`` when the
        status read did not surface it. ``az_mode`` / ``el_mode`` are the axis
        mode strings, and ``now_unix`` is the current wall-clock time.

        The result is truthy only when the latch is armed AND ``free`` equals
        ``drained_free`` exactly AND both velocities are below ``speed_tol``.
        """
        # Arming signal 1: points remain on the stack, so the upload happened.
        stack_has_points = free is not None and free < self.drained_free
        if stack_has_points:
            self.running_observed = True

        # Arming signal 2: either axis reports a ProgramTrack mode.
        tracking = self._mode_is_program_track(az_mode) or self._mode_is_program_track(el_mode)
        if tracking:
            self.running_observed = True

        # Arming signal 3: the scheduled start time has been reached.
        start_reached = now_unix >= self.start_time_unix
        if start_reached:
            self.running_observed = True

        if not self.running_observed:
            # Pre-run window: the stack may read 10000 (empty, pre-upload) or a
            # transient 9999, so a drained reading is not trusted yet.
            return False

        stack_drained = free is not None and free == self.drained_free
        return stack_drained and self._axis_stopped(vaz) and self._axis_stopped(vel)
def _escalate_hardware_dynamics(traj, scan_label: str) -> None:
    """Raise if the trajectory breaches a Go TCS *hardware* dynamics ceiling.

    Used by the multi-axis tasks (:func:`build_source_payload`,
    :func:`build_pong_payload`, :func:`build_daisy_payload`). Realised az/el
    velocity and acceleration are computed with ``np.gradient`` over the WHOLE
    trajectory and compared against the hardware ceilings
    (:data:`TCS_AZ_MAX_VELOCITY`, :data:`TCS_EL_MAX_VELOCITY`,
    :data:`TCS_AZ_MAX_ACCELERATION`, :data:`TCS_EL_MAX_ACCELERATION`). The
    library's dynamics warnings are not simply escalated, because:

    1. Velocity: the library warns at the *site* limits, whose el value is
       tighter than the hardware ceiling, so escalating it would reject
       el rates the Go TCS accepts.
    2. Acceleration: the site accel limits sit far below the hardware
       ceilings, so nominal turnarounds would be rejected.
    3. Coverage: the Go TCS validates only the first 100 points
       (``commands.go:269``), so the ceiling is enforced here for every sample.

    The guard fails CLOSED: a non-finite peak (NaN/inf samples, or repeated
    timestamps that make ``np.gradient`` divide by zero) is reported as a
    breach. Without that, a NaN peak compares False against every ceiling and
    an unverifiable trajectory would pass and be POSTed.

    The CE and source quintic turnaround is too sharp for ``np.gradient`` to
    resolve, so those payload builders add their own analytic az-acceleration
    guard; this helper still covers their velocity.

    Parameters
    ----------
    traj
        Object exposing ``times``, ``az`` and ``el`` sequences of equal length
        (seconds and degrees).
    scan_label : str
        Human-readable scan name placed at the start of the error message.

    Raises
    ------
    TrajectoryValidationError
        If any peak velocity or acceleration exceeds its hardware ceiling or
        is non-finite.
    """
    times = np.asarray(traj.times, dtype=float)
    if times.size < 2:
        # Fewer than two samples: there is no derivative to check.
        return
    # Unwrap so a 360-deg wrap in azimuth is not mistaken for a huge velocity.
    az = np.unwrap(np.asarray(traj.az, dtype=float), period=360.0)
    el = np.asarray(traj.el, dtype=float)
    az_vel = np.gradient(az, times)
    el_vel = np.gradient(el, times)

    # (quantity, series, ceiling, unit, Go source reference), in report order.
    checks = [
        ("az velocity", az_vel, TCS_AZ_MAX_VELOCITY, "deg/s", "commands.go:20"),
        ("el velocity", el_vel, TCS_EL_MAX_VELOCITY, "deg/s", "commands.go:26"),
    ]
    if times.size >= 4:
        # Acceleration is a second derivative, so it is only checked when there
        # are enough samples for it to be meaningful.
        checks.append(
            (
                "az acceleration",
                np.gradient(az_vel, times),
                TCS_AZ_MAX_ACCELERATION,
                "deg/s^2",
                "commands.go:21",
            )
        )
        checks.append(
            (
                "el acceleration",
                np.gradient(el_vel, times),
                TCS_EL_MAX_ACCELERATION,
                "deg/s^2",
                "commands.go:27",
            )
        )

    breaches: list[str] = []
    for quantity, series, ceiling, unit, ref in checks:
        # ``max`` propagates NaN, so one bad sample poisons the peak.
        peak = float(np.abs(series).max())
        if not np.isfinite(peak):
            breaches.append(
                f"peak {quantity} is non-finite ({peak}): the trajectory has "
                f"NaN/inf samples or repeated timestamps, so the Go TCS hardware "
                f"ceiling {ceiling} {unit} ({ref}) cannot be verified."
            )
        elif peak > ceiling:
            breaches.append(
                f"peak {quantity} {peak:.3f} {unit} exceeds the Go TCS hardware "
                f"ceiling {ceiling} {unit} ({ref})."
            )

    if breaches:
        raise TrajectoryValidationError(
            f"{scan_label} trajectory breaches a Go TCS hardware dynamics ceiling "
            f"anywhere along the path (the Go TCS only validates the first 100 "
            f"points); refusing to POST. " + " ".join(breaches)
        )
Shifts the whole trajectory azimuth by the 360-deg multiple that aligns + its first sample to the chosen encoder azimuth (wrap alignment), then + re-validates. + 5. Escalates a velocity breach to :class:`TrajectoryValidationError`; + position-bounds breaches already raise. + 6. Returns the encoder target plus the exact three-key ``/path`` body. + + Parameters + ---------- + scan_params : dict + Constant-elevation scan spec, modeled on + :func:`~fyst_trajectories.planning.plan_constant_el_scan`. Required: + ``ra_center``, ``dec_center``, ``width``, ``height`` (deg, the + :class:`~fyst_trajectories.planning.FieldRegion`); ``elevation`` (deg); + ``velocity`` (deg/s, mount-frame azimuth-coordinate, sent to the ACU + as-is, NOT cos(el)-scaled). Optional (defaults match the planner): + ``rising``, ``angle``, ``az_accel``, ``timestep``, ``az_padding``, + ``max_search_hours``, ``step_seconds``, ``lsa_window``. + current_az, current_el : float + Current encoder position (deg, 200 Hz broadcast); telescope-range + encoder values, not astropy ``[0, 360)``. + site : Site + FYST site configuration (limits, sun-avoidance config). + sun_safe : callable, optional + Predicate ``(az_deg, el_deg, time) -> bool`` for + :func:`~fyst_trajectories.dispatch.choose_encoder_solution`. Defaults to + the site's scalar exclusion check. + scheduled_t0_unix : float or None, optional + Scheduled start (Unix s), or ``None`` to dispatch as soon as the buffer + allows. The planner's *search anchor*, not the literal start: + ``plan_constant_el_scan`` searches forward for the elevation crossing and + starts there, so the posted ``start_time`` is the resolved crossing (>= + the floored anchor >= ``now + buffer``). + now_unix : float + Current wall-clock Unix time (caller-supplied; deterministic/testable). + max_dispatch_delay_sec : float, optional + Max delay between ``now_unix`` and the resolved start before + :class:`DispatchDelayError`. 
Defaults to :data:`MAX_DISPATCH_DELAY_SEC`; + ``float("inf")`` disables it. + + Returns + ------- + dict + ``{"encoder_az": float, "encoder_el": float, "payload": dict}``: the + exact three-key Go TCS ``/path`` body ``{"start_time", "coordsys", + "points"}`` (``coordsys == "Horizon"``), with + ``payload["points"][0][1] == encoder_az`` (wrap alignment). + + Raises + ------ + KeyError + If a required ``scan_params`` key is missing. + PointingError + If the goal el is out of range or every in-range az wrap is sun-blocked. + AzimuthBoundsError, ElevationBoundsError + If the (wrap-shifted) trajectory leaves the telescope limits anywhere. + TrajectoryValidationError + If az/el velocity exceeds the site limit anywhere, or the quintic + turnaround's peak az accel (``1.5 * az_accel``) exceeds the Go TCS + hardware ceiling. + DispatchDelayError + If the resolved ``start_time`` is more than ``max_dispatch_delay_sec`` + after ``now_unix``. + """ + actual_t0 = floor_dispatch_start(scheduled_t0_unix, now_unix) + + # Build the trajectory. Velocity passes straight through as mount-frame + # azimuth-coordinate deg/s (no cos(el) scaling). 
+ field = FieldRegion( + ra_center=scan_params["ra_center"], + dec_center=scan_params["dec_center"], + width=scan_params["width"], + height=scan_params["height"], + ) + lsa_window = scan_params.get("lsa_window") + block = plan_constant_el_scan( + field=field, + elevation=scan_params["elevation"], + velocity=scan_params["velocity"], + site=site, + start_time=actual_t0, + rising=scan_params.get("rising", True), + angle=scan_params.get("angle", 0.0), + az_accel=scan_params.get("az_accel", 1.0), + timestep=scan_params.get("timestep", 0.1), + az_padding=scan_params.get("az_padding", 2.0), + max_search_hours=scan_params.get("max_search_hours", 12.0), + step_seconds=scan_params.get("step_seconds", 30.0), + lsa_window=tuple(lsa_window) if lsa_window is not None else None, + ) + traj = block.trajectory + + # Dispatch-delay guard: reject a crossing that resolves too far out before + # any slew is computed, else the task slews to it and holds azel_lock until + # start_time. + enforce_dispatch_delay( + float(block.trajectory.start_time.unix), + now_unix, + max_dispatch_delay_sec, + context=( + f"constant-el scan (ra={scan_params['ra_center']}, " + f"dec={scan_params['dec_center']}, el={scan_params['elevation']})" + ), + ) + + # Sun-safe encoder slew target + wrap alignment. Sun the wrap against the + # resolved scan start, not the dispatch anchor: start_time is a forward-search + # anchor, so the trajectory can begin many minutes after actual_t0, the Sun + # moves in that gap, and the slew target must be safe when the dish arrives. + enc_az, enc_el, traj = _choose_slew_and_align( + traj, + current_az=current_az, + current_el=current_el, + obstime=block.trajectory.start_time, + site=site, + sun_safe=sun_safe, + ) + + # Validate + escalate. 
validate_trajectory raises on a position breach over + # the whole trajectory (closing the Go TCS first-100-point gap for position) + # but only warns on dynamics, so escalate a velocity breach (the quantity + # checkAzEl enforces) to a hard error. Escalating the library warning directly + # is safe here: el is fixed (el_vel == 0) and the site az-velocity limit equals + # the hardware az ceiling. Multi-axis tasks use _escalate_hardware_dynamics. + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + validate_trajectory(traj, site) # may raise Azimuth/ElevationBoundsError + _escalate_velocity_warning(caught, "Constant-el") + + # Acceleration guard: the CE quintic turnaround peaks at 1.5 * az_accel by + # design (fyst_trajectories.patterns.turnarounds.quintic_turnaround). Compute + # it analytically, not via np.gradient, which under-resolves the short + # turnaround at the 0.1 s timestep and would pass a true >6 deg/s^2 scan. + # checkAzEl ignores acceleration, so escalate az here before POST (el fixed). + az_accel = abs(scan_params.get("az_accel", 1.0)) + peak_az_accel = 1.5 * az_accel + if peak_az_accel > TCS_AZ_MAX_ACCELERATION: + raise TrajectoryValidationError( + f"Constant-el scan az_accel={az_accel:.3f} deg/s^2 produces a quintic " + f"turnaround peak acceleration of {peak_az_accel:.3f} deg/s^2, which " + f"exceeds the Go TCS hardware ceiling of {TCS_AZ_MAX_ACCELERATION} " + f"deg/s^2 (commands.go:21); refusing to POST." + ) + + # Assemble the three-key /path body (coordsys defaults to "Horizon"). After + # the wrap shift, payload["points"][0][1] == enc_az. + payload = to_path_payload(traj) + return {"encoder_az": enc_az, "encoder_el": enc_el, "payload": payload} + + +# Off-centre source_ces is gated on two currently-unconfirmed quantities. See +# build_source_payload for the gate and the full rationale. 
+_OFF_CENTRE_GATE_NOTE = ( + "off-centre (single-module) source_scan and an explicit boresight_rot both " + "depend on the Nasmyth port direction and the boresight rotation value, " + "neither of which is confirmed yet; guessing either rotates the focal " + "plane by up to 2*el deg, so only centred scans are supported for now." +) + + +def build_source_payload( + *, + scan_params: dict, + current_az: float, + current_el: float, + site: Site, + sun_safe=None, + scheduled_t0_unix: float | None = None, + now_unix: float, + max_dispatch_delay_sec: float = MAX_DISPATCH_DELAY_SEC, +) -> dict: + """Build a Go TCS ``/path`` body + sun-safe slew target for a source-track CES. + + Dispatch-time core of the ``source_scan`` typed task. Drags a moving source + (planet or sidereal point) across the *centred* PrimeCam focal plane at a + fixed boresight elevation, via + :func:`fyst_trajectories.planning.plan_source_ces`. Mirrors + :func:`build_constant_el_payload` in structure: dispatch-buffer floor, plan, + too-far-out guard, sun-safe slew target + wrap alignment, validate + + hardware-dynamics escalation, assemble the three-key body. + + **Centred only (the source_scan gate).** Builds the on-axis, full-array case: + ``footprint="c"`` (the PrimeCam centre module, ``(dx, dy) = (0, 0)``) with + ``boresight_rot`` left ``None`` (uncommanded rotator). The off-centre + single-module case and an explicit commanded ``boresight_rot`` are GATED OFF: + ``plan_source_ces``'s off-centre boresight recovery and cover projection + rotate the footprint by ``nasmyth_sign * el_bore + boresight_rot``, and both + the Nasmyth sign and the ``boresight_rot`` value are currently UNCONFIRMED. A + wrong sign rotates the focal plane by up to ``2 * el`` degrees, so rather than + guess, this raises :class:`ValueError` for an off-centre ``footprint`` or a + non-``None`` ``boresight_rot``. Remove the gate once those are confirmed. 
+ + Velocity frame: ``plan_source_ces`` builds from a + :class:`~fyst_trajectories.ConstantElScanConfig` whose ``az_speed`` is the + solved per-leg drift in MOUNT-frame azimuth-coordinate deg/s; ``cos(el)`` is + already implicit in the elevation-fixed azimuth track and is NOT re-applied + (the solved drift ``v_az`` is likewise a mount-frame rate). So like + constant-el (unlike pong/daisy), the commanded az velocity is mount-frame. + + Parameters + ---------- + scan_params : dict + Source-CES spec, modeled on + :func:`~fyst_trajectories.planning.plan_source_ces`. Provide a source, + ``body`` (solar-system name) OR both ``ra`` and ``dec`` (deg), plus + ``el_bore`` (deg, required). Optional (defaults match the planner): + ``footprint`` (MUST be ``"c"``/``"center"`` while the gate stands), + ``boresight_rot`` (MUST be ``None`` while the gate stands), ``mode`` + (``"rising"``/``"setting"``), ``pm_ra``, ``pm_dec``, ``ref_epoch`` (ISO + str or ``Time``), ``timestep``, ``sampling_step_seconds``, ``az_accel``, + ``az_padding``, ``az_branch``, ``allow_partial``, ``v_az``. Search window + is the floored anchor (``night=actual_t0``) with ``mode``; pass a + ``window`` 2-sequence of ISO strings / ``Time`` to override. + current_az, current_el : float + Current encoder position (deg, 200 Hz broadcast). + site : Site + FYST site configuration. + sun_safe : callable, optional + Sun-safety predicate for ``choose_encoder_solution``. + scheduled_t0_unix : float or None, optional + Scheduled start (Unix s), or ``None`` to dispatch as soon as the buffer + allows. Used as the ``plan_source_ces`` search anchor. + now_unix : float + Current wall-clock Unix time (caller-supplied; deterministic). + max_dispatch_delay_sec : float, optional + Max delay before :class:`DispatchDelayError`. Defaults to + :data:`MAX_DISPATCH_DELAY_SEC`; ``float("inf")`` disables it. 
+ + Returns + ------- + dict + ``{"encoder_az", "encoder_el", "payload"}``: the three-key Go TCS + ``/path`` body; ``payload["points"][0][1] == encoder_az`` (wrap align). + + Raises + ------ + ValueError + On an off-centre ``footprint`` / non-``None`` ``boresight_rot`` (the + centred-only gate), or an incompatible source/window combo. + KeyError + If ``el_bore`` (or a source spec) is missing. + PointingError, TargetNotObservableError + If the source never reaches ``el_bore``, the goal el is out of range, or + every in-range az wrap is sun-blocked. + AzimuthBoundsError, ElevationBoundsError + If the (wrap-shifted) trajectory leaves the telescope limits anywhere. + TrajectoryValidationError + If az/el velocity or acceleration exceeds a Go TCS hardware ceiling + anywhere. + DispatchDelayError + If the resolved start is more than ``max_dispatch_delay_sec`` out. + """ + actual_t0 = floor_dispatch_start(scheduled_t0_unix, now_unix) + + # Centred-only gate: a wrong Nasmyth sign / boresight_rot rotates the + # off-centre footprint by up to 2*el deg, so refuse the off-centre and + # commanded-rotation paths rather than guess (see docstring). + footprint = scan_params.get("footprint", "c") + if footprint not in ("c", "center"): + raise ValueError( + f"source_scan currently supports only the centred PrimeCam footprint " + f'("c"/"center"), got {footprint!r}: {_OFF_CENTRE_GATE_NOTE}' + ) + if scan_params.get("boresight_rot") is not None: + raise ValueError( + f"source_scan currently supports only an uncommanded boresight " + f"rotator (boresight_rot=None), got " + f"{scan_params.get('boresight_rot')!r}: {_OFF_CENTRE_GATE_NOTE}" + ) + + # Build the source-tracking CES (centred footprint "c"). Velocity is + # mount-frame: plan_source_ces bakes the solved drift into a + # ConstantElScanConfig.az_speed, and the v_az drift is also a mount-frame az + # rate; neither is cos(el)-scaled here. 
+ window = scan_params.get("window") + ref_epoch = scan_params.get("ref_epoch") + if isinstance(ref_epoch, str): + ref_epoch = Time(ref_epoch, scale="utc") + if window is not None: + w0, w1 = window + window = ( + Time(w0, scale="utc") if isinstance(w0, str) else w0, + Time(w1, scale="utc") if isinstance(w1, str) else w1, + ) + night = None + mode = scan_params.get("mode") + else: + night = actual_t0 + mode = scan_params.get("mode", "rising") + block = plan_source_ces( + body=scan_params.get("body"), + ra=scan_params.get("ra"), + dec=scan_params.get("dec"), + pm_ra=scan_params.get("pm_ra", 0.0), + pm_dec=scan_params.get("pm_dec", 0.0), + ref_epoch=ref_epoch, + footprint=footprint, + el_bore=scan_params["el_bore"], + boresight_rot=None, + window=window, + night=night, + mode=mode, + site=site, + timestep=scan_params.get("timestep", 0.1), + sampling_step_seconds=scan_params.get("sampling_step_seconds", 30.0), + az_accel=scan_params.get("az_accel", 1.0), + az_padding=scan_params.get("az_padding", 0.5), + az_branch=scan_params.get("az_branch"), + allow_partial=scan_params.get("allow_partial", False), + v_az=scan_params.get("v_az"), + ) + traj = block.trajectory + + # Dispatch-delay guard: plan_source_ces searches forward for the el_bore + # crossing, so a far crossing can resolve hours out; reject before any slew + # is computed. + src_label = scan_params.get("body") or ( + f"ra={scan_params.get('ra')}, dec={scan_params.get('dec')}" + ) + enforce_dispatch_delay( + float(block.trajectory.start_time.unix), + now_unix, + max_dispatch_delay_sec, + context=f"source scan ({src_label}, el_bore={scan_params['el_bore']})", + ) + + # Sun-safe encoder slew target + wrap alignment. Sun the wrap at the resolved + # scan start. 
+ enc_az, enc_el, traj = _choose_slew_and_align( + traj, + current_az=current_az, + current_el=current_el, + obstime=block.trajectory.start_time, + site=site, + sun_safe=sun_safe, + ) + + # Validate position over the whole trajectory (raises on a bounds breach, + # closing the Go TCS first-100 gap) + escalate a Go TCS hardware velocity or + # acceleration breach. El moves in a source CES, so the hardware-ceiling check, + # not the tighter site-limit library warning, is the correct Go TCS contract; + # see _escalate_hardware_dynamics. + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + validate_trajectory(traj, site) # may raise Azimuth/ElevationBoundsError + _escalate_hardware_dynamics(traj, "Source-CES") + + # Analytic turnaround-accel guard: plan_source_ces builds on a + # ConstantElScanConfig, so the az axis uses the same quintic turnaround as a + # CE scan (peak |az accel| = 1.5 * az_accel by design). np.gradient (in + # _escalate_hardware_dynamics) under-resolves that short spike at the 0.1 s + # timestep, so guard az accel analytically here too, mirroring + # build_constant_el_payload, the only pre-POST gate for the quintic. + az_accel = abs(scan_params.get("az_accel", 1.0)) + peak_az_accel = 1.5 * az_accel + if peak_az_accel > TCS_AZ_MAX_ACCELERATION: + raise TrajectoryValidationError( + f"Source-CES scan az_accel={az_accel:.3f} deg/s^2 produces a quintic " + f"turnaround peak acceleration of {peak_az_accel:.3f} deg/s^2, which " + f"exceeds the Go TCS hardware ceiling of {TCS_AZ_MAX_ACCELERATION} " + f"deg/s^2 (commands.go:21); refusing to POST." + ) + + # Assemble the three-key /path body (points[0][1] == enc_az after align). 
+ payload = to_path_payload(traj) + return {"encoder_az": enc_az, "encoder_el": enc_el, "payload": payload} + + +def build_pong_payload( + *, + scan_params: dict, + current_az: float, + current_el: float, + site: Site, + sun_safe=None, + scheduled_t0_unix: float | None = None, + now_unix: float, + max_dispatch_delay_sec: float = MAX_DISPATCH_DELAY_SEC, +) -> dict: + """Build a Go TCS ``/path`` body + sun-safe slew target for a Pong scan. + + Dispatch-time core of the ``pong_scan`` typed task. Covers a rectangular + RA/Dec field with a curvy-box Pong pattern via + :func:`fyst_trajectories.planning.plan_pong_scan`. Mirrors + :func:`build_constant_el_payload` in structure (dispatch floor, plan, + too-far-out guard, sun-safe slew + wrap align, validate + hardware-dynamics + escalation, assemble). + + Velocity frame: ``plan_pong_scan``'s ``velocity`` is ON-SKY (tangent-plane / + sky-offset) deg/s, a DIFFERENT frame from constant-el/source. The planner + maps it to az/el via the field's instantaneous geometry, so the realised + mount-frame az velocity is ``~velocity / cos(el)`` and is NOT caller-scaled; + pass the astronomer's on-sky scan speed directly. The hardware-ceiling + escalation below checks the realised mount-frame dynamics. + + Parameters + ---------- + scan_params : dict + Pong spec, modeled on :func:`~fyst_trajectories.planning.plan_pong_scan`. + Required: ``ra_center``, ``dec_center``, ``width``, ``height`` (deg, the + :class:`~fyst_trajectories.planning.FieldRegion`); ``velocity`` (ON-SKY + deg/s), ``spacing`` (deg), ``num_terms`` (int). Optional (defaults match + the planner): ``angle``, ``n_cycles``, ``timestep``. + current_az, current_el : float + Current encoder position (deg, 200 Hz broadcast). + site : Site + FYST site configuration. + sun_safe : callable, optional + Sun-safety predicate for ``choose_encoder_solution``. + scheduled_t0_unix : float or None, optional + Scheduled start (Unix s), or ``None`` to dispatch as soon as the buffer + allows. 
Pong takes ``start_time`` LITERALLY (no forward search), so the + posted ``start_time`` equals the floored anchor. + now_unix : float + Current wall-clock Unix time (caller-supplied; deterministic). + max_dispatch_delay_sec : float, optional + Dispatch-delay bound; defaults to :data:`MAX_DISPATCH_DELAY_SEC`. Pong's + start equals the floored anchor, so this passes trivially; applied + uniformly for parity with the elevation-searching tasks. + + Returns + ------- + dict + ``{"encoder_az", "encoder_el", "payload"}`` (three-key ``/path`` body); + ``payload["points"][0][1] == encoder_az``. + + Raises + ------ + KeyError + If a required key is missing. + ValueError + If ``n_cycles < 1`` (from ``plan_pong_scan``). + TargetNotObservableError, AzimuthBoundsError, ElevationBoundsError + If the field is unobservable at the start time or the trajectory leaves + the telescope limits anywhere. + PointingError + If every in-range az wrap is sun-blocked. + TrajectoryValidationError + If az/el velocity or acceleration exceeds a Go TCS hardware ceiling + anywhere. + DispatchDelayError + If the resolved start is more than ``max_dispatch_delay_sec`` out. + """ + actual_t0 = floor_dispatch_start(scheduled_t0_unix, now_unix) + + # Build the Pong trajectory. velocity is on-sky deg/s, a different frame from + # constant-el/source; passed straight to the planner, which maps it to the + # mount frame via the field geometry. 
+ field = FieldRegion( + ra_center=scan_params["ra_center"], + dec_center=scan_params["dec_center"], + width=scan_params["width"], + height=scan_params["height"], + ) + block = plan_pong_scan( + field=field, + velocity=scan_params["velocity"], + spacing=scan_params["spacing"], + num_terms=scan_params["num_terms"], + site=site, + start_time=actual_t0, + timestep=scan_params.get("timestep", 0.1), + angle=scan_params.get("angle", 0.0), + n_cycles=scan_params.get("n_cycles", 1), + ) + traj = block.trajectory + + # Dispatch-delay guard: trivially satisfied for Pong (starts at the floored + # anchor); applied for parity with CE/source. + enforce_dispatch_delay( + float(block.trajectory.start_time.unix), + now_unix, + max_dispatch_delay_sec, + context=( + f"pong scan (ra={scan_params['ra_center']}, dec={scan_params['dec_center']})" + ), + ) + + # Sun-safe encoder slew target + wrap alignment. + enc_az, enc_el, traj = _choose_slew_and_align( + traj, + current_az=current_az, + current_el=current_el, + obstime=block.trajectory.start_time, + site=site, + sun_safe=sun_safe, + ) + + # Validate position over the whole trajectory + escalate a Go TCS hardware + # velocity or acceleration breach. El moves in a Pong, so the hardware ceiling, + # not the tighter site-limit library warning, is the Go TCS contract. + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + validate_trajectory(traj, site) # may raise Azimuth/ElevationBoundsError + _escalate_hardware_dynamics(traj, "Pong") + + payload = to_path_payload(traj) + return {"encoder_az": enc_az, "encoder_el": enc_el, "payload": payload} + + +def build_daisy_payload( + *, + scan_params: dict, + current_az: float, + current_el: float, + site: Site, + sun_safe=None, + scheduled_t0_unix: float | None = None, + now_unix: float, + max_dispatch_delay_sec: float = MAX_DISPATCH_DELAY_SEC, +) -> dict: + """Build a Go TCS ``/path`` body + sun-safe slew target for a Daisy scan. 
+ + Dispatch-time core of the ``daisy_scan`` typed task. Covers a point source + with a constant-velocity petal (Daisy) pattern via + :func:`fyst_trajectories.planning.plan_daisy_scan`. Mirrors + :func:`build_constant_el_payload` in structure (dispatch floor, plan, + too-far-out guard, sun-safe slew + wrap align, validate + hardware-dynamics + escalation, assemble). + + Velocity frame: ``plan_daisy_scan``'s ``velocity`` is ON-SKY (tangent-plane / + sky-offset) deg/s, the SAME frame as Pong, DIFFERENT from constant-el/source. + Pass the astronomer's on-sky scan speed directly; the planner maps it to the + mount frame. The hardware-ceiling escalation below checks the realised + mount-frame dynamics. + + Parameters + ---------- + scan_params : dict + Daisy spec, modeled on + :func:`~fyst_trajectories.planning.plan_daisy_scan`. Required: ``ra``, + ``dec`` (deg, source centre); ``radius`` (deg), ``velocity`` (ON-SKY + deg/s), ``turn_radius`` (deg), ``avoidance_radius`` (deg >= 0), + ``start_acceleration`` (deg/s^2), ``duration`` (s). Optional (defaults + match the planner): ``y_offset``, ``timestep``. + current_az, current_el : float + Current encoder position (deg, 200 Hz broadcast). + site : Site + FYST site configuration. + sun_safe : callable, optional + Sun-safety predicate for ``choose_encoder_solution``. + scheduled_t0_unix : float or None, optional + Scheduled start (Unix s), or ``None`` to dispatch as soon as the buffer + allows. Daisy takes ``start_time`` LITERALLY (no forward search), so the + posted ``start_time`` equals the floored anchor. + now_unix : float + Current wall-clock Unix time (caller-supplied; deterministic). + max_dispatch_delay_sec : float, optional + Dispatch-delay bound; defaults to :data:`MAX_DISPATCH_DELAY_SEC`. Daisy's + start equals the floored anchor, so this passes trivially; applied + uniformly for parity with the elevation-searching tasks. 
+ + Returns + ------- + dict + ``{"encoder_az", "encoder_el", "payload"}`` (three-key ``/path`` body); + ``payload["points"][0][1] == encoder_az``. + + Raises + ------ + KeyError + If a required key is missing. + TargetNotObservableError, AzimuthBoundsError, ElevationBoundsError + If the source is unobservable at the start time or the trajectory leaves + the telescope limits anywhere. + PointingError + If every in-range az wrap is sun-blocked. + TrajectoryValidationError + If az/el velocity or acceleration exceeds a Go TCS hardware ceiling + anywhere. + DispatchDelayError + If the resolved start is more than ``max_dispatch_delay_sec`` out. + """ + actual_t0 = floor_dispatch_start(scheduled_t0_unix, now_unix) + + # Build the Daisy trajectory. velocity is on-sky deg/s, same frame as Pong, + # different from constant-el/source; passed straight to the planner, which + # maps it to the mount frame. + block = plan_daisy_scan( + ra=scan_params["ra"], + dec=scan_params["dec"], + radius=scan_params["radius"], + velocity=scan_params["velocity"], + turn_radius=scan_params["turn_radius"], + avoidance_radius=scan_params["avoidance_radius"], + start_acceleration=scan_params["start_acceleration"], + site=site, + start_time=actual_t0, + timestep=scan_params.get("timestep", 0.1), + duration=scan_params["duration"], + y_offset=scan_params.get("y_offset", 0.0), + ) + traj = block.trajectory + + # Dispatch-delay guard: trivially satisfied for Daisy (starts at the floored + # anchor); applied for parity with CE/source. + enforce_dispatch_delay( + float(block.trajectory.start_time.unix), + now_unix, + max_dispatch_delay_sec, + context=f"daisy scan (ra={scan_params['ra']}, dec={scan_params['dec']})", + ) + + # Sun-safe encoder slew target + wrap alignment. 
+ enc_az, enc_el, traj = _choose_slew_and_align( + traj, + current_az=current_az, + current_el=current_el, + obstime=block.trajectory.start_time, + site=site, + sun_safe=sun_safe, + ) + + # Validate position over the whole trajectory + escalate a Go TCS hardware + # velocity or acceleration breach. El moves in a Daisy, so the hardware ceiling, + # not the tighter site-limit library warning, is the Go TCS contract. + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + validate_trajectory(traj, site) # may raise Azimuth/ElevationBoundsError + _escalate_hardware_dynamics(traj, "Daisy") + + payload = to_path_payload(traj) + return {"encoder_az": enc_az, "encoder_el": enc_el, "payload": payload} + + +def refloor_payload_start_time(payload: dict, now_unix: float) -> dict: + """Re-apply the dispatch-buffer floor to a built payload's ``start_time``. + + :func:`build_constant_el_payload` floors ``start_time`` at *build* time, but + the PCS task then slews to the scan start and that slew can block up to + ``SLEW_TIMEOUT_SEC`` (180 s). A dispatch-now scan whose resolved crossing + landed only just above the 10 s floor can, by the time the slew finishes, be + *less* than the Go TCS ``pathCmd.Check()`` minimum lead (rejected at + ``< ~9.8 s`` out, ``commands.go:253``), so the POST would fail. Re-floor with + a *fresh* ``now`` just before POST so the body still clears the receiver. + + Only ``start_time`` is absolute; ``points`` carry RELATIVE seconds in column 0 + (``to_path_payload``), so advancing the start just shifts the whole scan later + in wall-clock with no point re-serialisation. The completion-loop end time + (``start_time + points[-1][0]``) reads this same ``start_time``, so it stays + consistent. The floor never moves the start earlier (it is a ``max``), so a + comfortably-future crossing is untouched. + + The re-floor shifts *when* the scan plays, not *where*; the caller bounds the + resulting sidereal staleness via :data:`MAX_REFLOOR_DRIFT_SEC`. 
``payload`` is + mutated in place and returned; ``now_unix`` is read *after* the slew completes. + """ + payload["start_time"] = max( + float(payload["start_time"]), now_unix + SCAN_DISPATCH_BUFFER_SEC + ) + return payload + + +def refloor_drift_seconds(original_start_unix: float, payload: dict) -> float: + """Return how many seconds the re-floor advanced a payload's ``start_time``. + + :func:`refloor_payload_start_time` advances only ``start_time``; the baked + az/el samples are frozen at the build-time start, so this delta is exactly + how stale the sidereal-tracked geometry now is (the boresight lags the sky by + ~az_rate * delta). Pure / ocs-free so the agent's dispatch path stays a thin + wrapper. Returns ``>= 0`` (the re-floor never regresses ``start_time``). + """ + return max(0.0, float(payload["start_time"]) - float(original_start_unix)) + + +def tcs_response_status(response) -> int | None: + """Read an HTTP status code from an ``aculib`` TCS return, 503-safe. + + ``aculib.observatory_control_system.post()`` returns the ``requests`` + ``Response`` on success but short-circuits a **HTTP 503** to a bare ``{}`` + (aculib.py:122-124), the status the Go TCS returns when a command is rejected + because a prior one is still running (e.g. ``/path`` POSTed before the slew + settled, or a ``move-to`` the ACU could not accept). A caller doing + ``response.status_code`` on that ``{}`` raises ``AttributeError`` *before* its + ``!= 200`` guard can turn the rejection into a graceful failure. This reads the + code defensively so a non-``Response`` (``{}``, ``None``) maps to ``None``: + "not 200, treat as rejected". Factored out so the decision is unit-testable + without ``ocs`` / ``twisted``. Compare the result against ``200``. 
class Adam_Agent:
    """OCS Agent that reads the Adam scale used for the ModCam LN2 dewar.

    Parameters:
        agent (OCSAgent): OCSAgent object from :func:`ocs.ocs_agent.init_site_agent`.
        port (str): Serial device path passed to the driver ``Module``.
        f_sample (float): Default sampling frequency [Hz]. Stored on the
            instance; the acquisition loop does not currently use it.

    Attributes:
        agent (OCSAgent): OCSAgent object from :func:`ocs.ocs_agent.init_site_agent`.
        log: Logger taken from ``agent.log``.
        lock (TimeoutLock): Serialises ``init_adam`` and ``acq``.
        initialized (bool): True once ``init_adam`` has connected the scale.
        take_data (bool): Loop flag for ``acq``; cleared by ``_stop_acq``.
    """

    def __init__(self, agent, port="/dev/ADAM", f_sample=0.5):
        self.agent = agent
        self.log = agent.log
        self.lock = TimeoutLock()

        self.port = port
        self.f_sample = f_sample

        self.initialized = False
        # Defined up front so _stop_acq can be called before acq has ever run
        # without raising AttributeError.
        self.take_data = False

        # Register the weight feed.
        agg_params = {'frame_length': 60}
        self.agent.register_feed('weight',
                                 record=True,
                                 agg_params=agg_params,
                                 buffer_time=1)

    def init_adam(self, session, params):
        """init_adam(auto_acquire=False)

        **Task** - Create the driver ``Module`` and open the connection to the
        Adam scale.

        Parameters:
            auto_acquire (bool, optional): Default is False. Starts data
                acquisition after initialization if True.

        """
        if params is None:
            params = {}

        auto_acquire = params.get('auto_acquire', False)

        if self.initialized:
            return True, "Already Initialized Module"

        with self.lock.acquire_timeout(0, job='init') as acquired:
            if not acquired:
                self.log.warn("Could not start init because "
                              "{} is already running".format(self.lock.job))
                return False, "Could not acquire lock."

            self.module = Module(port=self.port)
            self.module.connect()
            # A constructor result is never None, so test the handle that
            # connect() is expected to populate instead.
            if self.module.device is None:
                raise ValueError('Device not found')
            print("Initialized Adam: {!s}".format(self.module))

        self.initialized = True

        # Start data acquisition if requested
        if auto_acquire:
            self.agent.start('acq')

        return True, 'Adam initialized.'

    @ocs_agent.param('sampling_frequency', type=float, default=0.5)
    @ocs_agent.param('test_mode', type=bool, default=False)
    def acq(self, session, params=None):
        """acq(sampling_frequency=0.5, test_mode=False)

        **Process** - Read the weight from the scale in a loop and publish it
        to the ``weight`` feed.

        Parameters:
            sampling_frequency (float): Accepted for interface compatibility.
                The loop does not sleep between reads, so this value does not
                currently set the rate.
            test_mode (bool): Run the Process loop only once. This is meant
                only for testing. Defaults to False.

        """
        if params is None:
            params = {}

        if not self.initialized:
            # Without this guard every read would fail on the missing driver
            # and the retry branch below would spin forever.
            return False, "Adam not initialized; run init_adam first."

        with self.lock.acquire_timeout(0, job='acq') as acquired:
            if not acquired:
                self.log.warn("Could not start acq because "
                              "{} is already running".format(self.lock.job))
                return False, "Could not acquire lock."

            session.set_status('running')

            self.take_data = True

            session.data = {'fields': {}}

            while self.take_data:
                current_time = time.time()
                data = {
                    'timestamp': current_time,
                    'block_name': 'weight',
                    'data': {}
                }

                # Exactly one guarded read per sample: a failed read is
                # reported and retried instead of ending the Process.
                # Broad catch is deliberate at this loop boundary.
                try:
                    weight = self.module.read_weight()
                except Exception as e:
                    print(f"EXCEPTION in read_weight: {type(e).__name__}: {e}")
                    import traceback
                    traceback.print_exc()
                    time.sleep(1)
                    continue

                data['data']['weight'] = weight
                session.data['fields'].update({'weight': weight})
                self.agent.publish_to_feed('weight', data)

                session.data['fields'].update({'timestamp': current_time})

                if params.get('test_mode'):
                    break

            self.agent.feeds['weight'].flush_buffer()

        return True, 'Acquisition exited cleanly.'

    def _stop_acq(self, session, params=None):
        """
        Stops acq process.
        """
        if self.take_data:
            session.set_status('stopping')
            self.take_data = False
            return True, 'requested to stop taking data.'
        else:
            return False, 'acq is not currently running'
def main(args=None):
    """Parse site/agent arguments, build the Adam agent and run it.

    ``--mode init`` registers ``init_adam`` as a startup task without
    acquisition, ``--mode acq`` registers it with ``auto_acquire=True``;
    with no mode the task is registered but not run at startup.

    ``--port`` and ``--sampling-frequency`` are forwarded to
    :class:`Adam_Agent` only when given, so the class defaults
    (``/dev/ADAM``, 0.5 Hz) still apply otherwise.

    Parameters:
        args (list, optional): Command-line arguments; passed through to
            ``site_config.parse_args``.
    """
    parser = make_parser()

    args = site_config.parse_args(agent_class='Adam_Agent', parser=parser, args=args)

    init_params = False
    if args.mode == 'init':
        init_params = {'auto_acquire': False}
    elif args.mode == 'acq':
        init_params = {'auto_acquire': True}

    # Only override the constructor defaults for options the user supplied;
    # previously these parsed options were silently dropped.
    agent_kwargs = {}
    port = getattr(args, 'port', None)
    if port is not None:
        agent_kwargs['port'] = port
    sampling_frequency = getattr(args, 'sampling_frequency', None)
    if sampling_frequency is not None:
        agent_kwargs['f_sample'] = sampling_frequency

    agent, runner = ocs_agent.init_site_agent(args)
    adam = Adam_Agent(agent, **agent_kwargs)

    agent.register_task('init_adam', adam.init_adam, startup=init_params)
    agent.register_process('acq', adam.acq, adam._stop_acq)

    runner.run(agent, auto_reconnect=True)
class DymoAgent:
    """OCS Agent that reads the Dymo USB scale used for the ModCam LN2 dewar.

    Parameters:
        agent (OCSAgent): OCSAgent object from :func:`ocs.ocs_agent.init_site_agent`.
        vid (int): USB vendor ID passed to the driver ``Module``.
        pid (int): USB product ID passed to the driver ``Module``.
        f_sample (float): Sampling frequency [Hz] used by ``acq`` when the
            ``sampling_frequency`` parameter is None.

    Attributes:
        agent (OCSAgent): OCSAgent object from :func:`ocs.ocs_agent.init_site_agent`.
        log: Logger taken from ``agent.log``.
        lock (TimeoutLock): Serialises ``init_dymo`` and ``acq``.
        initialized (bool): True once ``init_dymo`` has found the scale.
        take_data (bool): Loop flag for ``acq``; cleared by ``_stop_acq``.
    """

    def __init__(self, agent, vid=0x0922, pid=0x8009, f_sample=0.5):
        self.agent = agent
        self.log = agent.log
        self.lock = TimeoutLock()

        self.vid = vid
        self.pid = pid
        self.f_sample = f_sample

        self.initialized = False
        # Defined up front so _stop_acq can be called before acq has ever run
        # without raising AttributeError.
        self.take_data = False

        # Register the weight feed.
        agg_params = {'frame_length': 60}
        self.agent.register_feed('weight',
                                 record=True,
                                 agg_params=agg_params,
                                 buffer_time=1)

    def init_dymo(self, session, params):
        """init_dymo(auto_acquire=False)

        **Task** - Create the driver ``Module`` and locate the Dymo scale on
        the USB bus.

        Parameters:
            auto_acquire (bool, optional): Default is False. Starts data
                acquisition after initialization if True.

        """
        if params is None:
            params = {}

        auto_acquire = params.get('auto_acquire', False)

        if self.initialized:
            return True, "Already Initialized Module"

        with self.lock.acquire_timeout(0, job='init') as acquired:
            if not acquired:
                self.log.warn("Could not start init because "
                              "{} is already running".format(self.lock.job))
                return False, "Could not acquire lock."

            self.module = Module(vid=self.vid, pid=self.pid)
            # A constructor result is never None; the driver stores the USB
            # lookup result in ``device``, which is None when nothing matched.
            if self.module.device is None:
                raise ValueError('Device not found')
            print("Initialized Dymo: {!s}".format(self.module))

        self.initialized = True

        # Start data acquisition if requested
        if auto_acquire:
            self.agent.start('acq')

        return True, 'Dymo initialized.'

    @ocs_agent.param('sampling_frequency', type=float, default=0.5)
    @ocs_agent.param('test_mode', type=bool, default=False)
    def acq(self, session, params=None):
        """acq(sampling_frequency=0.5, test_mode=False)

        **Process** - Read the weight from the scale in a loop and publish it
        to the ``weight`` feed.

        Parameters:
            sampling_frequency (float): Rate at which to read the scale [Hz].
                Falls back to the agent's ``f_sample`` when None.
            test_mode (bool): Run the Process loop only once. This is meant
                only for testing. Defaults to False.

        """
        if params is None:
            params = {}

        f_sample = params.get('sampling_frequency')
        if f_sample is None:
            f_sample = self.f_sample
        if f_sample <= 0:
            return False, "sampling_frequency must be positive."

        # Clamp at zero: above 100 Hz the raw expression goes negative and
        # time.sleep() would raise ValueError.
        sleep_time = max(0., 1. / f_sample - 0.01)

        if not self.initialized:
            return False, "Dymo not initialized; run init_dymo first."

        with self.lock.acquire_timeout(0, job='acq') as acquired:
            if not acquired:
                self.log.warn("Could not start acq because "
                              "{} is already running".format(self.lock.job))
                return False, "Could not acquire lock."

            session.set_status('running')

            self.take_data = True

            session.data = {'fields': {}}

            while self.take_data:
                current_time = time.time()
                data = {
                    'timestamp': current_time,
                    'block_name': 'weight',
                    'data': {}
                }

                # A USB error on one read is reported and retried rather
                # than ending the whole Process.
                try:
                    weight = self.module.read_weight()
                except usb.core.USBError as e:
                    print(f"USB error in read_weight: {type(e).__name__}: {e}")
                    time.sleep(1)
                    continue

                data['data']['weight'] = weight
                session.data['fields'].update({'weight': weight})
                self.agent.publish_to_feed('weight', data)

                session.data['fields'].update({'timestamp': current_time})
                time.sleep(sleep_time)

                if params.get('test_mode'):
                    break

            self.agent.feeds['weight'].flush_buffer()

        return True, 'Acquisition exited cleanly.'

    def _stop_acq(self, session, params=None):
        """
        Stops acq process.
        """
        if self.take_data:
            session.set_status('stopping')
            self.take_data = False
            return True, 'requested to stop taking data.'
        else:
            return False, 'acq is not currently running'
BUFF_SIZE = 128
ENQ = '\x05'


class Pfeiffer:
    """Control and read out a Pfeiffer TPG 366 pressure gauge controller.

    Args:
        ip_address: IP address of the device.
        port (int): TCP port of the device.
        timeout (float): Socket timeout in seconds, applied to the connect
            and to every later send/recv.
        f_sample (float): Accepted for interface compatibility; not used by
            this class.

    Attributes:
        comm (socket.socket): Connected TCP socket to the controller.
        log: Logger created with ``txaio.make_logger()``.
    """

    def __init__(self, ip_address, port, timeout=10,
                 f_sample=1.):
        self.ip_address = ip_address
        self.port = port
        self.comm = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Set the timeout before connecting so an unreachable controller
        # cannot block the constructor for the OS default connect time.
        self.comm.settimeout(timeout)
        self.comm.connect((self.ip_address, self.port))
        self.log = txaio.make_logger()

    def _query(self, mnemonic):
        """Send ``mnemonic``, then ENQ, and return the decoded data reply.

        The reply to the mnemonic itself is read and discarded (presumably
        the controller's acknowledgement); the reply to ENQ is returned.
        """
        self.comm.send((mnemonic + '\r\n').encode())
        self.comm.recv(BUFF_SIZE)
        self.comm.send(ENQ.encode())
        return self.comm.recv(BUFF_SIZE).decode()

    def channel_power(self):
        """Report which channels the ``SEN`` query flags with state 1.

        Returns:
            list: 1-based channel numbers whose reported state is 1 (logged
            as "off"); an empty list when no channel is in that state.
        """
        read_str = self._query('SEN')
        power_states = [int(state) for state in read_str.split('\r')[0].split(',')]
        # Always bound, so the all-channels-on case returns [] instead of
        # raising UnboundLocalError.
        channel_states = [index + 1
                          for index, state in enumerate(power_states)
                          if state == 1]
        if channel_states:
            self.log.debug("The following channels are off: {}".format(channel_states))
        return channel_states

    def read_pressure(self, ch_no):
        """Measure the pressure of one channel.

        Args:
            ch_no: The channel to be measured (1-6).

        Returns:
            float: The last comma-separated field of the reply, parsed as a
            float.
        """
        read_str = self._query('PR%d' % ch_no)
        pressure_str = read_str.split(',')[-1].split('\r')[0]
        return float(pressure_str)

    def read_pressure_all(self):
        """Measure the pressure of all channels with one ``PRX`` query.

        The reply is parsed as alternating ``state,pressure`` fields. Any
        channel whose state is non-zero is reported as ``0.``.

        Returns:
            list: One float per channel in the reply.
        """
        read_str = self._query('PRX')
        fields = read_str.split('\r')[0].split(',')
        gauge_states = [int(state) for state in fields[::2]]
        pressures = [float(p) for p in fields[1::2]]
        return [0. if state != 0 else pressure
                for state, pressure in zip(gauge_states, pressures)]

    def close(self):
        """Close the socket of the connection."""
        self.comm.close()
def make_parser(parser=None):
    """Build the argument parser for the Agent.

    Adds an 'Agent Options' group with ``--ip_address``, ``--port`` and
    ``--mode`` (``acq`` or ``test``, default ``acq``). Kept as a standalone
    function so sphinx can build documentation from it.

    Parameters:
        parser (argparse.ArgumentParser, optional): Existing parser to extend;
            a new one is created when None.

    Returns:
        argparse.ArgumentParser: The parser that was passed in or created.
    """
    parser = argparse.ArgumentParser() if parser is None else parser

    group = parser.add_argument_group('Agent Options')
    for flag in ('--ip_address', '--port'):
        group.add_argument(flag)
    group.add_argument("--mode", type=str, default='acq', choices=['acq', 'test'])

    return parser
class Teledyne:
    '''
    Class to control and retrieve data from the Teledyne HVG-2020B

    Args:
        port: Serial device path of the gauge.
        baud (int): Serial baud rate.
        timeout (float): Serial read timeout in seconds.

    Attributes:
        connection (Serial): The open serial connection to the gauge.
    '''
    def __init__(self, port, baud=19200, timeout=0.1):
        self.baud = baud
        self.timeout = timeout
        self.port = port
        self.connection = Serial(self.port, baudrate=self.baud, bytesize=EIGHTBITS,
                                 parity=PARITY_NONE, stopbits=STOPBITS_ONE,
                                 timeout=timeout, xonxoff=False, rtscts=False)

    def read_pressure(self, settle_time=0.1):
        """
        Send the pressure query to the gauge and return the reading.

        Args:
            settle_time (float): Seconds to wait between writing the command
                and reading the reply.

        Returns:
            The reply parsed as a float, or -99 when the reply cannot be
            decoded or parsed (abnormal data does occur on this link).
        """
        self.connection.write('p\r\n'.encode("utf-8"))
        time.sleep(settle_time)
        raw = self.connection.readline()
        try:
            # Decode inside the try: UnicodeDecodeError is a ValueError, so
            # line noise yields the -99 sentinel instead of escaping.
            return float(raw.replace(b'\r>', b'').decode())
        except ValueError:
            print(raw)
            return -99

    def check_connection(self):
        """
        Verify that the gauge answers the ``s1`` query, reopening the port
        first if it is closed.

        Up to three attempts are made. Returns True as soon as a reply starts
        with ``HVG-2020``. On an IOError with the port still open the attempt
        is retried; after the third such error the port is closed and True is
        returned, so a later call reopens the port and tries again. Returns
        False when the port cannot be opened, when the port is found closed
        after an error, or when three replies arrive without the expected
        prefix.
        """
        if not self.connection.is_open:
            try:
                self.connection.open()
            except IOError as err:
                print(err)
                return False

        for i in range(3):
            print(f'Connection Check {i}')
            try:
                self.connection.write('s1\r\n'.encode('utf-8'))

                result = self.connection.readline().replace(b'\r>', b'').decode()
                print(f'recieved \"{result}\"')
                if result[:8] == 'HVG-2020':
                    if i > 0:
                        print(f'connection check passed on {i}')
                    return True

            except IOError as err:
                print(err)
                if not self.connection.is_open:
                    print('connection lost')
                    return False

                print('port open but read write connection error')
                if i < 2:
                    continue

                self.connection.close()
                print('read write error 3 times, closing port for now')
                return True
        return False

    def close(self):
        """
        Closes connection with Teledyne Pressure gauge.
        """
        self.connection.close()
def stop_acq(self, session, params=None):
    """Stop the acquisition loop and close the gauge's serial port.

    When ``take_data`` is set, it is cleared, the gauge connection is closed
    and the resulting port state is printed.

    Returns:
        tuple: ``(True, message)`` when a running acquisition was asked to
        stop, ``(False, message)`` when nothing was running.
    """
    # Guard clause: nothing to stop.
    if not self.take_data:
        return False, 'Acq is not currently running.'

    self.take_data = False
    self.gauge.close()
    print(f'port is now {str(self.gauge.connection.is_open)}')
    return True, 'Requested to stop taking data.'
class Module:
    """
    Serial interface to the Adam scale.

    The port is not opened at construction; call :meth:`connect` first.

    Attributes:
        port (str): Serial device path.
        device: The open ``Serial`` handle, or None before ``connect()``.
    """
    def __init__(self, port="/dev/ADAM"):
        self.port = port
        self.device = None

    def connect(self):
        """Open the serial port.

        Kept out of ``__init__`` so the caller decides where the open
        happens (the original note asks for this to be run via
        ``deferToThread``).
        """
        self.device = Serial(self.port)

    def read_weight(self):
        """Query the scale and return the weight.

        Writes the ``P`` command, reads one line and parses the second
        whitespace-separated token as a float. This is a synchronous call:
        it returns a plain number, not a Deferred, and waits on the serial
        read.

        Returns:
            float: The parsed weight, or -99 when the reply cannot be decoded
            or does not have a numeric second token.

        Raises:
            RuntimeError: If ``connect()`` has not been called.
        """
        if self.device is None:
            raise RuntimeError("Adam scale is not connected; call connect() first")

        self.device.write(b'P\r\n')
        try:
            read = self.device.readline().decode().strip()
            parts = read.split()
            return float(parts[1])
        except (ValueError, IndexError):
            return -99
STATE_STABLE_ZERO = 2
STATE_UNSTABLE_POSITIVE = 3
STATE_STABLE_POSITIVE = 4
STATE_NEGATIVE = 5  # returned both for stable and unstable values

UNITS_KG = 3
UNITS_LB = 12

# The scale flag is one byte of the report, so only 0..255 can occur.
# NOTE(review): treated here as a signed power-of-ten exponent
# (0xFF = -1, 0xFE = -2); the previous value 256 could never match a byte.
SCALE_TENTHS = 255
SCALE_HUNDREDTHS = 254

SLEEP_NO_DEVICE = 1
SLEEP_STABLE = 1
SLEEP_UNSTABLE = 0.1


def parse_reading(data):
    """Decode one raw report from the scale.

    Reads the state flag from ``data[1]``, the unit flag from ``data[2]``,
    the scale flag from ``data[3]`` and the 16-bit little-endian raw weight
    from ``data[4]``/``data[5]``.

    Parameters:
        data: Indexable sequence of at least six integers.

    Returns:
        dict: ``is_stable`` (bool), ``weight`` (truncated toward zero to one
        decimal place, negative when the state flag is ``STATE_NEGATIVE``)
        and ``unit`` (``'kg'``, ``'lbs'``, or the raw unit flag when
        unrecognised).
    """
    state_flag = data[1]
    is_stable = state_flag in (STATE_STABLE_ZERO, STATE_STABLE_POSITIVE)
    is_negative = state_flag == STATE_NEGATIVE

    scale_flag = data[3]
    if scale_flag == SCALE_TENTHS:
        scale_factor = 0.1
    elif scale_flag == SCALE_HUNDREDTHS:
        scale_factor = 0.01
    else:
        scale_factor = 100  # want an obviously wrong value

    weight = scale_factor * (data[4] + (256 * data[5]))
    if is_negative:
        weight = -weight

    unit_flag = data[2]
    if unit_flag == UNITS_KG:
        unit = 'kg'
    elif unit_flag == UNITS_LB:
        unit = 'lbs'
    else:
        unit = unit_flag

    return {
        'is_stable': is_stable,
        'weight': math.trunc(weight * 10) / 10,
        'unit': unit
    }


class Module:
    """
    Allows communication with the Dymo scale over USB.

    Attributes:
        device: Result of ``usb.core.find`` for the given IDs; None when no
            matching device was found.
    """
    def __init__(self, vid=0x0922, pid=0x8009):
        """
        Look up the scale on the USB bus.

        A constructor cannot return None, so a missing scale is signalled by
        ``device`` being None; callers should check it.
        """
        self.device = usb.core.find(idVendor=vid, idProduct=pid)

    def read_weight(self):
        """
        Read one report from the scale and return the decoded weight.

        Detaches the kernel driver from interface 0 if it is active, then
        reads from the first endpoint of the first interface of the first
        configuration.

        Returns:
            The ``weight`` field produced by :func:`parse_reading`.

        Raises:
            RuntimeError: If no device was found at construction.
        """
        if self.device is None:
            raise RuntimeError("Dymo scale not found; cannot read weight")

        if self.device.is_kernel_driver_active(0):
            try:
                self.device.detach_kernel_driver(0)
                print("Kernel driver detached")
            except usb.core.USBError as e:
                print(f"Could not detach: {e}")

        endpoint = self.device[0][(0, 0)][0]
        data = self.device.read(endpoint.bEndpointAddress, endpoint.wMaxPacketSize)
        return parse_reading(data)['weight']
docs/requirements.txt