Darliewithrow · Copilot · Nov 10, 2025 · Nov 10, 2025 · Nov 10, 2025 · Nov 10, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,38 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+venv/
+ENV/
+env/
+.venv
+
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
diff --git a/README.md b/README.md
@@ -1 +1,111 @@
-https://app.chime.com/link/qr?u=Darlie-Withrow
+# Actor Data Parser
+
+This repository contains a Python script to parse actor data from a formatted string.
+
+## Problem Statement
+
+Parse actor information from the following format:
+```
+actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow
+```
+
+## Solution
+
+The `actor_parser.py` script splits the input string on the `actor:` delimiter, removes duplicate names, and then applies two heuristics that discard corrupted and fragmentary names so that only plausible actor names remain.
+
+### Usage
+
+Run with default data:
+```bash
+python3 actor_parser.py
+```
+
+Run with data from a file:
+```bash
+python3 actor_parser.py actor_data.txt
+```
+
+### Output
+
+```
+Input data: actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow
+
+Parsed Actors:
+1. Darliewithrow
+
+Total unique actors: 1
+```
+
+## Testing
+
+Run the test suite:
+
+```bash
+python3 test_actor_parser.py
+```
+
+## Implementation Details
+
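+- The parser splits the input string on the `actor:` delimiter and strips surrounding whitespace from each piece
+- Filters out corrupted names: a name that starts with another parsed name and whose remaining suffix already occurs inside that name (e.g. `Darliewithrowliewithrow` next to `Darliewithrow`)
+- Removes fragment names: a name that is a prefix of a longer, non-corrupted name and is at most half its length (e.g. `Dar` next to `Darliewithrow`)
+- Keeps only the first occurrence of each name, preserving the order of first appearance
+- Returns a list of the remaining actor names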
+
+## License
+
+This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
+# Actor Data Parser
+
+This repository contains a Python script to parse actor data from a formatted string.
+
+## Problem Statement
+
+Parse actor information from the following format:
+```
+actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow
+```
+
+## Solution
+
+The `actor_parser.py` script splits the input string on the `actor:` delimiter, removes duplicate names, and then applies two heuristics that discard corrupted and fragmentary names so that only plausible actor names remain.
+
+### Usage
+
+Run with default data:
+```bash
+python3 actor_parser.py
+```
+
+Run with data from a file:
+```bash
+python3 actor_parser.py actor_data.txt
+```
+
+### Output
+
+```
+Input data: actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow
+
+Parsed Actors:
+1. Darliewithrow
+
+Total unique actors: 1
+```
+
+## Testing
+
+Run the test suite:
+
+```bash
+python3 test_actor_parser.py
+```
+
+## Implementation Details
+
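+- The parser splits the input string on the `actor:` delimiter and strips surrounding whitespace from each piece
+- Filters out corrupted names: a name that starts with another parsed name and whose remaining suffix already occurs inside that name (e.g. `Darliewithrowliewithrow` next to `Darliewithrow`)
+- Removes fragment names: a name that is a prefix of a longer, non-corrupted name and is at most half its length (e.g. `Dar` next to `Darliewithrow`)
+- Keeps only the first occurrence of each name, preserving the order of first appearance
+- Returns a list of the remaining actor names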
+
+## License
+
+This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
+https://app.chime.com/link/qr?u=Darlie-Withrow
diff --git a/actor_data.txt b/actor_data.txt
@@ -0,0 +1 @@
+actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow
diff --git a/actor_parser.py b/actor_parser.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+"""
+Actor Data Parser
+
+This script parses actor data from a formatted string.
+The input format is: actor:<name>actor:<name>...
+"""
+
+import re
+import sys
+from typing import List, Set
-from typing import List, Set
+from typing import List
-from typing import List, Set
+from typing import List
+
+
+def parse_actors(data: str) -> List[str]:
+    """
+    Extract plausible actor names from an ``actor:``-delimited string.
+
+    The input is split on the literal ``actor:`` marker, each piece is
+    stripped of surrounding whitespace, and empty pieces and repeated names
+    are dropped (the first occurrence wins). Two heuristics then discard
+    noise:
+
+    * A name is treated as corrupted when it starts with another parsed
+      name and the leftover suffix already occurs inside that other name,
+      e.g. "Darliewithrowliewithrow" next to "Darliewithrow".
+    * A name is treated as a fragment when it is a prefix of a longer,
+      non-corrupted name and is at most half as long, e.g. "Dar" next to
+      "Darliewithrow".
+
+    Args:
+        data: Input string in format "actor:<name>" where name may be corrupted
+
+    Returns:
+        The surviving names, in order of first appearance
+    """
+    if not data:
+        return []
+
+    # Tokenise on the marker; dict.fromkeys de-duplicates while keeping
+    # the order in which names were first seen.
+    pieces = (chunk.strip() for chunk in data.split('actor:'))
+    candidates = list(dict.fromkeys(name for name in pieces if name))
+
+    def extends_with_repeat(name: str, base: str) -> bool:
+        # True when `name` is `base` plus a tail that is itself a substring
+        # of `base`. The tail cannot be empty because name != base.
+        return (
+            name != base
+            and name.startswith(base)
+            and name[len(base):] in base
+        )
+
+    # Pass 1: collect every name that looks like a stuttered copy of another.
+    corrupted = {
+        name
+        for name in candidates
+        if any(extends_with_repeat(name, base) for base in candidates)
+    }
+
+    def is_fragment(name: str) -> bool:
+        # A short prefix of some longer name that was not flagged as
+        # corrupted; "short" means at most half the longer name's length.
+        return any(
+            longer != name
+            and longer.startswith(name)
+            and longer not in corrupted
+            and len(name) <= len(longer) / 2
+            for longer in candidates
+        )
+
+    # Pass 2: keep whatever is neither corrupted nor a fragment.
+    return [
+        name
+        for name in candidates
+        if name not in corrupted and not is_fragment(name)
+    ]
+
+
+def main() -> None:
+    """
+    Command-line entry point: parse actor data and print the result.
+
+    Reads the input from the file named by the first command-line argument,
+    or falls back to the built-in problem-statement string when no argument
+    is given. Prints the raw input, the numbered list of parsed actors and
+    their count. Exits with status 1 if the file cannot be read.
+    """
+    if len(sys.argv) > 1:
+        file_path = sys.argv[1]
+        try:
+            # Decode explicitly so the result does not depend on the
+            # platform's default locale encoding.
+            with open(file_path, 'r', encoding='utf-8') as f:
+                input_data = f.read().strip()
+        except FileNotFoundError:
+            print(f"Error: File '{file_path}' not found")
+            sys.exit(1)
+        except (OSError, UnicodeDecodeError) as exc:
+            # Covers unreadable paths (permissions, directories, ...) and
+            # files that are not valid UTF-8, instead of a raw traceback.
+            print(f"Error: Could not read file '{file_path}': {exc}")
+            sys.exit(1)
+        print(f"Reading from file: {file_path}")
+    else:
+        # The problem statement data
+        input_data = "actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow"
+
+    print(f"Input data: {input_data}\n")
+
+    actors = parse_actors(input_data)
+
+    print("Parsed Actors:")
+    for i, actor in enumerate(actors, 1):
+        print(f"{i}. {actor}")
+
+    print(f"\nTotal unique actors: {len(actors)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test_actor_parser.py b/test_actor_parser.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+"""
+Test suite for Actor Data Parser
+"""
+
+import unittest
+from actor_parser import parse_actors
+
+
+class TestActorParser(unittest.TestCase):
+    """Test cases for parse_actors, pinning the exact list it returns."""
+
+    def test_simple_actors(self):
+        """Distinct, whitespace-separated names are returned in input order."""
+        data = "actor:John actor:Jane actor:Bob"
+        expected = ["John", "Jane", "Bob"]
+        self.assertEqual(parse_actors(data), expected)
+
+    def test_duplicate_actors(self):
+        """A repeated name is reported once, at its first position."""
+        data = "actor:Alice actor:Bob actor:Alice"
+        expected = ["Alice", "Bob"]
+        self.assertEqual(parse_actors(data), expected)
+
+    def test_empty_string(self):
+        """An empty input yields an empty list."""
+        self.assertEqual(parse_actors(""), [])
+
+    def test_single_actor(self):
+        """A single name passes through unchanged."""
+        data = "actor:SingleActor"
+        expected = ["SingleActor"]
+        self.assertEqual(parse_actors(data), expected)
+
+    def test_problem_statement_data(self):
+        """The problem statement data reduces to the one valid name."""
+        data = "actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow"
+        # "Dar" is dropped as a fragment and "Darliewithrowliewithrow" as a
+        # corrupted repeat, so the exact result is asserted rather than mere
+        # membership: a regression in either filter must fail this test.
+        self.assertEqual(parse_actors(data), ["Darliewithrow"])
+
+    def test_actors_with_whitespace(self):
+        """Whitespace around names is stripped and order is preserved."""
+        data = "actor: SpaceActor  actor:NoSpace  "
+        self.assertEqual(parse_actors(data), ["SpaceActor", "NoSpace"])
+
+    def test_no_actor_prefix(self):
+        """Input without the actor: marker is treated as one name."""
+        data = "JustAName"
+        expected = ["JustAName"]
+        self.assertEqual(parse_actors(data), expected)
+
+
+if __name__ == "__main__":
+    unittest.main()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow