Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual environments
venv/
ENV/
env/
.venv

# IDEs
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db
112 changes: 111 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,111 @@
https://app.chime.com/link/qr?u=Darlie-Withrow
# Actor Data Parser

This repository contains a Python script to parse actor data from a formatted string.

## Problem Statement

Parse actor information from the following format:
```
actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow
```

## Solution

The `actor_parser.py` script parses the input string by splitting on the `actor:` delimiter, then intelligently filters out corrupted and fragmented actor names to extract only valid actor names.

### Usage

Run with default data:
```bash
python3 actor_parser.py
```

Comment thread
Darliewithrow marked this conversation as resolved.
Run with data from a file:
```bash
python3 actor_parser.py actor_data.txt
```

### Output

```
Parsed Actors:
1. Darliewithrow

Total unique actors: 1
```

## Testing

Run the test suite:

```bash
python3 test_actor_parser.py
```

## Implementation Details

- The parser splits the input string by `actor:` delimiter
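- Identifies and filters out corrupted names: a name that starts with another parsed name and whose remaining suffix is a substring of that name (e.g. `Darliewithrowliewithrow`)
- Removes fragment names that are prefixes of a longer valid name and at most half its length (e.g. `Dar`)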
- Maintains unique actors in order of first appearance
- Returns a list of valid actor names

## License

This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
# Actor Data Parser

This repository contains a Python script to parse actor data from a formatted string.

## Problem Statement

Parse actor information from the following format:
```
actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow
```

## Solution

The `actor_parser.py` script parses the input string by splitting on the `actor:` delimiter, then intelligently filters out corrupted and fragmented actor names to extract only valid actor names.

### Usage

Run with default data:
```bash
python3 actor_parser.py
```

Run with data from a file:
```bash
python3 actor_parser.py actor_data.txt
```

### Output

```
Parsed Actors:
1. Darliewithrow

Total unique actors: 1
```

## Testing

Run the test suite:

```bash
python3 test_actor_parser.py
```

## Implementation Details

- The parser splits the input string by `actor:` delimiter
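- Identifies and filters out corrupted names: a name that starts with another parsed name and whose remaining suffix is a substring of that name (e.g. `Darliewithrowliewithrow`)
- Removes fragment names that are prefixes of a longer valid name and at most half its length (e.g. `Dar`)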
- Maintains unique actors in order of first appearance
- Returns a list of valid actor names

## License

This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
https://app.chime.com/link/qr?u=Darlie-Withrow
1 change: 1 addition & 0 deletions actor_data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow
104 changes: 104 additions & 0 deletions actor_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env python3
"""
Actor Data Parser

This script parses actor data from a formatted string.
The input format is: actor:<name>actor:<name>...
"""

import re
Comment thread
Darliewithrow marked this conversation as resolved.
Comment thread
Darliewithrow marked this conversation as resolved.
import sys
from typing import List, Set

Copilot AI Nov 10, 2025

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Set type is imported but never used in this module. Consider removing it to keep imports clean.

Suggested change
from typing import List, Set
from typing import List

Copilot uses AI. Check for mistakes.


def parse_actors(data: str) -> List[str]:
    """
    Extract the valid actor names from an ``actor:``-delimited string.

    The input is split on the literal ``actor:``, each piece is stripped of
    surrounding whitespace, and empty pieces and repeated names are dropped
    (first occurrence wins). Two heuristics then discard damaged entries:

    * corrupted names - a name that starts with another parsed name and
      whose remaining suffix is a substring of that other name, e.g.
      "Darliewithrowliewithrow" relative to "Darliewithrow";
    * fragments - a name that is a prefix of a non-corrupted name and is
      at most half as long, e.g. "Dar" relative to "Darliewithrow".

    Args:
        data: Input string in format "actor:<name>actor:<name>..."

    Returns:
        The surviving names, in order of first appearance. A falsy input
        yields an empty list.
    """
    if not data:
        return []

    # dict.fromkeys de-duplicates while keeping first-appearance order.
    stripped_pieces = (piece.strip() for piece in data.split('actor:'))
    candidates = list(dict.fromkeys(name for name in stripped_pieces if name))

    def has_repeated_tail(name):
        # True when `name` is some other candidate plus a tail that already
        # occurs inside that candidate.
        for base in candidates:
            if base == name or not name.startswith(base):
                continue
            tail = name[len(base):]
            if tail and tail in base:
                return True
        return False

    corrupted = {name for name in candidates if has_repeated_tail(name)}

    def is_short_fragment(name):
        # True when `name` is a prefix of a non-corrupted candidate that is
        # at least twice as long.
        return any(
            longer != name
            and longer.startswith(name)
            and longer not in corrupted
            and len(name) <= len(longer) / 2
            for longer in candidates
        )

    return [
        name
        for name in candidates
        if name not in corrupted and not is_short_fragment(name)
    ]


def main():
    """Command-line entry point: parse actors from a file or the built-in sample.

    When a path is given as the first command-line argument, that file's
    contents (stripped of surrounding whitespace) are used as input;
    otherwise the sample string from the problem statement is parsed.
    The input, each parsed actor, and the total count are printed to stdout.

    Exits with status 1, after printing a message to stderr, when the file
    does not exist or cannot be read (e.g. it is a directory, permission is
    denied, or its bytes are not valid UTF-8).
    """
    if len(sys.argv) > 1:
        file_path = sys.argv[1]
        try:
            # Explicit encoding so decoding does not depend on the platform locale.
            with open(file_path, 'r', encoding='utf-8') as f:
                input_data = f.read().strip()
        except FileNotFoundError:
            print(f"Error: File '{file_path}' not found", file=sys.stderr)
            sys.exit(1)
        except (OSError, UnicodeDecodeError) as exc:
            # Any other read failure gets a clean message instead of a traceback.
            print(f"Error: Could not read file '{file_path}': {exc}", file=sys.stderr)
            sys.exit(1)
        print(f"Reading from file: {file_path}")
    else:
        # The problem statement data
        input_data = "actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow"

    print(f"Input data: {input_data}\n")

    actors = parse_actors(input_data)

    print("Parsed Actors:")
    for i, actor in enumerate(actors, 1):
        print(f"{i}. {actor}")

    print(f"\nTotal unique actors: {len(actors)}")


if __name__ == "__main__":
    main()
60 changes: 60 additions & 0 deletions test_actor_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
"""
Test suite for Actor Data Parser
"""

import unittest
from actor_parser import parse_actors


class TestActorParser(unittest.TestCase):
    """Test cases for the actor parser"""

    def test_simple_actors(self):
        """Test parsing simple actor list"""
        data = "actor:John actor:Jane actor:Bob"
        expected = ["John", "Jane", "Bob"]
        self.assertEqual(parse_actors(data), expected)

    def test_duplicate_actors(self):
        """Test that duplicate actors are removed"""
        data = "actor:Alice actor:Bob actor:Alice"
        expected = ["Alice", "Bob"]
        self.assertEqual(parse_actors(data), expected)

    def test_empty_string(self):
        """Test parsing empty string"""
        self.assertEqual(parse_actors(""), [])

    def test_single_actor(self):
        """Test parsing single actor"""
        data = "actor:SingleActor"
        expected = ["SingleActor"]
        self.assertEqual(parse_actors(data), expected)

    def test_problem_statement_data(self):
        """Test the actual problem statement data"""
        data = "actor:Daractor:Darliewithrowliewithrowactor:Darliewithrowactor:Darliewithrow"
        # Pin the exact result: the fragment "Dar" and the corrupted
        # "Darliewithrowliewithrow" must both be filtered out, leaving only
        # the one valid name.
        self.assertEqual(parse_actors(data), ["Darliewithrow"])

    def test_actors_with_whitespace(self):
        """Test parsing actors with whitespace"""
        data = "actor: SpaceActor actor:NoSpace "
        # Surrounding whitespace is stripped and order is preserved.
        self.assertEqual(parse_actors(data), ["SpaceActor", "NoSpace"])

    def test_no_actor_prefix(self):
        """Test string without actor prefix"""
        data = "JustAName"
        expected = ["JustAName"]
        self.assertEqual(parse_actors(data), expected)

    def test_short_prefix_fragment_removed(self):
        """Test that a prefix at most half the longer name's length is dropped"""
        data = "actor:Al actor:Alan"
        self.assertEqual(parse_actors(data), ["Alan"])

    def test_long_prefix_is_kept(self):
        """Test that a prefix longer than half the other name is kept"""
        data = "actor:Anna actor:Annabel"
        self.assertEqual(parse_actors(data), ["Anna", "Annabel"])


if __name__ == "__main__":
    unittest.main()
Loading