diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 542f1ca..7e80239 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -17,7 +17,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install pyspark pandas pyarrow numpy py4j pytest coverage
+          pip install -r requirements.txt
 
       - name: Run tests with coverage
         run: |
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..37c5277
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,6 @@
+{
+  "name": "testbricks",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {}
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/package.json
@@ -0,0 +1 @@
+{}
diff --git a/publish.sh b/publish.sh
new file mode 100755
index 0000000..a2d1f3e
--- /dev/null
+++ b/publish.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Build the testbrick sdist and wheel, then upload them to PyPI.
+set -euo pipefail
+
+# setup.py sits next to this script, so build from the script's own directory.
+cd "$(dirname "$0")"
+
+echo "Building testbrick package..."
+
+# Clean previous builds (setup.py uses package_dir src, so egg-info lands under src/)
+rm -rf build/ dist/ *.egg-info/ src/*.egg-info/
+
+# Install build dependencies
+python -m pip install --upgrade build twine
+
+# Build the package (direct `setup.py` invocation is deprecated by setuptools)
+python -m build
+
+# Validate the built artifacts before uploading
+twine check dist/*
+
+# Publish to PyPI
+echo "Publishing to PyPI..."
+twine upload dist/*
+
+echo "Package published successfully!"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2cbe210
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+pyspark
+pandas
+pytest
+pyarrow
+numpy
+py4j
+coverage
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..a69f9d1
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,35 @@
+"""Packaging configuration for the testbrick distribution."""
+
+from setuptools import setup, find_packages
+
+setup(
+    name="testbrick",
+    version="0.1.0",
+    description="A set of proxy objects to facilitate testing of Databricks notebooks in CI/CD pipelines",
+    author="Karan Gupta",
+    author_email="gkaran184@gmail.com",
+    # Packages live under src/, so discovery and the package root both point there.
+    packages=find_packages(where="src"),
+    package_dir={"": "src"},
+    python_requires=">=3.8",
+    # Runtime dependencies only; test tooling (pytest, coverage) stays in requirements.txt.
+    install_requires=[
+        "pyspark",
+        "pandas",
+        "pyarrow",
+        "numpy",
+        "py4j",
+    ],
+    classifiers=[
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Developers",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
+        "Programming Language :: Python :: 3.13",
+        "Programming Language :: Python :: 3.14",
+    ],
+)
diff --git a/specs/workflow_sample.json b/specs/workflow_sample.json
new file mode 100644
index 0000000..a2b2143
--- /dev/null
+++ b/specs/workflow_sample.json
@@ -0,0 +1,109 @@
+{
+  "name": "gaming-ai-analytics",
+  "email_notifications": {
+    "no_alert_for_skipped_runs": false
+  },
+  "webhook_notifications": {},
+  "timeout_seconds": 0,
+  "schedule": {
+    "quartz_cron_expression": "55 0 20 * * ?",
+    "timezone_id": "Asia/Kolkata",
+    "pause_status": "PAUSED"
+  },
+  "max_concurrent_runs": 1,
+  "tasks": [
+    {
+      "task_key": "dimensions",
+      "run_if": "ALL_SUCCESS",
+      "notebook_task": {
+        "notebook_path": "/Workspace/Users/karangupta184@live.com/gaming-data-analytics/src/notebooks/modelling/auxillary_dims",
+        "base_parameters": {
+
"catalog": "steam", + "environment": "analytics", + "raw_location": "/Volumes/steam/raw/landing_zone" + }, + "source": "WORKSPACE" + }, + "timeout_seconds": 0, + "email_notifications": {}, + "webhook_notifications": {} + }, + { + "task_key": "reviews_fact", + "run_if": "ALL_SUCCESS", + "notebook_task": { + "notebook_path": "/Workspace/Users/karangupta184@live.com/gaming-data-analytics/src/notebooks/modelling/reviews_fact", + "base_parameters": { + "catalog": "steam", + "environment": "analytics", + "raw_location": "/Volumes/steam/raw/landing_zone", + "batch_size": "50000", + "ai_endpoint": "https://dbc-eaacd3e9-d6a7.cloud.databricks.com/serving-endpoints/databricks-llama-4-maverick/invocations" + }, + "source": "WORKSPACE" + }, + "timeout_seconds": 0, + "email_notifications": {}, + "webhook_notifications": {} + }, + { + "task_key": "quality_checks", + "depends_on": [ + { + "task_key": "dimensions" + }, + { + "task_key": "reviews_fact" + } + ], + "run_if": "ALL_SUCCESS", + "notebook_task": { + "notebook_path": "/Workspace/Users/karangupta184@live.com/gaming-data-analytics/src/notebooks/data_quality", + "base_parameters": { + "catalog": "steam", + "environment": "analytics" + }, + "source": "WORKSPACE" + }, + "timeout_seconds": 0, + "email_notifications": {}, + "webhook_notifications": {}, + "environment_key": "quality_checks_environment" + }, + { + "task_key": "semantic_layer", + "depends_on": [ + { + "task_key": "quality_checks" + } + ], + "run_if": "ALL_SUCCESS", + "notebook_task": { + "notebook_path": "/Workspace/Users/karangupta184@live.com/gaming-data-analytics/src/notebooks/modelling/semantic_layer", + "base_parameters": { + "catalog": "steam", + "environment": "analytics" + }, + "source": "WORKSPACE" + }, + "timeout_seconds": 0, + "email_notifications": {}, + "webhook_notifications": {} + } + ], + "queue": { + "enabled": true + }, + "environments": [ + { + "environment_key": "quality_checks_environment", + "spec": { + "dependencies": [ + 
"databricks-labs-dqx==0.10.0" + ], + "environment_version": "4" + } + } + ], + "performance_target": "PERFORMANCE_OPTIMIZED" +} \ No newline at end of file