From ff93b4a84be6785d2228908f69bf5029cd0370d6 Mon Sep 17 00:00:00 2001
From: samanthacatonio <samanthacatonio@gmail.com>
Date: Fri, 26 Jun 2026 16:42:48 -0400
Subject: [PATCH] =?UTF-8?q?c=C3=B3digo?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .env                                          |   6 +
 IMDB/IMDB_diretores.py                        |  97 ++++++++++++++++
 IMDB/IMDB_genero.py                           | 101 ++++++++++++++++
 IMDB/IMDB_metascore.py                        | 108 ++++++++++++++++++
 IMDB/IMDB_metascore_genero.py                 | 105 +++++++++++++++++
 IMDB/IMDB_nota_ano.py                         |  75 ++++++++++++
 .../__pycache__/db_connection.cpython-314.pyc | Bin 0 -> 973 bytes
 IMDB/db_connection.py                         |  22 ++++
 .../__pycache__/db_connection.cpython-314.pyc | Bin 0 -> 974 bytes
 .../__pycache__/retrieve_data.cpython-314.pyc | Bin 0 -> 2520 bytes
 sales/client_query.py                         |  45 ++++++++
 sales/db_connection.py                        |  22 ++++
 sales/queries.py                              |  35 ++++++
 sales/retrieve_data.py                        |  50 ++++++++
 sales/test_retrieve_data.py                   |  25 ++++
 15 files changed, 691 insertions(+)
 create mode 100644 .env
 create mode 100644 IMDB/IMDB_diretores.py
 create mode 100644 IMDB/IMDB_genero.py
 create mode 100644 IMDB/IMDB_metascore.py
 create mode 100644 IMDB/IMDB_metascore_genero.py
 create mode 100644 IMDB/IMDB_nota_ano.py
 create mode 100644 IMDB/__pycache__/db_connection.cpython-314.pyc
 create mode 100644 IMDB/db_connection.py
 create mode 100644 sales/__pycache__/db_connection.cpython-314.pyc
 create mode 100644 sales/__pycache__/retrieve_data.cpython-314.pyc
 create mode 100644 sales/client_query.py
 create mode 100644 sales/db_connection.py
 create mode 100644 sales/queries.py
 create mode 100644 sales/retrieve_data.py
 create mode 100644 sales/test_retrieve_data.py

diff --git a/.env b/.env
new file mode 100644
index 0000000..8791ced
--- /dev/null
+++ b/.env
@@ -0,0 +1,6 @@
+DB_HOST="35.199.115.174"
+DB_USER="looqbox-challenge"
+DB_PASSWORD="looq-challenge"
+DB_NAME="looqbox-challenge"
+DB_PORT=3306
+
diff --git a/IMDB/IMDB_diretores.py b/IMDB/IMDB_diretores.py
new file mode 100644
index 0000000..b984159
--- /dev/null
+++ b/IMDB/IMDB_diretores.py
@@ -0,0 +1,131 @@
+"""Rank directors by average IMDb rating and plot the top performers."""
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.colors import LinearSegmentedColormap
+from db_connection import engine
+
+
+def get_movies():
+    """Load the director and rating of every movie that has a director.
+
+    Returns:
+        DataFrame with the columns ``Director`` and ``Rating``.
+    """
+    query = """
+        SELECT
+            Director,
+            Rating
+        FROM `looqbox-challenge`.IMDB_movies
+        WHERE Director IS NOT NULL;
+    """
+    return pd.read_sql(query, engine)
+
+
+def calculate_top_directors(df, min_movies=3, top_n=15):
+    """Return the directors with the highest mean rating.
+
+    Args:
+        df: DataFrame with ``Director`` and ``Rating`` columns.
+        min_movies: Minimum number of rows a director needs to be ranked.
+        top_n: Maximum number of directors returned.
+
+    Returns:
+        DataFrame with ``Director``, ``Avg_Rating`` and ``Count`` columns,
+        sorted by ``Avg_Rating`` from highest to lowest.
+    """
+    return (
+        df.groupby("Director")
+        .filter(lambda x: len(x) >= min_movies)
+        .groupby("Director")
+        .agg(
+            Avg_Rating=("Rating", "mean"),
+            Count=("Rating", "count")
+        )
+        .reset_index()
+        .sort_values("Avg_Rating", ascending=False)
+        .head(top_n)
+    )
+
+
+def _gradient_colors(n):
+    """Return ``n`` colors evenly spaced along the grey-to-green gradient."""
+    cmap = LinearSegmentedColormap.from_list(
+        "looqbox", ["#B0B0B0", "#3DBE6E"]
+    )
+    # max(..., 1) avoids a ZeroDivisionError when there is a single bar.
+    return [cmap(i / max(n - 1, 1)) for i in range(n)]
+
+
+def plot_top_directors(df_directors):
+    """Draw a horizontal bar chart of the ranked directors.
+
+    Args:
+        df_directors: Output of ``calculate_top_directors``.
+    """
+    if df_directors.empty:
+        # The axis limits below are derived from the data and would be NaN.
+        print("No directors to plot.")
+        return
+
+    df_directors = df_directors.sort_values("Avg_Rating", ascending=True)
+    colors = _gradient_colors(len(df_directors))
+
+    fig, ax = plt.subplots(figsize=(11, 7))
+
+    # Bars span from x=5 to the average (width is the average minus 5).
+    bars = ax.barh(
+        df_directors["Director"],
+        df_directors["Avg_Rating"] - 5,
+        color=colors,
+        edgecolor="white",
+        linewidth=0.5,
+        left=5
+    )
+
+    for bar, rating, count in zip(bars, df_directors["Avg_Rating"], df_directors["Count"]):
+        ax.text(
+            bar.get_x() + bar.get_width() + 0.02,
+            bar.get_y() + bar.get_height() / 2,
+            f"{rating:.2f}  ({count} filmes)",
+            va="center",
+            fontsize=9,
+            color="#444444"
+        )
+
+    avg = df_directors["Avg_Rating"].mean()
+    ax.axvline(
+        avg,
+        color="#e74c3c",
+        linestyle="--",
+        linewidth=1.5,
+        label=f"Group Average: {avg:.2f}"
+    )
+
+    ax.set_xlim(5, df_directors["Avg_Rating"].max() + 0.5)
+    ax.set_title(
+        "Most Consistent Directors by IMDb Rating",
+        fontsize=14, fontweight="bold", pad=15
+    )
+    ax.set_xlabel("Average IMDb Rating", fontsize=11)
+    ax.set_ylabel("Director", fontsize=11)
+    ax.legend(fontsize=10)
+    ax.grid(axis="x", alpha=0.3)
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+    plt.tight_layout()
+    plt.show()
+
+
+def main():
+    """Fetch the movies, print the ranking and show the chart."""
+    movies = get_movies()
+    top_directors = calculate_top_directors(movies)
+
+    print(top_directors.to_string(index=False))
+
+    plot_top_directors(top_directors)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/IMDB/IMDB_genero.py b/IMDB/IMDB_genero.py
new file mode 100644
index 0000000..a00cc06
--- /dev/null
+++ b/IMDB/IMDB_genero.py
@@ -0,0 +1,132 @@
+"""Compute and plot the average IMDb rating of each genre."""
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from db_connection import engine
+from matplotlib.colors import LinearSegmentedColormap
+
+
+def get_movies():
+    """Load title, genre and rating of every movie that has a genre.
+
+    Returns:
+        DataFrame with the columns ``Title``, ``Genre`` and ``Rating``.
+    """
+    query = """
+        SELECT
+            Title,
+            Genre,
+            Rating
+        FROM `looqbox-challenge`.IMDB_movies
+        WHERE Genre IS NOT NULL;
+    """
+    return pd.read_sql(query, engine)
+
+
+def expand_genres(df):
+    """Return a copy of ``df`` with one row per (movie, genre) pair.
+
+    ``Genre`` is split on commas and each piece is stripped of surrounding
+    whitespace. The input frame is left untouched.
+    """
+    # assign() works on a copy, so the caller's DataFrame is not mutated.
+    expanded = df.assign(Genre=df["Genre"].str.split(",")).explode("Genre")
+    expanded["Genre"] = expanded["Genre"].str.strip()
+    return expanded
+
+
+def calculate_avg_by_genre(df):
+    """Aggregate the mean rating and number of titles per genre.
+
+    Returns:
+        DataFrame with ``Genre``, ``Avg_Rating`` and ``Count`` columns,
+        sorted by ``Avg_Rating`` from highest to lowest.
+    """
+    return (
+        df.groupby("Genre")
+        .agg(
+            Avg_Rating=("Rating", "mean"),
+            Count=("Title", "count")
+        )
+        .reset_index()
+        .sort_values("Avg_Rating", ascending=False)
+    )
+
+
+def plot_rating_by_genre(df_genre):
+    """Draw a horizontal bar chart of the average rating per genre.
+
+    Args:
+        df_genre: Output of ``calculate_avg_by_genre``.
+    """
+    if df_genre.empty:
+        # The axis limits below are derived from the data and would be NaN.
+        print("No genres to plot.")
+        return
+
+    fig, ax = plt.subplots(figsize=(11, 8))
+
+    looqbox_cmap = LinearSegmentedColormap.from_list(
+        "looqbox", ["#B0B0B0", "#3DBE6E"]
+    )
+
+    n = len(df_genre)
+    # max(..., 1) avoids a ZeroDivisionError when there is a single genre.
+    colors = [looqbox_cmap(i / max(n - 1, 1)) for i in range(n)]
+    colors = colors[::-1]
+
+    bars = ax.barh(
+        df_genre["Genre"],
+        df_genre["Avg_Rating"] - 5,
+        color=colors,
+        edgecolor="white",
+        linewidth=0.5,
+        left=5
+    )
+
+    for bar, rating, count in zip(bars, df_genre["Avg_Rating"], df_genre["Count"]):
+        ax.text(
+            bar.get_x() + bar.get_width() + 0.02,
+            bar.get_y() + bar.get_height() / 2,
+            f"{rating:.2f}  ({count} filmes)",
+            va="center",
+            fontsize=9,
+            color="#444444"
+        )
+
+    avg = df_genre["Avg_Rating"].mean()
+    ax.axvline(
+        avg,
+        color="#e74c3c",
+        linestyle="--",
+        linewidth=1.5,
+        label=f"Overall Average: {avg:.2f}"
+    )
+
+    ax.set_xlim(5, df_genre["Avg_Rating"].max() + 0.5)
+    ax.set_title("Average IMDb Rating by Genre", fontsize=14, fontweight="bold", pad=15)
+    ax.set_xlabel("Average Rating", fontsize=11)
+    ax.set_ylabel("Genre", fontsize=11)
+    ax.legend(fontsize=10)
+    ax.invert_yaxis()
+    ax.grid(axis="x", alpha=0.3)
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+    plt.tight_layout()
+    plt.show()
+
+
+def main():
+    """Fetch the movies, print the per-genre averages and show the chart."""
+    movies = get_movies()
+    movies = expand_genres(movies)
+
+    avg_by_genre = calculate_avg_by_genre(movies)
+
+    print(avg_by_genre.to_string(index=False))
+
+    plot_rating_by_genre(avg_by_genre)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/IMDB/IMDB_metascore.py b/IMDB/IMDB_metascore.py
new file mode 100644
index 0000000..3c5c892
--- /dev/null
+++ b/IMDB/IMDB_metascore.py
@@ -0,0 +1,134 @@
+"""Compare each movie's IMDb rating (scaled by 10) with its Metascore."""
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.colors import LinearSegmentedColormap
+from db_connection import engine
+
+
+def get_movies():
+    """Load title, rating, Metascore and genre of movies with a Metascore.
+
+    Returns:
+        DataFrame with ``Title``, ``Rating``, ``Metascore`` and ``Genre``.
+    """
+    query = """
+        SELECT
+            Title,
+            Rating,
+            Metascore,
+            Genre
+        FROM `looqbox-challenge`.IMDB_movies
+        WHERE Metascore IS NOT NULL;
+    """
+    return pd.read_sql(query, engine)
+
+
+def calculate_difference(df):
+    """Return a copy of ``df`` with ``Rating_scaled`` and ``Difference`` added.
+
+    ``Rating_scaled`` is ``Rating * 10`` and ``Difference`` is
+    ``Rating_scaled - Metascore``. The input frame is left untouched.
+    """
+    rating_scaled = df["Rating"] * 10
+    # assign() builds a new frame instead of adding columns to the caller's.
+    return df.assign(
+        Rating_scaled=rating_scaled,
+        Difference=rating_scaled - df["Metascore"],
+    )
+
+
+def get_looqbox_cmap():
+    """Return the linear colormap running from #B0B0B0 to #3DBE6E."""
+    return LinearSegmentedColormap.from_list(
+        "looqbox", ["#B0B0B0", "#3DBE6E"]
+    )
+
+
+def plot_divergence(df):
+    """Plot the 20 movies with the largest absolute ``Difference``.
+
+    Args:
+        df: Output of ``calculate_difference``.
+    """
+    top = (
+        df.reindex(df["Difference"].abs().sort_values(ascending=False).index)
+        .head(20)
+        .sort_values("Difference")
+    )
+
+    cmap = get_looqbox_cmap()
+    lowest = top["Difference"].min()
+    span = top["Difference"].max() - lowest
+    if span == 0:
+        # All bars are equal; dividing by a zero span would give NaN colors.
+        norm = [0.0] * len(top)
+    else:
+        norm = (top["Difference"] - lowest) / span
+    colors = [cmap(v) for v in norm]
+
+    fig, ax = plt.subplots(figsize=(11, 8))
+
+    bars = ax.barh(
+        top["Title"],
+        top["Difference"],
+        color=colors,
+        edgecolor="white",
+        linewidth=0.5
+    )
+
+    for bar, val in zip(bars, top["Difference"]):
+        x = bar.get_width()
+        ax.text(
+            x + (0.5 if x >= 0 else -0.5),
+            bar.get_y() + bar.get_height() / 2,
+            f"{val:+.1f}",
+            va="center",
+            ha="left" if x >= 0 else "right",
+            fontsize=9,
+            color="#444444"
+        )
+
+    ax.axvline(0, color="#444444", linewidth=1)
+
+    ax.text(
+        top["Difference"].max() * 0.3, -2,
+        "► Público gostou mais",
+        color="#3DBE6E", fontsize=10, fontweight="bold"
+    )
+    ax.text(
+        top["Difference"].min() * 0.9, -2,
+        "◄ Crítica gostou mais",
+        color="#B0B0B0", fontsize=10, fontweight="bold"
+    )
+
+    ax.set_title(
+        "Audience vs Critics: Who Liked It More?",
+        fontsize=14, fontweight="bold", pad=15
+    )
+    ax.set_xlabel("Difference (Audience Score - Critics Score)", fontsize=11)
+    ax.grid(axis="x", alpha=0.3)
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+    plt.tight_layout()
+    plt.show()
+
+
+def main():
+    """Fetch the movies, print the ten largest gaps and show the chart."""
+    movies = get_movies()
+    movies = calculate_difference(movies)
+
+    print("Top 10 Biggest Disagreements:\n")
+    print(
+        movies.reindex(movies["Difference"].abs().sort_values(ascending=False).index)
+        [["Title", "Rating", "Metascore", "Difference"]]
+        .head(10)
+        .to_string(index=False)
+    )
+
+    plot_divergence(movies)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/IMDB/IMDB_metascore_genero.py b/IMDB/IMDB_metascore_genero.py
new file mode 100644
index 0000000..3491437
--- /dev/null
+++ b/IMDB/IMDB_metascore_genero.py
@@ -0,0 +1,135 @@
+"""Measure how IMDb rating and Metascore correlate within each genre."""
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.colors import LinearSegmentedColormap
+from db_connection import engine
+
+
+def get_movies():
+    """Load movies that have both a genre and a Metascore.
+
+    Returns:
+        DataFrame with ``Title``, ``Genre``, ``Rating`` and ``Metascore``.
+    """
+    query = """
+        SELECT
+            Title,
+            Genre,
+            Rating,
+            Metascore
+        FROM `looqbox-challenge`.IMDB_movies
+        WHERE Genre IS NOT NULL
+          AND Metascore IS NOT NULL;
+    """
+    return pd.read_sql(query, engine)
+
+
+def expand_genres(df):
+    """Return a copy of ``df`` with one row per (movie, genre) pair.
+
+    ``Genre`` is split on commas and each piece is stripped of surrounding
+    whitespace. The input frame is left untouched.
+    """
+    # assign() works on a copy, so the caller's DataFrame is not mutated.
+    expanded = df.assign(Genre=df["Genre"].str.split(",")).explode("Genre")
+    expanded["Genre"] = expanded["Genre"].str.strip()
+    return expanded
+
+
+def calculate_correlation_by_genre(df, min_movies=20):
+    """Correlate ``Rating`` with ``Metascore`` inside each genre.
+
+    Args:
+        df: DataFrame with ``Genre``, ``Rating`` and ``Metascore`` columns.
+        min_movies: Genres with fewer rows than this are left out.
+
+    Returns:
+        DataFrame with ``Genre`` and ``Correlation`` columns, sorted by
+        ``Correlation`` from highest to lowest.
+    """
+    return (
+        df.groupby("Genre")
+        .filter(lambda x: len(x) >= min_movies)
+        # Pick the value columns so apply() is not handed the grouping column.
+        .groupby("Genre")[["Rating", "Metascore"]]
+        .apply(lambda x: x["Rating"].corr(x["Metascore"] / 10))
+        .reset_index()
+        .rename(columns={0: "Correlation"})
+        .sort_values("Correlation", ascending=False)
+    )
+
+
+def plot_correlation_by_genre(df_corr):
+    """Draw a horizontal bar chart of the per-genre correlation.
+
+    Args:
+        df_corr: Output of ``calculate_correlation_by_genre``.
+    """
+    looqbox_cmap = LinearSegmentedColormap.from_list(
+        "looqbox", ["#B0B0B0", "#3DBE6E"]
+    )
+
+    n = len(df_corr)
+    # max(..., 1) avoids a ZeroDivisionError when there is a single genre.
+    colors = [looqbox_cmap(i / max(n - 1, 1)) for i in range(n)]
+    colors = colors[::-1]
+
+    fig, ax = plt.subplots(figsize=(11, 7))
+
+    bars = ax.barh(
+        df_corr["Genre"],
+        df_corr["Correlation"],
+        color=colors,
+        edgecolor="white",
+        linewidth=0.5,
+        left=0
+    )
+
+    for bar, val in zip(bars, df_corr["Correlation"]):
+        ax.text(
+            bar.get_width() + 0.005,
+            bar.get_y() + bar.get_height() / 2,
+            f"{val:.2f}",
+            va="center",
+            fontsize=9,
+            color="#444444"
+        )
+    avg = df_corr["Correlation"].mean()
+    ax.axvline(
+        avg,
+        color="#e74c3c",
+        linestyle="--",
+        linewidth=1.5,
+        label=f"Overall Average: {avg:.2f}"
+    )
+
+    ax.set_title(
+        "Audience vs Critics Correlation by Genre",
+        fontsize=14, fontweight="bold", pad=15
+    )
+    ax.set_xlabel("Correlation (Rating vs Metascore)", fontsize=11)
+    ax.set_ylabel("Genre", fontsize=11)
+    ax.legend(fontsize=10)
+    ax.invert_yaxis()
+    ax.grid(axis="x", alpha=0.3)
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+    plt.tight_layout()
+    plt.show()
+
+
+def main():
+    """Fetch the movies, print the per-genre correlation and show the chart."""
+    movies = get_movies()
+    movies = expand_genres(movies)
+
+    corr_by_genre = calculate_correlation_by_genre(movies)
+
+    print(corr_by_genre.to_string(index=False))
+
+    plot_correlation_by_genre(corr_by_genre)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/IMDB/IMDB_nota_ano.py b/IMDB/IMDB_nota_ano.py
new file mode 100644
index 0000000..f5add1e
--- /dev/null
+++ b/IMDB/IMDB_nota_ano.py
@@ -0,0 +1,97 @@
+"""Plot the average IMDb rating per year."""
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.colors import LinearSegmentedColormap
+from db_connection import engine
+
+
+def get_movies():
+    """Load title, year and rating of every movie that has a year.
+
+    Returns:
+        DataFrame with the columns ``Title``, ``Year`` and ``Rating``.
+    """
+    query = """
+        SELECT
+            Title,
+            Year,
+            Rating
+        FROM `looqbox-challenge`.IMDB_movies
+        WHERE Year IS NOT NULL;
+    """
+    return pd.read_sql(query, engine)
+
+
+def plot_rating_by_year(df):
+    """Draw a line chart of the mean ``Rating`` for each ``Year``.
+
+    Args:
+        df: DataFrame with ``Year`` and ``Rating`` columns.
+    """
+    avg_by_year = (
+        df.groupby("Year")["Rating"]
+        .mean()
+        .reset_index()
+    )
+    if avg_by_year.empty:
+        # The y-axis limits below are derived from the data and would be NaN.
+        print("No ratings to plot.")
+        return
+
+    looqbox_cmap = LinearSegmentedColormap.from_list(
+        "looqbox", ["#B0B0B0", "#3DBE6E"]
+    )
+
+    ratings = avg_by_year["Rating"]
+    span = ratings.max() - ratings.min()
+    if span == 0:
+        # One year (or identical averages): a zero span would give NaN colors.
+        norm = [0.0] * len(ratings)
+    else:
+        norm = (ratings - ratings.min()) / span
+    colors = [looqbox_cmap(v) for v in norm]
+
+    fig, ax = plt.subplots(figsize=(12, 5))
+
+    ax.plot(
+        avg_by_year["Year"],
+        avg_by_year["Rating"],
+        color="#B0B0B0",
+        linewidth=2,
+        zorder=1
+    )
+
+    for x, y, c in zip(avg_by_year["Year"], avg_by_year["Rating"], colors):
+        ax.scatter(x, y, color=c, s=80, zorder=2, edgecolors="white", linewidths=0.8)
+        ax.text(x, y + 0.015, f"{y:.2f}", ha="center", fontsize=8, color="#444444")
+
+    avg = ratings.mean()
+    ax.axhline(
+        avg,
+        color="#e74c3c",
+        linestyle="--",
+        linewidth=1.5,
+        label=f"Overall Average: {avg:.2f}"
+    )
+
+    ax.set_title("Average IMDb Rating by Year", fontsize=14, fontweight="bold", pad=15)
+    ax.set_xlabel("Year", fontsize=11)
+    ax.set_ylabel("Average Rating", fontsize=11)
+    ax.set_ylim(ratings.min() - 0.1, ratings.max() + 0.1)
+    ax.legend(fontsize=10)
+    ax.grid(alpha=0.3)
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+    plt.tight_layout()
+    plt.show()
+
+
+def main():
+    """Fetch the movies and show the rating-by-year chart."""
+    movies = get_movies()
+    plot_rating_by_year(movies)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/IMDB/__pycache__/db_connection.cpython-314.pyc b/IMDB/__pycache__/db_connection.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e78b112473651a794ab622ce2e27f086e1cb12b2
GIT binary patch
literal 973
zcma)4QESss6h1fYk~V4LrpOqRZN)LE0|&AfhZH+HTwz;Fx}gt=rAx0f(k0tXHmxY+
z!3W*T@+5s6_#pco`vXFK7}0^^lW&vl2Y7C`6cGv@xZgSFyWctI-kVHostjmvzT}@D
z6M&yQiLJYX$=L+%1n>lOQUafQp+pu1Ul`|cqLf%n`pI!Hg)Gco)?x;gxIXTOJ_~ws
z7Q1ot#lD(>o<{1c5qp|cUyan$r2A@8J&mlV&2$EI*_06#Q|!hPV#Z8m;I8Z|W-<$=
z(yPem>M;1KPHsX@`^7J{k|UAQbnA^;!}Y1Ni>YccTK8#<Ivch_BdIuFTP#;syFsO_
zUj!?K<%KS@R$b5|1vQmIwfd;67her-w!H1;^j53u=VoRiG8d5t5t*_0#MlH361dK8
zBLrZ?JXARQkHy~!&rMcxLL|nAvs}lamVXFmyyE;Zf;DHXd6ap^4x&+ZXpz~w)F@J~
z{?vB8i7IW<qfbX4U1Q00x7Xd5@z%uZ(V^womTQ!ji}OZfy>{8%cAYz0tw`IT{$CWk
zhQqGo@~$8R@J$)o&%e)il+n-0Q8t8JPKyNBi^PkkW&B=|jBitKTF>ZaizPWBVpn{4
zHh>8eIY=B*9MT9;rdU`l%onN)wKaW-X`JVi*&qi#hFwL7QujCA#!i#w*$AfmpY=W<
zoF)h%Um<-0V<#~A1C+PwcR914+slQejyx7f$N!&cgZuWL-Bvr=RG^%SAPt8vLc6_v
xFnXXKi_$wOXoV!a6^^vC?T77qIC@~}Gyz2YErUE9zV4(a1MyrW<VN@0e*iup(RKg;

literal 0
HcmV?d00001

diff --git a/IMDB/db_connection.py b/IMDB/db_connection.py
new file mode 100644
index 0000000..1ae7ea9
--- /dev/null
+++ b/IMDB/db_connection.py
@@ -0,0 +1,39 @@
+"""Build the module-level SQLAlchemy engine from DB_* environment variables."""
+
+import os
+from urllib.parse import quote_plus
+
+from dotenv import load_dotenv
+from sqlalchemy import create_engine
+
+# Load variables from a .env file, if one is found, into the environment.
+load_dotenv()
+
+DB_HOST = os.getenv("DB_HOST")
+DB_PORT = os.getenv("DB_PORT")
+DB_NAME = os.getenv("DB_NAME")
+DB_USER = os.getenv("DB_USER")
+DB_PASSWORD = os.getenv("DB_PASSWORD")
+
+_missing = [
+    name
+    for name in ("DB_HOST", "DB_PORT", "DB_NAME", "DB_USER", "DB_PASSWORD")
+    if os.getenv(name) is None
+]
+if _missing:
+    # Otherwise the literal text "None" would be formatted into the URL.
+    raise RuntimeError("Missing database settings: " + ", ".join(_missing))
+
+# quote_plus() escapes characters such as "@", ":" or "/" in the credentials
+# that would otherwise be read as URL delimiters.
+DATABASE_URL = (
+    f"mysql+pymysql://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}"
+    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
+)
+
+engine = create_engine(DATABASE_URL)
+
+
+def get_connection():
+    """Open and return a new connection from the module-level engine."""
+    return engine.connect()
diff --git a/sales/__pycache__/db_connection.cpython-314.pyc b/sales/__pycache__/db_connection.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6c077e731068d3086d9afc9be2843a46b47d5cea
GIT binary patch
literal 974
zcma)4QA^uU6h1etNt-m8ZVVBJS|*N8m@s^qY_dva7Mv}qGK6HMP48SrO*Dya9ec>b
z9yVXh)99nHhvDz=4-EPucM~Ri+M7dvz|O6vj6q=s?sv}l?sv|)_a@VtDg)YS{n4vq
z0`QY3v2}MaIiJ9t5T1ihiV$!w6v>hh2;)3X6cbCyAUO`EkcHW+TFjsv*T?<PXF*TS
zVmEGq*jF>q(@1?aVo#ImtC4z|bYD%Xr;+uvna+SNn=+zeirrX3%$TVR+?NBzOlHAU
zdKDR69fm;F$!*AKzxbtAa)c>Ox7MgO+<-c}n5vdXYXPlNXTx?VlL`yfrBY?J8<b1>
zWw4Tey4YpbDvLT(P*cuVD$h!K;qBmN%inHJZ?(F9Zf1s&IYu5cGGp<Hu?ZL?aGl>p
z2*HSXD0B87i@y_|o2=vnCdP-eT*skSa0utT;$j`anlsjX>iNbFqET{a!LxU%QJ{Y9
zh3)zi722dnUyeSz#<J^f*WEYq*2L=3q2<_?YxuP$^^Hcodez=`oqJm?rftyRFUq@t
z({AGOuOWo+T^ZV+|2W@KM!zOUy&>ds8WUWfiI;Ed@ryATAE(x|Uee8$m*j+qUGe3;
z0ZcrRgTx`lA&tN?h5TxMAzxXnuIbC3#(6&38|1*pc-IkF>fxr_*lE&vZv<2R)B2wf
z&Ju)>Z;(ELu@jg)1?9c^L(c5y_Ht3NBaemB@&9Mq;J&?Qx7Cg|6)I;UNW;<V$Zl^R
wj2@`RqVz!uTM>!wL?i8N`$_u&jvkmgO8`-S%ODR&?>gzpP`nTcxz#=QAJtFNWB>pF

literal 0
HcmV?d00001

diff --git a/sales/__pycache__/retrieve_data.cpython-314.pyc b/sales/__pycache__/retrieve_data.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5d8ad62a367d61332d824f6dd10c83495224e2ed
GIT binary patch
literal 2520
zcmb7GT}%{59G~62kLBPF4z5*!;}(#QD@xUp)FNJk+@6}~fy;SlT{xH95!W1h$L^jL
zY78$;h?<x*ZAzmiBsHm~eek_LG*QvV1*DMenUJ<GeIwD-q`q`!xjpWviFPjR{6BvG
zkD32}X6W#^T>y>VFG3f-a{%xMW~@Qxi5GhiF%3olM;rwjfoAe3d4$ra0G3flZAa{y
zJped6XWIr+gy!IgW)O7zh0UWu!mw%Zl7?mnl*c3)CaJt%JkjV6^DpMncN)Y2+z&uK
z;0dh6BMx{g573pT+BQ^RtjWaM^DN4bcUp8Dbme^v;22v*1=vSg`twAb_)k>ge?%pV
zq7s(ZcZf>DFSo3f<jrc}WZ$}0swiq<EpLy?yREH=7pz9W{!;*qdChFzq$l@(4S-l-
z>MJdK&StIkJn=@a1okS{V5>Me0(R9}mZ+**_)aoIa`p`MA;{QJ4|q%)igWBLWCK8k
z8nSWDR^$ZL%R<mw5I(NVM3B(zjX1@*N)cNl$8B8shQ$a`lA(>O!16l4S6O)vR9n>M
z+z7hz%mE@nWXUX*C`dAP-e(~P-nXdDxe@R?EG|x2yEzo&+#l!9NXEed{Oii|0T6dI
z0|s<}!Wlqkq8><O0Qf<`{GCFG1C$>~1OxcOo;!fpiIEv6@3&%do|v=vBm_WFz6};?
zbKYEQmh_{nGR{w(&G}g(q<qw1Wpt_|qol=bQcEP$B4oz1s>X~$Mvxgv*5DXaIvEZo
zp>z?7-ON-_)zS)lCul$vv?6~@N>a5FU(roKGA(O@Br|)Nw8HFTutM!-&a|CIWB2jn
zyTjqOVXP{V>|#pvQ=hq*qA|!GWBX#v2PMJ?*MA&)7ZTQ@RUrk{63oyMmSa&S2bsME
z`BWp*8x1o9Tz~jvUo6qrA7+uh+j4?;&ZsEnru?SCV62~G3-HaoGhCbWREG%-_r_S}
z5E~m}*(lSkYJ#FA%nibUyP+(a95%Ex25m|4g`E);VO-7Pw*US1b>>7CDwE7PK}zAe
z1*;5~q)IX}mXpw+P-%^_*klH>ijr2)t&d29P9$Vu93~Qm4cE$`QcyNraUqr6fGIZQ
z+i;><p@%^;$c$*v3KYbII+0Rv(;H>%r6kN~Qd&0b8O00|Mw=Z$nZY2fI1om`x4vP+
zD>Lj9I7tQHqlOK;HAwNCK|mG10<EpBW;I`)MohAsKdC}R<+JF^_scM>poZkbP!-Ne
zX_Xm-DfsY*hu^39W9jt7X!??+Vqy>PC1oj@=FLL$3e*${UW9lvh0e^R;VzLV0Tc|U
zss4ywq-n6`tXXshesbOKc~rB~&MvmIInOq|mVG2X?OBSPT8Rh?5kY51_3g<ekEqk)
zn$x@J^xwH~zxKi4N^9?8Yp-e0_bB|db}1ZN2@fxXhxNV@eS2cbb5^I%t~tvWo!fGi
zJ7#y@v8`0LELOJUs_XQIgAc@Cx_-W}5<0OEI-wur^!mZ2>X=>;%awa(v}>Qt64yhw
z2IqV?Pv7<48qQVM>;CbjYFV$4ivX*>?X%u*4$sEs8gCEjH7zsYb(_syG0m>KfTwCY
zva!5X+O2!Lmg%k;`)cFvnJWu)!^Sd9#hzt)4^jkzGnW_W`i*o=MQE80VMX&?@9ocL
z9Hx5J--tsYZfb$9UAH;y4LNtsjghM(D{lWYw|~x;^VWP@_f_4k*j&raGk14B^M+;)
zy`q4pVXe|TQ})V^s9)WFlX?ATx$h+)kqeYyRny$wxn|wd`iySNReEn+zIyrEm1Wxh
zdfko?|EN8vXphVNk#_L7y(!|e&%4SHJ?|@zw3N*U?3iw$@VCAEFhk9M)O5Ixdh#BD
z=qGh{^bJzcXf)`==SUU>Rl$z|I&2duA(cE2$0s)$+%1lxyU*-@)a2%YFk9vxc!BUh
u!56VX_l%<pH3dUT3u5tqHY$2p!8%0{#BZSHIjDOMn7<tzM1!&u$^QkN<}Pdi

literal 0
HcmV?d00001

diff --git a/sales/client_query.py b/sales/client_query.py
new file mode 100644
index 0000000..a8c815d
--- /dev/null
+++ b/sales/client_query.py
@@ -0,0 +1,57 @@
+"""Print SALES_VALUE / SALES_QTY per store for 2019-10-01 to 2019-12-31."""
+
+import pandas as pd
+from sqlalchemy import text
+from db_connection import engine
+
+# Store columns; STORE_NAME and BUSINESS_NAME label the final table.
+query1 = """
+SELECT
+      STORE_CODE,
+      STORE_NAME,
+      START_DATE,
+      END_DATE,
+      BUSINESS_NAME,
+      BUSINESS_CODE
+FROM `looqbox-challenge`.data_store_cad
+"""
+
+# Sales rows for the whole of 2019; narrowed to the last quarter below.
+query2 = """
+SELECT
+        STORE_CODE,
+        DATE,
+        SALES_VALUE,
+        SALES_QTY
+FROM `looqbox-challenge`.data_store_sales
+WHERE DATE BETWEEN '2019-01-01' AND '2019-12-31'
+"""
+
+with engine.connect() as conn:
+    df_stores = pd.read_sql(text(query1), conn)
+    df_sales = pd.read_sql(text(query2), conn)
+
+df_sales['DATE'] = pd.to_datetime(df_sales['DATE'])
+df_sales = df_sales[
+    (df_sales['DATE'] >= '2019-10-01') &
+    (df_sales['DATE'] <= '2019-12-31')
+]
+
+df = df_sales.merge(df_stores, on='STORE_CODE', how='inner')
+
+# Sum both columns per group and divide once. This replaces groupby.apply,
+# which pandas 2.2+ warns about when the lambda receives the grouping columns,
+# and it still yields three columns when no rows match the period.
+totals = df.groupby(['STORE_NAME', 'BUSINESS_NAME'])[
+    ['SALES_VALUE', 'SALES_QTY']
+].sum()
+df_result = (
+    (totals['SALES_VALUE'] / totals['SALES_QTY'])
+    .round(2)
+    .reset_index()
+)
+
+df_result.columns = ['Loja', 'Categoria', 'TM']
+df_result = df_result.sort_values('Loja').reset_index(drop=True)
+
+print(df_result.to_string(index=False))
diff --git a/sales/db_connection.py b/sales/db_connection.py
new file mode 100644
index 0000000..1ae7ea9
--- /dev/null
+++ b/sales/db_connection.py
@@ -0,0 +1,39 @@
+"""Build the module-level SQLAlchemy engine from DB_* environment variables."""
+
+import os
+from urllib.parse import quote_plus
+
+from dotenv import load_dotenv
+from sqlalchemy import create_engine
+
+# Load variables from a .env file, if one is found, into the environment.
+load_dotenv()
+
+DB_HOST = os.getenv("DB_HOST")
+DB_PORT = os.getenv("DB_PORT")
+DB_NAME = os.getenv("DB_NAME")
+DB_USER = os.getenv("DB_USER")
+DB_PASSWORD = os.getenv("DB_PASSWORD")
+
+_missing = [
+    name
+    for name in ("DB_HOST", "DB_PORT", "DB_NAME", "DB_USER", "DB_PASSWORD")
+    if os.getenv(name) is None
+]
+if _missing:
+    # Otherwise the literal text "None" would be formatted into the URL.
+    raise RuntimeError("Missing database settings: " + ", ".join(_missing))
+
+# quote_plus() escapes characters such as "@", ":" or "/" in the credentials
+# that would otherwise be read as URL delimiters.
+DATABASE_URL = (
+    f"mysql+pymysql://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}"
+    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
+)
+
+engine = create_engine(DATABASE_URL)
+
+
+def get_connection():
+    """Open and return a new connection from the module-level engine."""
+    return engine.connect()
diff --git a/sales/queries.py b/sales/queries.py
new file mode 100644
index 0000000..65d47d8
--- /dev/null
+++ b/sales/queries.py
@@ -0,0 +1,38 @@
+# Description of the requested queries (MySQL: "#" starts a comment).
+
+# 1. What are the 10 most expensive products in the company?
+# NULLIF makes rows with a zero quantity yield NULL, which MAX() ignores.
+SELECT
+    product.PRODUCT_NAME,
+    product.PRODUCT_COD,
+    MAX(SALES_VALUE / NULLIF(SALES_QTY, 0)) AS UNIT_PRICE
+FROM `looqbox-challenge`.data_product product
+INNER JOIN `looqbox-challenge`.data_product_sales sales
+    ON product.PRODUCT_COD = sales.PRODUCT_CODE
+GROUP BY
+    product.PRODUCT_COD,
+    product.PRODUCT_NAME
+ORDER BY UNIT_PRICE DESC
+LIMIT 10;
+
+
+# 2. What sections do the 'BEBIDAS' and 'PADARIA' departments have?
+# Single quotes keep the literals valid even when ANSI_QUOTES is enabled.
+SELECT DISTINCT
+    DEP_NAME,
+    SECTION_NAME
+FROM `looqbox-challenge`.data_product
+WHERE DEP_NAME IN ('BEBIDAS', 'PADARIA')
+ORDER BY DEP_NAME, SECTION_NAME;
+
+
+# 3. What was the total sale of products (in $) of each Business Area in the first quarter of 2019?
+# The half-open range also covers 2019-03-31 if DATE stores a time of day.
+SELECT
+    store.BUSINESS_NAME,
+    SUM(sales.SALES_VALUE) AS TOTAL_VALUE
+FROM `looqbox-challenge`.data_store_cad store
+INNER JOIN `looqbox-challenge`.data_product_sales sales
+    ON store.STORE_CODE = sales.STORE_CODE
+WHERE sales.DATE >= '2019-01-01' AND sales.DATE < '2019-04-01'
+GROUP BY store.BUSINESS_NAME;
diff --git a/sales/retrieve_data.py b/sales/retrieve_data.py
new file mode 100644
index 0000000..9ce89b7
--- /dev/null
+++ b/sales/retrieve_data.py
@@ -0,0 +1,89 @@
+"""Filtered reads from the ``data_product_sales`` table."""
+
+import datetime as dt
+import numbers
+
+import pandas as pd
+from sqlalchemy import text
+from sqlalchemy.exc import SQLAlchemyError
+from db_connection import engine
+
+
+def _validate_code(name, value):
+    """Raise ``TypeError`` unless ``value`` is ``None`` or an integer."""
+    if value is None:
+        return
+    # bool is a subclass of int, so True/False must be rejected explicitly.
+    if isinstance(value, bool) or not isinstance(value, numbers.Integral):
+        raise TypeError(f"{name} must be an integer. Received: {type(value).__name__}")
+
+
+def _normalize_dates(date):
+    """Validate ``date`` and return it as a ``[start, end]`` pair."""
+    if not isinstance(date, (list, tuple)):
+        raise TypeError(f"date must be a list. Received: {type(date).__name__}")
+    if len(date) == 0 or len(date) > 2:
+        raise ValueError("date must contain 1 or 2 dates: ['YYYY-MM-DD'] or ['YYYY-MM-DD', 'YYYY-MM-DD']")
+    for item in date:
+        if isinstance(item, dt.date):
+            continue
+        if not isinstance(item, str):
+            raise TypeError(f"each date must be a string or a date. Received: {type(item).__name__}")
+        try:
+            dt.datetime.fromisoformat(item)
+        except ValueError as err:
+            raise ValueError(f"invalid date {item!r}; expected 'YYYY-MM-DD'") from err
+    # A single date selects just that day, so it is both start and end.
+    return [date[0], date[-1]]
+
+
+def retrieve_data(product_code=None, store_code=None, date=None):
+    """Return rows of ``data_product_sales`` matching the given filters.
+
+    Args:
+        product_code: Integer matched against ``PRODUCT_CODE``, or ``None``.
+        store_code: Integer matched against ``STORE_CODE``, or ``None``.
+        date: List (or tuple) with one date or a ``[start, end]`` pair,
+            matched with ``DATE BETWEEN start AND end``; ``None`` disables
+            the filter. Items are ISO 'YYYY-MM-DD' strings or date objects.
+
+    Returns:
+        DataFrame with the matching rows. If the query fails, the error is
+        printed and an empty DataFrame is returned.
+
+    Raises:
+        TypeError: If an argument has the wrong type.
+        ValueError: If ``date`` has the wrong length or a malformed item.
+    """
+    _validate_code("product_code", product_code)
+    _validate_code("store_code", store_code)
+    if date is not None:
+        date = _normalize_dates(date)
+
+    query = """
+            SELECT *
+            FROM data_product_sales
+            WHERE 1 = 1
+        """
+    params = {}
+
+    if product_code is not None:
+        query += " AND PRODUCT_CODE = :product_code"
+        # int() turns integer-like values (e.g. numpy ints) into plain ints.
+        params["product_code"] = int(product_code)
+
+    if store_code is not None:
+        query += " AND STORE_CODE = :store_code"
+        params["store_code"] = int(store_code)
+
+    if date is not None:
+        query += " AND DATE BETWEEN :start_date AND :end_date"
+        params["start_date"], params["end_date"] = date
+
+    try:
+        with engine.connect() as conn:
+            return pd.read_sql(text(query), conn, params=params)
+    except SQLAlchemyError as e:
+        # Best effort: report the database error and hand back no rows.
+        print(f"❌ Query failed: {e}")
+        return pd.DataFrame()
diff --git a/sales/test_retrieve_data.py b/sales/test_retrieve_data.py
new file mode 100644
index 0000000..cb02b6e
--- /dev/null
+++ b/sales/test_retrieve_data.py
@@ -0,0 +1,24 @@
+"""Manual checks that print what ``retrieve_data`` returns per filter set."""
+from retrieve_data import retrieve_data
+
+# Each case: (banner to print, keyword filters, print the first rows?)
+CASES = (
+    ("--------- Test 1: Only date ---------",
+     {"date": ['2019-01-01', '2019-01-31']}, True),
+    ("\n--------- Test 2: Only products ---------",
+     {"product_code": 18}, True),
+    ("\n--------- Test 3: Only store ---------",
+     {"store_code": 1}, True),
+    ("\n---------Test 4: All filters ---------",
+     {"product_code": 18, "store_code": 1,
+      "date": ['2019-01-01', '2019-01-31']}, True),
+    ("\n--------- Test 5: No filters ---------",
+     {}, False),
+)
+
+for banner, filters, show_head in CASES:
+    print(banner)
+    df = retrieve_data(**filters)
+    print(df.shape)
+    if show_head:
+        print(df.head())