looqbox · srgmsx · Jun 26, 2026
diff --git a/.env.example b/.env.example
@@ -0,0 +1,5 @@
+DB_HOST=
+DB_PORT=3306
+DB_USER=
+DB_PASSWORD=
+DB_NAME=
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,21 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Virtual environments
+.venv/
+venv/
+env/
+
+# Environment variables (never commit real credentials)
+.env
+
+# Generated outputs (the pattern below also matches logo.png; add it with "git add -f")
+*.png
+
+# IDE / OS
+.vscode/
+.idea/
+.DS_Store
+Thumbs.db
diff --git a/.streamlit/config.toml b/.streamlit/config.toml
@@ -0,0 +1,2 @@
+[theme]
+primaryColor = "#3FD569"
diff --git a/c1_dynamic_query.py b/c1_dynamic_query.py
@@ -0,0 +1,52 @@
+import pandas as pd
+import mysql.connector
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+
+def retrieve_data(product_code=None, store_code=None, date=None):
+    """
+    Return the data_product_sales rows matching the given filters as a DataFrame.
+
+    Parameters:
+        product_code: a single code, or a list/tuple of codes. An empty
+            list/tuple matches no rows.
+        store_code: a single store code.
+        date: an indexable (start, end) pair, both ends inclusive, or a single
+            day given as a scalar (for example a string or a date object).
+
+    A filter left as None is not applied. Filter values are always sent as
+    bound parameters, never interpolated into the SQL text.
+    """
+    conditions = []
+    values = []
+
+    if isinstance(product_code, (list, tuple)):
+        if product_code:
+            placeholders = ", ".join(["%s"] * len(product_code))
+            conditions.append(f"PRODUCT_CODE IN ({placeholders})")
+            values.extend(product_code)
+        else:
+            # "IN ()" is a MySQL syntax error; an empty selection matches nothing.
+            conditions.append("1 = 0")
+    elif product_code is not None:
+        conditions.append("PRODUCT_CODE = %s")
+        values.append(product_code)
+
+    if store_code is not None:
+        conditions.append("STORE_CODE = %s")
+        values.append(store_code)
+
+    if date is not None:
+        if isinstance(date, str) or not hasattr(date, "__getitem__"):
+            # Indexing a scalar would compare against its first two characters
+            # (str) or fail outright (date objects), so match that single day.
+            conditions.append("`DATE` = %s")
+            values.append(date)
+        else:
+            conditions.append("`DATE` BETWEEN %s AND %s")
+            values.extend([date[0], date[1]])
+
+    query = "SELECT * FROM data_product_sales"
+    if conditions:
+        query += " WHERE " + " AND ".join(conditions)
+
+    # The query is assembled first so that malformed arguments fail before a
+    # connection is opened.
+    conn = mysql.connector.connect(
+        host=os.environ.get("DB_HOST"),
+        # Environment values are strings; an unset or blank DB_PORT falls back to 3306.
+        port=int(os.environ.get("DB_PORT") or 3306),
+        user=os.environ.get("DB_USER"),
+        password=os.environ.get("DB_PASSWORD"),
+        database=os.environ.get("DB_NAME"),
+    )
+    try:
+        return pd.read_sql_query(query, conn, params=values)
+    finally:
+        conn.close()
diff --git a/c2_average_ticket.py b/c2_average_ticket.py
@@ -0,0 +1,111 @@
+import os
+
+import pandas as pd
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import mysql.connector
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Store registry columns read from data_store_cad.
+QUERY_STORES = """
+SELECT
+      STORE_CODE,
+      STORE_NAME,
+      START_DATE,
+      END_DATE,
+      BUSINESS_NAME,
+      BUSINESS_CODE
+FROM data_store_cad
+"""
+
+# Per-store sales rows from data_store_sales, limited to calendar year 2019.
+QUERY_SALES = """
+SELECT
+        STORE_CODE,
+        DATE,
+        SALES_VALUE,
+        SALES_QTY
+FROM data_store_sales
+WHERE DATE BETWEEN '2019-01-01' AND '2019-12-31'
+"""
+
+# Inclusive bounds of the reporting window; build_average_ticket keeps only
+# the sales whose DATE falls between them.
+PERIOD_START = "2019-10-01"
+PERIOD_END = "2019-12-31"
+
+
+def get_connection():
+    """Open a MySQL connection configured from the DB_* environment variables.
+
+    The caller is responsible for closing the returned connection.
+    """
+    return mysql.connector.connect(
+        host=os.environ.get("DB_HOST"),
+        # Environment values are strings, so the port is converted explicitly
+        # (as db.py does); an unset or blank DB_PORT falls back to 3306.
+        port=int(os.environ.get("DB_PORT") or 3306),
+        user=os.environ.get("DB_USER"),
+        password=os.environ.get("DB_PASSWORD"),
+        database=os.environ.get("DB_NAME"),
+    )
+
+
+def load_data():
+    """Read the store registry and the sales rows over a single connection.
+
+    Returns a (stores, sales) tuple of DataFrames. The connection is closed
+    whether or not the queries succeed.
+    """
+    connection = get_connection()
+    try:
+        store_rows, sales_rows = (
+            pd.read_sql_query(sql, connection) for sql in (QUERY_STORES, QUERY_SALES)
+        )
+    finally:
+        connection.close()
+    return store_rows, sales_rows
+
+
+def build_average_ticket(stores, sales, start=None, end=None):
+    """Build the average-ticket table, one row per store.
+
+    Parameters:
+        stores: DataFrame with STORE_CODE, STORE_NAME and BUSINESS_NAME columns.
+        sales: DataFrame with STORE_CODE, DATE, SALES_VALUE and SALES_QTY columns.
+        start, end: inclusive bounds of the window applied to DATE. Each one
+            defaults to PERIOD_START / PERIOD_END when left as None.
+
+    Returns a DataFrame with the columns Loja (store name), Categoria
+    (business name) and TM, sorted by Loja. TM is the summed SALES_VALUE
+    divided by the summed SALES_QTY inside the window, rounded to 2 decimals.
+    Sales whose STORE_CODE is missing from stores are dropped by the inner
+    join. Neither input frame is modified.
+    """
+    start = PERIOD_START if start is None else start
+    end = PERIOD_END if end is None else end
+
+    # Dates are parsed into a separate Series, so no copy of sales is needed.
+    dates = pd.to_datetime(sales["DATE"])
+    in_window = sales.loc[(dates >= start) & (dates <= end)]
+
+    totals = in_window.groupby("STORE_CODE", as_index=False)[["SALES_VALUE", "SALES_QTY"]].sum()
+    totals["TM"] = (totals["SALES_VALUE"] / totals["SALES_QTY"]).round(2)
+
+    store_names = stores[["STORE_CODE", "STORE_NAME", "BUSINESS_NAME"]]
+    result = totals.merge(store_names, on="STORE_CODE", how="inner")
+    result = result.rename(columns={"STORE_NAME": "Loja", "BUSINESS_NAME": "Categoria"})
+    return result[["Loja", "Categoria", "TM"]].sort_values("Loja").reset_index(drop=True)
+
+
+def plot_table(df, path="average_ticket.png"):
+    """Render df as a table image, save it to path and return path."""
+    n_rows, n_cols = len(df), len(df.columns)
+
+    # The figure height grows with the number of rows.
+    figure, axes = plt.subplots(figsize=(6, 0.4 * n_rows + 1))
+    axes.axis("off")
+
+    grid = axes.table(
+        cellText=df.values, colLabels=df.columns, cellLoc="center", loc="center"
+    )
+    grid.auto_set_font_size(False)
+    grid.set_fontsize(10)
+    grid.scale(1, 1.4)
+
+    # Row 0 of the table holds the column labels; style it as the header.
+    for header in (grid[0, idx] for idx in range(n_cols)):
+        header.set_facecolor("#2f5496")
+        header.set_text_props(color="white", weight="bold")
+
+    figure.tight_layout()
+    figure.savefig(path, dpi=150, bbox_inches="tight")
+    plt.close(figure)
+    return path
+
+
+def main():
+    """Compute the average-ticket table, print it and save it as an image."""
+    ticket_table = build_average_ticket(*load_data())
+    print(ticket_table.to_string(index=False))
+    image_path = plot_table(ticket_table)
+    print(f"\nVisualização salva em: {image_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/c3_visualization.py b/c3_visualization.py
@@ -0,0 +1,78 @@
+import pandas as pd
+import plotly.express as px
+import streamlit as st
+
+from db import load_movies
+
+# Browser-tab title and icon, plus the wide layout used by the side-by-side tables further down.
+st.set_page_config(page_title="IMDB dashboard", page_icon="🎬", layout="wide")
+st.title("IMDB Movies: Popularity and Rating")
+
+
+@st.cache_data
+def load_data():
+    """Load the movies table and add a Decade column.
+
+    Decade is Year floored to a multiple of 10 (for example 1994 -> 1990).
+    The result is cached by Streamlit through st.cache_data.
+    """
+    movies = load_movies()
+    return movies.assign(Decade=movies["Year"] // 10 * 10)
+
+
+df = load_data()
+
+
+# Headline figures, rendered left to right in four equal columns.
+headline_figures = [
+    ("Total Films", f"{len(df):,}"),
+    ("Avg Rating", f"{df['Rating'].mean():.1f}"),
+    ("Avg Votes", f"{df['Votes'].mean():,.0f}"),
+    ("Median Votes", f"{df['Votes'].median():,.0f}"),
+]
+for column, (label, figure) in zip(st.columns(4), headline_figures):
+    column.metric(label, figure)
+
+st.divider()
+
+
+st.subheader("Votes vs Rating")
+fig = px.scatter(
+    # Rows with no votes cannot be placed on a log axis, and the log-space
+    # trendline fit below rejects non-positive x values.
+    df.dropna(subset=["Votes", "Rating"]).loc[lambda rows: rows["Votes"] > 0],
+    x="Votes",
+    y="Rating",
+    color="Decade",
+    color_continuous_scale=["#c8f5d5", "#3FD569", "#1a6b35"],
+    hover_data=["Title", "Year", "Director"],
+    trendline="ols",
+    # Fit the regression on log(Votes) so the line agrees with the log-scaled
+    # x axis; the default fit is linear in raw Votes.
+    trendline_options={"log_x": True},
+    log_x=True,
+    labels={"Votes": "Votes (log scale)", "Rating": "IMDB Rating"},
+)
+st.plotly_chart(fig, use_container_width=True)
+
+st.caption(
+    "Votes on a log scale because the distribution is highly skewed "
+    "(a few blockbusters with millions of votes, most films with very few)."
+)
+
+st.divider()
+
+
+left_col, right_col = st.columns(2)
+
+# Left: highly rated films that few people voted on.
+with left_col:
+    st.subheader("Low visibility, high ratings")
+    st.caption("Rating ≥ 8 · Votes ≤ 50k")
+    gem_columns = ["Title", "Year", "Director", "Genre", "Rating", "Votes"]
+    is_hidden_gem = (df["Rating"] >= 8) & (df["Votes"] <= 50_000)
+    hidden_gems = df.loc[is_hidden_gem, gem_columns]
+    # Best-rated first.
+    hidden_gems = hidden_gems.sort_values("Rating", ascending=False).reset_index(drop=True)
+    st.dataframe(hidden_gems, use_container_width=True)
+    st.caption(f"{len(hidden_gems)} films found")
+
+# Right: films in the top quarter by votes whose rating is under the median.
+with right_col:
+    st.subheader("High visibility, below-median ratings")
+    st.caption("Top 25% by votes · Below median rating")
+    votes_cutoff = df["Votes"].quantile(0.75)
+    rating_cutoff = df["Rating"].median()
+    popular_columns = ["Title", "Year", "Director", "Rating", "Votes"]
+    is_overhyped = (df["Votes"] >= votes_cutoff) & (df["Rating"] < rating_cutoff)
+    popular_low_rated = df.loc[is_overhyped, popular_columns]
+    # Most-voted first.
+    popular_low_rated = popular_low_rated.sort_values("Votes", ascending=False).reset_index(drop=True)
+    st.dataframe(popular_low_rated, use_container_width=True)
+    st.caption(f"{len(popular_low_rated)} films found")
diff --git a/db.py b/db.py
@@ -0,0 +1,25 @@
+import os
+
+import mysql.connector
+import pandas as pd
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+def _get_connection():
+    """Open a MySQL connection configured from the DB_* environment variables.
+
+    The caller is responsible for closing the returned connection.
+    """
+    return mysql.connector.connect(
+        host=os.environ.get("DB_HOST"),
+        # "or" also covers a DB_PORT that is present but blank, which int()
+        # would otherwise reject.
+        port=int(os.environ.get("DB_PORT") or 3306),
+        user=os.environ.get("DB_USER"),
+        password=os.environ.get("DB_PASSWORD"),
+        database=os.environ.get("DB_NAME"),
+    )
+
+
+def load_movies() -> pd.DataFrame:
+    """Return every row and column of the IMDB_movies table.
+
+    A connection is opened for the call and closed afterwards, including when
+    the query raises.
+    """
+    connection = _get_connection()
+    try:
+        movies = pd.read_sql_query("SELECT * FROM IMDB_movies", connection)
+    finally:
+        connection.close()
+    return movies
diff --git a/logo.png b/logo.png
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,7 @@
+streamlit
+pandas
+plotly>=5.18
+statsmodels
+matplotlib
+mysql-connector-python
+python-dotenv