From a551570760c052f02da22567fc5adb47e173da7c Mon Sep 17 00:00:00 2001 From: Carlos Augusto Rodrigues Teixeira <135044199+carlos1818augusto@users.noreply.github.com> Date: Fri, 26 Jun 2026 15:05:18 -0300 Subject: [PATCH 01/11] Add requirements for challenge solution --- requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8566940 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +pandas>=3.0.0 +PyMySQL>=1.1.0 +SQLAlchemy>=2.0.0 +matplotlib>=3.8.0 +reportlab>=4.0.0 +python-dotenv>=1.0.0 From 5d4ee070461109133c481bfbf0488f775542bd8d Mon Sep 17 00:00:00 2001 From: Carlos Augusto Rodrigues Teixeira <135044199+carlos1818augusto@users.noreply.github.com> Date: Fri, 26 Jun 2026 15:06:23 -0300 Subject: [PATCH 02/11] Add Python and environment files to .gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0b2fb07 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +.env +.venv/ +venv/ From 9a753141e226a34d2a15469544abd2a8305e3ed0 Mon Sep 17 00:00:00 2001 From: Carlos Augusto Rodrigues Teixeira <135044199+carlos1818augusto@users.noreply.github.com> Date: Fri, 26 Jun 2026 15:06:33 -0300 Subject: [PATCH 03/11] Add example environment variables for configuration --- .env.example | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..83fbdd0 --- /dev/null +++ b/.env.example @@ -0,0 +1,5 @@ +DB_HOST=35.199.115.174 +DB_PORT=3306 +DB_NAME=looqbox-challenge +DB_USER=looqbox-challenge +DB_PASSWORD= From ba3fe66de592ff9dd899f17a9e94cdd5ae023486 Mon Sep 17 00:00:00 2001 From: Carlos Augusto Rodrigues Teixeira <135044199+carlos1818augusto@users.noreply.github.com> Date: Fri, 26 Jun 2026 15:06:43 -0300 Subject: [PATCH 04/11] Add 
README for Looqbox Data Challenge solution --- README_SOLUTION.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 README_SOLUTION.md diff --git a/README_SOLUTION.md b/README_SOLUTION.md new file mode 100644 index 0000000..6a589c8 --- /dev/null +++ b/README_SOLUTION.md @@ -0,0 +1,28 @@ +# Looqbox Data Challenge - Solução + +Esta solução contém as respostas em SQL, código Python reutilizável e artefatos gerados para o desafio técnico da Looqbox. + +## Arquivos + +- `sql/answers.sql`: consultas SQL das três perguntas. +- `src/db.py`: configuração do banco e criação da engine SQLAlchemy. +- `src/data_access.py`: função reutilizável `retrieve_data(product_code, store_code, date)`. +- `src/generate_outputs.py`: executa as respostas SQL, a transformação do caso 2, o gráfico IMDB e a geração do PDF. +- `output/`: CSVs gerados, gráfico PNG e PDF final. + +## Como executar + +Crie um arquivo `.env` a partir de `.env.example` e preencha `DB_PASSWORD`. + +```powershell +py -m pip install -r requirements.txt +py src\generate_outputs.py +``` + +O schema do banco se chama `looqbox-challenge`, com hífen. O código Python conecta diretamente nesse schema; os arquivos SQL usam crase quando necessário. + +## Observações + +- O caso 1 usa SQL parametrizado e valida as datas antes da consulta. +- O caso 2 mantem as duas consultas do cliente inalteradas e aplica o filtro de datas solicitado no pandas. +- O caso 3 expande os gêneros da tabela IMDB separados por vírgula e compara os principais gêneros por receita média. 
From c13de17746d154476949380692e7f4f5b0bdfce5 Mon Sep 17 00:00:00 2001 From: Carlos Augusto Rodrigues Teixeira <135044199+carlos1818augusto@users.noreply.github.com> Date: Fri, 26 Jun 2026 15:06:52 -0300 Subject: [PATCH 05/11] Add SQL queries for product analysis and sales --- sql/answers.sql | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 sql/answers.sql diff --git a/sql/answers.sql b/sql/answers.sql new file mode 100644 index 0000000..f5b0a77 --- /dev/null +++ b/sql/answers.sql @@ -0,0 +1,34 @@ +USE `looqbox-challenge`; + +-- 1) What are the 10 most expensive products in the company? +SELECT + PRODUCT_COD, + PRODUCT_NAME, + PRODUCT_VAL, + DEP_NAME, + DEP_COD, + SECTION_NAME, + SECTION_COD +FROM data_product +ORDER BY PRODUCT_VAL DESC +LIMIT 10; + +-- 2) What sections do the 'BEBIDAS' and 'PADARIA' departments have? +SELECT DISTINCT + DEP_NAME, + SECTION_COD, + SECTION_NAME +FROM data_product +WHERE DEP_NAME IN ('BEBIDAS', 'PADARIA') +ORDER BY DEP_NAME, SECTION_NAME; + +-- 3) What was the total sale of products (in $) of each Business Area in the first quarter of 2019? 
@dataclass(frozen=True)
class DatabaseConfig:
    """Connection settings for the challenge MySQL database.

    The field defaults below are computed once, when this class body runs,
    from the ``DB_*`` environment variables (with literal fallbacks). Call
    :meth:`from_env` to build a config from the environment as it is at
    call time and to enforce that a password is present.
    """

    host: str = os.getenv("DB_HOST", "35.199.115.174")
    port: int = int(os.getenv("DB_PORT", "3306"))
    database: str = os.getenv("DB_NAME", "looqbox-challenge")
    user: str = os.getenv("DB_USER", "looqbox-challenge")
    password: str | None = os.getenv("DB_PASSWORD")

    @classmethod
    def from_env(cls) -> "DatabaseConfig":
        """Return a config read from the current environment.

        Each ``DB_*`` variable that is set now wins; a variable that is not
        set falls back to the class default (the value captured when the
        class was defined).

        Raises:
            RuntimeError: if the resulting password is missing or empty.
            ValueError: if ``DB_PORT`` is set to a non-integer string.
        """
        # The class defaults are an import-time snapshot; re-read the
        # environment so values exported after import are honoured.
        snapshot = cls()
        config = cls(
            host=os.getenv("DB_HOST", snapshot.host),
            port=int(os.getenv("DB_PORT", str(snapshot.port))),
            database=os.getenv("DB_NAME", snapshot.database),
            user=os.getenv("DB_USER", snapshot.user),
            password=os.getenv("DB_PASSWORD", snapshot.password),
        )
        if not config.password:
            raise RuntimeError(
                "DB_PASSWORD is required. Copy .env.example to .env and fill the password, "
                "or export DB_PASSWORD before running the scripts."
            )
        return config


def get_engine(config: DatabaseConfig | None = None) -> Engine:
    """Create a SQLAlchemy engine for the MySQL database described by *config*.

    Args:
        config: connection settings; when ``None`` they are loaded with
            :meth:`DatabaseConfig.from_env`.

    Returns:
        An engine using the ``mysql+pymysql`` driver with ``pool_pre_ping``
        enabled.
    """
    if config is None:
        config = DatabaseConfig.from_env()
    # URL.create receives the credentials as separate components instead of
    # a hand-assembled connection string.
    url = URL.create(
        "mysql+pymysql",
        username=config.user,
        password=config.password,
        host=config.host,
        port=config.port,
        database=config.database,
    )
    return create_engine(url, pool_pre_ping=True)


def _parse_iso_date(value: str) -> Date:
    """Parse *value* as an ISO ``YYYY-MM-DD`` date.

    Raises:
        ValueError: if *value* is not a valid ISO date; the message names
            the offending value.
    """
    try:
        return Date.fromisoformat(value)
    except ValueError as exc:
        raise ValueError(f"Invalid date '{value}'. Expected ISO format YYYY-MM-DD.") from exc


def retrieve_data(
    product_code: int | None = None,
    store_code: int | str | None = None,
    date: Sequence[str] | None = None,
    engine: Engine | None = None,
) -> pd.DataFrame:
    """Retrieve rows from data_product_sales using optional, parameterized filters.

    Every filter is optional; with no filters the whole table is returned.
    All values are sent as bound parameters, never interpolated into the SQL.

    Args:
        product_code: integer product code matched against ``PRODUCT_CODE``.
            Any integral type is accepted; ``bool`` is rejected.
        store_code: store code matched against ``STORE_CODE``; it is sent to
            the database as a string.
        date: two-item interval ``[start_date, end_date]`` of ISO
            ``YYYY-MM-DD`` strings, applied as an inclusive ``BETWEEN``.
        engine: engine to query with. When omitted, a temporary engine is
            created and disposed before returning.

    Returns:
        The matching rows ordered by ``DATE``, ``STORE_CODE``, ``PRODUCT_CODE``.

    Raises:
        TypeError: if *product_code* is not an integer.
        ValueError: if *date* does not hold exactly two valid ISO dates, or
            the start date is after the end date.
    """
    import numbers  # local import keeps this block self-contained

    filters: list[str] = []
    params: dict[str, object] = {}

    if product_code is not None:
        # bool is a subclass of int: without the explicit check, True/False
        # would silently query product codes 1/0.
        if isinstance(product_code, bool) or not isinstance(product_code, numbers.Integral):
            raise TypeError("product_code must be an integer.")
        filters.append("PRODUCT_CODE = :product_code")
        params["product_code"] = int(product_code)

    if store_code is not None:
        filters.append("STORE_CODE = :store_code")
        params["store_code"] = str(store_code)

    if date is not None:
        # A bare string is one value, not an interval; any other iterable is
        # materialised so its length can be checked.
        bounds = [date] if isinstance(date, str) else list(date)
        if len(bounds) != 2:
            raise ValueError("date must contain exactly two values: [start_date, end_date].")
        start_date = _parse_iso_date(bounds[0])
        end_date = _parse_iso_date(bounds[1])
        if start_date > end_date:
            raise ValueError("start_date cannot be greater than end_date.")
        filters.append("DATE BETWEEN :start_date AND :end_date")
        params["start_date"] = start_date
        params["end_date"] = end_date

    query = "SELECT * FROM data_product_sales"
    if filters:
        query += " WHERE " + " AND ".join(filters)
    query += " ORDER BY DATE, STORE_CODE, PRODUCT_CODE"

    # Only dispose engines created here; a caller-supplied engine stays open.
    owns_engine = engine is None
    if owns_engine:
        engine = get_engine()
    try:
        return pd.read_sql_query(text(query), engine, params=params)
    finally:
        if owns_engine:
            engine.dispose()
# Repository root (one level above src/) and the folder that receives every
# generated artifact.
ROOT = Path(__file__).resolve().parents[1]
OUTPUT = ROOT / "output"

# SQL test, question 1: the ten most expensive products.
SQL_MOST_EXPENSIVE = """
SELECT
    PRODUCT_COD,
    PRODUCT_NAME,
    PRODUCT_VAL,
    DEP_NAME,
    DEP_COD,
    SECTION_NAME,
    SECTION_COD
FROM data_product
ORDER BY PRODUCT_VAL DESC
LIMIT 10;
""".strip()

# SQL test, question 2: sections of the BEBIDAS and PADARIA departments.
SQL_DEPARTMENT_SECTIONS = """
SELECT DISTINCT
    DEP_NAME,
    SECTION_COD,
    SECTION_NAME
FROM data_product
WHERE DEP_NAME IN ('BEBIDAS', 'PADARIA')
ORDER BY DEP_NAME, SECTION_NAME;
""".strip()

# SQL test, question 3: product sales per business area, first quarter of 2019.
SQL_Q1_BUSINESS_AREA_SALES = """
SELECT
    sc.BUSINESS_NAME,
    ROUND(SUM(ps.SALES_VALUE), 2) AS TOTAL_SALES_VALUE
FROM data_product_sales AS ps
INNER JOIN data_store_cad AS sc
    ON CAST(ps.STORE_CODE AS UNSIGNED) = sc.STORE_CODE
WHERE ps.DATE BETWEEN '2019-01-01' AND '2019-03-31'
GROUP BY sc.BUSINESS_NAME
ORDER BY TOTAL_SALES_VALUE DESC;
""".strip()

# Case 2: the two client queries, kept as provided. The requested date
# window is applied afterwards in pandas.
CLIENT_QUERY_STORE_CAD = """
SELECT
    STORE_CODE,
    STORE_NAME,
    START_DATE,
    END_DATE,
    BUSINESS_NAME,
    BUSINESS_CODE
FROM data_store_cad
""".strip()

CLIENT_QUERY_STORE_SALES = """
SELECT
    STORE_CODE,
    DATE,
    SALES_VALUE,
    SALES_QTY
FROM data_store_sales
WHERE DATE BETWEEN '2019-01-01' AND '2019-12-31'
""".strip()


def run_sql_tests(engine: Engine) -> dict[str, pd.DataFrame]:
    """Run the three SQL-test queries and return their results.

    Returns:
        A dict mapping an output name (also used as the CSV file stem) to
        the DataFrame produced by the corresponding query.
    """
    queries = {
        "sql_1_most_expensive_products": SQL_MOST_EXPENSIVE,
        "sql_2_department_sections": SQL_DEPARTMENT_SECTIONS,
        "sql_3_q1_sales_by_business_area": SQL_Q1_BUSINESS_AREA_SALES,
    }
    return {name: pd.read_sql_query(text(sql), engine) for name, sql in queries.items()}


def build_case_2_visualization_table(engine: Engine) -> pd.DataFrame:
    """Build the case-2 table: average ticket (TM) per store, Oct-Dec 2019.

    Both client queries are executed unchanged; the 2019-10-01..2019-12-31
    window is applied in pandas. TM is total ``SALES_VALUE`` divided by
    total ``SALES_QTY`` per store, rounded to two decimals.

    Returns:
        A DataFrame with columns ``Loja``, ``Categoria`` and ``TM``, sorted
        by ``Loja``. TM is missing (NaN) for a store whose total quantity
        is zero.
    """
    stores = pd.read_sql_query(text(CLIENT_QUERY_STORE_CAD), engine)
    sales = pd.read_sql_query(text(CLIENT_QUERY_STORE_SALES), engine)

    sales["DATE"] = pd.to_datetime(sales["DATE"])
    in_period = sales["DATE"].between("2019-10-01", "2019-12-31")
    period_sales = sales[in_period].copy()

    # validate="many_to_one" makes the merge fail loudly if the store
    # register ever contains duplicated STORE_CODE values.
    merged = period_sales.merge(stores, on="STORE_CODE", how="left", validate="many_to_one")
    totals = merged.groupby(["STORE_NAME", "BUSINESS_NAME"], as_index=False).agg(
        SALES_VALUE=("SALES_VALUE", "sum"),
        SALES_QTY=("SALES_QTY", "sum"),
    )
    # Mask zero quantities so the ratio is NaN instead of inf.
    quantity = totals["SALES_QTY"].where(totals["SALES_QTY"] != 0)
    totals["TM"] = (totals["SALES_VALUE"] / quantity).round(2)

    result = (
        totals.rename(columns={"STORE_NAME": "Loja", "BUSINESS_NAME": "Categoria"})
        [["Loja", "Categoria", "TM"]]
        .sort_values("Loja")
        .reset_index(drop=True)
    )
    return result


def build_imdb_chart(engine: Engine) -> tuple[pd.DataFrame, Path]:
    """Summarise IMDB genres by average revenue and draw the bar chart.

    Movies with a non-null ``RevenueMillions`` are loaded, the
    comma-separated ``Genre`` column is expanded to one row per genre, and
    genres with at least 15 movies are ranked by mean revenue (top 10).

    Returns:
        ``(genre_summary, chart_path)``: the summary table in descending
        revenue order and the path of the PNG written under ``OUTPUT``.
    """
    movies = pd.read_sql_query(
        text(
            """
            SELECT Title, Genre, Rating, Votes, RevenueMillions
            FROM IMDB_movies
            WHERE RevenueMillions IS NOT NULL
            """
        ),
        engine,
    )

    exploded = movies.assign(Genre=movies["Genre"].str.split(",")).explode("Genre")
    exploded["Genre"] = exploded["Genre"].str.strip()

    genre_summary = (
        exploded.groupby("Genre", as_index=False)
        .agg(
            movies=("Title", "count"),
            avg_revenue_millions=("RevenueMillions", "mean"),
            avg_rating=("Rating", "mean"),
            total_votes=("Votes", "sum"),
        )
        .query("movies >= 15")
        .assign(
            avg_revenue_millions=lambda df: df["avg_revenue_millions"].round(2),
            avg_rating=lambda df: df["avg_rating"].round(2),
        )
        .sort_values("avg_revenue_millions", ascending=False)
        .head(10)
        .reset_index(drop=True)
    )

    # barh draws the first row at the bottom, so plot in ascending order to
    # get the highest-revenue genre on top.
    chart_data = genre_summary.sort_values("avg_revenue_millions", ascending=True)

    # Create the folder here so the function also works when called on its own.
    OUTPUT.mkdir(parents=True, exist_ok=True)
    chart_path = OUTPUT / "imdb_top_genres_by_avg_revenue.png"

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.barh(chart_data["Genre"], chart_data["avg_revenue_millions"], color="#2F6B7C")
        ax.set_title("Filmes IMDB: gêneros com maior receita média")
        ax.set_xlabel("Receita média (US$ milhões)")
        ax.set_ylabel("")
        ax.grid(axis="x", alpha=0.25)
        # Value label just to the right of each bar.
        for index, value in enumerate(chart_data["avg_revenue_millions"]):
            ax.text(float(value) + 2, index, f"{value:.2f}", va="center", fontsize=9)
        fig.tight_layout()
        fig.savefig(chart_path, dpi=180)
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)

    return genre_summary, chart_path


def save_outputs(frames: dict[str, pd.DataFrame], case_2: pd.DataFrame, imdb_summary: pd.DataFrame) -> None:
    """Write every result table as a CSV file under ``OUTPUT``.

    Args:
        frames: SQL-test results; each key becomes ``<key>.csv``.
        case_2: case-2 table, written to ``case_2_store_ticket.csv``.
        imdb_summary: genre summary, written to ``imdb_genre_summary.csv``.
    """
    OUTPUT.mkdir(parents=True, exist_ok=True)
    for name, df in frames.items():
        df.to_csv(OUTPUT / f"{name}.csv", index=False)
    case_2.to_csv(OUTPUT / "case_2_store_ticket.csv", index=False)
    imdb_summary.to_csv(OUTPUT / "imdb_genre_summary.csv", index=False)


def _format_value(value: object) -> str:
    """Render one table cell as text.

    Missing scalars (None, NaN, NaT) become an empty string; Decimal and
    float values get thousands separators and two decimals; objects with
    ``isoformat()`` use it; anything else goes through ``str()``.
    """
    # Missing values must be tested first: NaN is a float and NaT exposes
    # isoformat(), so the branches below would print "nan" / "NaT".
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    if isinstance(value, Decimal):
        return f"{float(value):,.2f}"
    if isinstance(value, float):
        return f"{value:,.2f}"
    if hasattr(value, "isoformat"):
        return value.isoformat()
    return str(value)


def _table_from_df(df: pd.DataFrame, widths: list[float] | None = None, max_rows: int | None = None) -> Table:
    """Convert a DataFrame into a styled reportlab ``Table``.

    Args:
        df: data to render; column names become the header row.
        widths: optional column widths passed to ``Table`` as ``colWidths``.
        max_rows: when given, only the first ``max_rows`` rows are rendered.

    Returns:
        A table whose header row repeats on every page.
    """
    from xml.sax.saxutils import escape  # local import keeps this block self-contained

    styles = getSampleStyleSheet()
    cell_style = ParagraphStyle(
        "TableCell",
        parent=styles["BodyText"],
        fontName="Helvetica",
        fontSize=7,
        leading=8,
    )

    def cell(text_value: str) -> Paragraph:
        # Paragraph parses its text as XML-like markup, so data containing
        # '&', '<' or '>' must be escaped to be shown literally.
        return Paragraph(escape(text_value), cell_style)

    data_frame = df if max_rows is None else df.head(max_rows)
    data = [[cell(str(col)) for col in data_frame.columns]]
    data.extend(
        [cell(_format_value(value)) for value in row]
        for _, row in data_frame.iterrows()
    )

    table = Table(data, colWidths=widths, repeatRows=1)
    table.setStyle(
        TableStyle(
            [
                ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#D9EAF0")),
                ("TEXTCOLOR", (0, 0), (-1, 0), colors.HexColor("#17313A")),
                ("GRID", (0, 0), (-1, -1), 0.25, colors.HexColor("#A8B7BD")),
                ("VALIGN", (0, 0), (-1, -1), "TOP"),
                ("LEFTPADDING", (0, 0), (-1, -1), 4),
                ("RIGHTPADDING", (0, 0), (-1, -1), 4),
                ("TOPPADDING", (0, 0), (-1, -1), 3),
                ("BOTTOMPADDING", (0, 0), (-1, -1), 3),
            ]
        )
    )
    return table
def _add_code(story: list, code: str, title: str) -> None:
    """Append a titled, monospaced code listing followed by a small gap to *story*."""
    styles = getSampleStyleSheet()
    code_style = ParagraphStyle("Code", fontName="Courier", fontSize=6, leading=7)
    story.append(Paragraph(title, styles["Heading3"]))
    story.append(Preformatted(code, code_style))
    story.append(Spacer(1, 0.12 * inch))


def generate_pdf(
    frames: dict[str, pd.DataFrame],
    case_2: pd.DataFrame,
    imdb_summary: pd.DataFrame,
    chart_path: Path,
    retrieve_sample: pd.DataFrame,
) -> Path:
    """Assemble the final challenge report as a landscape A4 PDF.

    Args:
        frames: SQL-test results keyed by the names used in the report
            (``sql_1_most_expensive_products``, ``sql_2_department_sections``,
            ``sql_3_q1_sales_by_business_area``).
        case_2: store / category / TM table for case 2.
        imdb_summary: genre summary table for case 3.
        chart_path: path of the PNG chart embedded in case 3.
        retrieve_sample: sample result of ``retrieve_data``; only its first
            ten rows are shown.

    Returns:
        The path of the PDF written under ``OUTPUT``.
    """
    # Create the folder here so the function also works when called on its own.
    OUTPUT.mkdir(parents=True, exist_ok=True)
    pdf_path = OUTPUT / "looqbox_data_challenge_carlos.pdf"
    doc = SimpleDocTemplate(
        str(pdf_path),
        pagesize=landscape(A4),
        rightMargin=28,
        leftMargin=28,
        topMargin=26,
        bottomMargin=26,
    )
    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle("CenteredTitle", parent=styles["Title"], alignment=TA_CENTER))

    story: list = []

    # Cover lines.
    story.append(Paragraph("Looqbox Data Challenge", styles["CenteredTitle"]))
    story.append(Paragraph("Candidato: Carlos", styles["Normal"]))
    story.append(Paragraph("Stack: Python, SQL, MySQL, pandas e matplotlib.", styles["Normal"]))
    story.append(Spacer(1, 0.15 * inch))

    # SQL test: each query is shown next to its result.
    story.append(Paragraph("Teste SQL", styles["Heading2"]))
    _add_code(story, SQL_MOST_EXPENSIVE, "1) Dez produtos mais caros")
    story.append(
        _table_from_df(
            frames["sql_1_most_expensive_products"],
            widths=[48, 250, 54, 72, 42, 100, 46],
        )
    )
    story.append(PageBreak())

    _add_code(story, SQL_DEPARTMENT_SECTIONS, "2) Seções dos departamentos BEBIDAS e PADARIA")
    story.append(_table_from_df(frames["sql_2_department_sections"], widths=[90, 70, 160]))
    story.append(Spacer(1, 0.2 * inch))

    _add_code(story, SQL_Q1_BUSINESS_AREA_SALES, "3) Vendas de produtos por área de negócio no 1º trimestre de 2019")
    story.append(_table_from_df(frames["sql_3_q1_sales_by_business_area"], widths=[130, 130]))
    story.append(PageBreak())

    # Case 1: example call of retrieve_data and a sample of its output.
    story.append(Paragraph("Caso 1 - Recuperação dinâmica de dados", styles["Heading2"]))
    _add_code(
        story,
        """my_data = retrieve_data(
    product_code=18,
    store_code=1,
    date=["2019-01-01", "2019-01-31"],
    engine=engine,
)""",
        "Exemplo de chamada",
    )
    story.append(Paragraph("Resultado de exemplo para product_code=18, store_code=1 e janeiro de 2019:", styles["Normal"]))
    story.append(_table_from_df(retrieve_sample.head(10), widths=[70, 80, 80, 90, 80]))
    story.append(Spacer(1, 0.2 * inch))
    story.append(
        Paragraph(
            "A função mantém todos os filtros opcionais, valida as entradas e usa parâmetros vinculados para evitar SQL injection.",
            styles["Normal"],
        )
    )
    story.append(PageBreak())

    # Case 2: the client queries as provided, plus the requested table.
    story.append(Paragraph("Caso 2 - Consultas do cliente e tabela solicitada", styles["Heading2"]))
    _add_code(story, CLIENT_QUERY_STORE_CAD, "Consulta 1 do cliente, usada como fornecida")
    _add_code(story, CLIENT_QUERY_STORE_SALES, "Consulta 2 do cliente, usada como fornecida")
    story.append(
        Paragraph(
            "O intervalo de datas solicitado foi aplicado no pandas depois da carga da consulta 2, mantendo as duas consultas do cliente inalteradas. "
            "O TM foi calculado como o total de SALES_VALUE dividido pelo total de SALES_QTY para cada loja.",
            styles["Normal"],
        )
    )
    story.append(_table_from_df(case_2, widths=[150, 110, 60]))
    story.append(PageBreak())

    # Case 3: rationale, chart and the summary table behind it.
    story.append(Paragraph("Caso 3 - Visualização com IMDB", styles["Heading2"]))
    story.append(
        Paragraph(
            "Escolhi um gráfico de barras horizontais porque o objetivo é comparar categorias com clareza. "
            "A coluna Genre possui múltiplos valores por filme, então cada filme foi expandido para suas tags de gênero. "
            "Considerei apenas gêneros com pelo menos 15 filmes e receita não nula para reduzir ruído de grupos muito pequenos.",
            styles["Normal"],
        )
    )
    story.append(Image(str(chart_path), width=7.2 * inch, height=4.0 * inch))
    story.append(Spacer(1, 0.15 * inch))
    story.append(_table_from_df(imdb_summary, widths=[90, 60, 120, 80, 90]))

    doc.build(story)
    return pdf_path


def main() -> None:
    """Run every step of the challenge and write the CSVs, chart and PDF.

    A single engine is shared by all steps and disposed at the end, even if
    a step fails.
    """
    OUTPUT.mkdir(parents=True, exist_ok=True)
    engine = get_engine()
    try:
        sql_frames = run_sql_tests(engine)
        retrieve_sample = retrieve_data(product_code=18, store_code=1, date=["2019-01-01", "2019-01-31"], engine=engine)
        case_2 = build_case_2_visualization_table(engine)
        imdb_summary, chart_path = build_imdb_chart(engine)
        save_outputs(sql_frames, case_2, imdb_summary)
        pdf_path = generate_pdf(sql_frames, case_2, imdb_summary, chart_path, retrieve_sample)
    finally:
        engine.dispose()

    print(f"Generated outputs in: {OUTPUT}")
    print(f"PDF: {pdf_path}")


if __name__ == "__main__":
    main()
" + "Escolhi um gráfico de barras horizontais porque o objetivo é comparar categorias com clareza. " "A coluna Genre possui múltiplos valores por filme, então cada filme foi expandido para suas tags de gênero. " "Considerei apenas gêneros com pelo menos 15 filmes e receita não nula para reduzir ruído de grupos muito pequenos.", styles["Normal"], From e669aa0fd5e58bc5eb20212524abba00a56387b0 Mon Sep 17 00:00:00 2001 From: Carlos Augusto Rodrigues Teixeira <135044199+carlos1818augusto@users.noreply.github.com> Date: Fri, 26 Jun 2026 15:08:36 -0300 Subject: [PATCH 10/11] Fix typo in README_SOLUTION.md --- README_SOLUTION.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_SOLUTION.md b/README_SOLUTION.md index 6a589c8..5e40fd2 100644 --- a/README_SOLUTION.md +++ b/README_SOLUTION.md @@ -24,5 +24,5 @@ O schema do banco se chama `looqbox-challenge`, com hífen. O código Python con ## Observações - O caso 1 usa SQL parametrizado e valida as datas antes da consulta. -- O caso 2 mantem as duas consultas do cliente inalteradas e aplica o filtro de datas solicitado no pandas. +- O caso 2 mantém as duas consultas do cliente inalteradas e aplica o filtro de datas solicitado no pandas. - O caso 3 expande os gêneros da tabela IMDB separados por vírgula e compara os principais gêneros por receita média. From a81c54409ee71cd9a9487a230212c1f681448afa Mon Sep 17 00:00:00 2001 From: Carlos Augusto Rodrigues Teixeira <135044199+carlos1818augusto@users.noreply.github.com> Date: Fri, 26 Jun 2026 15:14:25 -0300 Subject: [PATCH 11/11] Add AI assistance disclosure to README --- README_SOLUTION.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README_SOLUTION.md b/README_SOLUTION.md index 5e40fd2..fa7c1dd 100644 --- a/README_SOLUTION.md +++ b/README_SOLUTION.md @@ -26,3 +26,7 @@ O schema do banco se chama `looqbox-challenge`, com hífen. O código Python con - O caso 1 usa SQL parametrizado e valida as datas antes da consulta. 
- O caso 2 mantém as duas consultas do cliente inalteradas e aplica o filtro de datas solicitado no pandas. - O caso 3 expande os gêneros da tabela IMDB separados por vírgula e compara os principais gêneros por receita média. + +Durante a resolução do desafio, utilizei apoio pontual de ferramenta de IA/LLM para revisão de trechos específicos, organização textual da documentação e validação de dúvidas pontuais durante o desenvolvimento. + +A construção da lógica, execução dos testes, interpretação dos resultados e estruturação principal da solução foram realizadas por mim, com base no meu conhecimento em Python, SQL e análise de dados.