Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.venv
.env
11 changes: 11 additions & 0 deletions cases/01/queries/get_product.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Sales rows for one product in one store within an inclusive date range.
-- The curly-brace placeholders are filled in by the calling Python code.
SELECT
    *
FROM
    `looqbox-challenge`.data_product_sales
WHERE
    PRODUCT_CODE = {product_code}
    AND STORE_CODE = {store_code}
    AND DATE BETWEEN '{start_date}' AND '{end_date}'
63 changes: 63 additions & 0 deletions cases/01/retrieve_products.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import os
from sqlalchemy import create_engine
from dotenv import load_dotenv
import pandas as pd


def retrieve_data(product_code: int, store_code: int, date: list[str]) -> pd.DataFrame:
    """
    Retrieve the sales rows of one product in one store over a date range.

    Args:
        product_code (int): Code of the product to filter by.
        store_code (int): Code of the store to filter by.
        date (list[str]): Start date and end date, in that order, written as
            ISO ``YYYY-MM-DD`` values. Both bounds are inclusive.

    Returns:
        pd.DataFrame: The rows returned by ``queries/get_product.sql`` for the
        given product, store and period.

    Raises:
        ValueError: If ``date`` holds fewer than two items, if a date is not a
            valid ISO date, or if a code cannot be converted to an integer.
        OSError: If the SQL template file cannot be read.
    """
    # Local import: the parameter named ``date`` makes a bare
    # ``from datetime import date`` unusable inside this function.
    import datetime

    # The SQL template is filled in with str.format, so every value is
    # validated and normalised first; only integers and canonical ISO dates
    # ever reach the query text.
    if len(date) < 2:
        raise ValueError("date must contain the start date and the end date")
    product_code = int(product_code)
    store_code = int(store_code)
    start_date = datetime.date.fromisoformat(str(date[0])).isoformat()
    end_date = datetime.date.fromisoformat(str(date[1])).isoformat()

    # Read the SQL template and apply the variables. Errors are allowed to
    # propagate: swallowing them would only postpone the failure to the
    # query call below, with a less useful message.
    query_path = os.path.join(os.path.dirname(__file__), "queries", "get_product.sql")
    with open(query_path, "r", encoding="utf-8") as f:
        sql_template = f.read()

    sql_query = sql_template.format(
        product_code=product_code,
        store_code=store_code,
        start_date=start_date,
        end_date=end_date,
    )

    # Load the environment variables.
    # NOTE(review): by default load_dotenv() does not override variables that
    # already exist in the environment, and many shells export USER, so the
    # .env value for USER may be ignored - confirm.
    load_dotenv()
    user = os.getenv("USER")
    password = os.getenv("PASSWORD")
    host = os.getenv("HOST")
    port = os.getenv("PORT")
    database = os.getenv("DATABASE")

    # Build the connection string
    connection_string = f"mysql+pymysql://{user}:{password}@{host}:{port}/{database}"

    # Create the engine and make sure its connection pool is released even
    # when the query fails.
    engine = create_engine(connection_string)
    try:
        return pd.read_sql(sql_query, con=engine)
    finally:
        engine.dispose()


def main():
    """Run one sample query and print the resulting DataFrame."""
    # Sample invocation of the function
    sample_period = ['2019-01-01', '2019-01-31']
    sales = retrieve_data(18, 1, sample_period)
    print(sales)


if __name__ == "__main__":
    main()
8 changes: 8 additions & 0 deletions cases/02/queries/data_store_cad.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Store registry columns used by the average-ticket report.
SELECT
    cad.STORE_CODE,
    cad.STORE_NAME,
    cad.START_DATE,
    cad.END_DATE,
    cad.BUSINESS_NAME,
    cad.BUSINESS_CODE
FROM
    data_store_cad AS cad
7 changes: 7 additions & 0 deletions cases/02/queries/data_store_sales.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Store sales dated within calendar year 2019 (both bounds inclusive).
SELECT
    sales.STORE_CODE,
    sales.DATE,
    sales.SALES_VALUE,
    sales.SALES_QTY
FROM
    data_store_sales AS sales
WHERE
    sales.DATE >= '2019-01-01'
    AND sales.DATE <= '2019-12-31'
71 changes: 71 additions & 0 deletions cases/02/store_tm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
from sqlalchemy import create_engine
from dotenv import load_dotenv
import pandas as pd

def get_sql_data() -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Load the store registry and the store sales from the database.

    Connection settings are read from the environment (after loading a
    ``.env`` file) and the queries are read from the ``queries`` directory
    next to this module.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: ``(store_cad, store_sales)``, where
        ``store_sales`` has its ``DATE`` column parsed as dates.

    Raises:
        OSError: If one of the SQL files cannot be read.
    """
    # Load the environment variables
    load_dotenv()
    user = os.getenv("USER")
    password = os.getenv("PASSWORD")
    host = os.getenv("HOST")
    port = os.getenv("PORT")
    database = os.getenv("DATABASE")

    # Build the connection string
    connection_string = f"mysql+pymysql://{user}:{password}@{host}:{port}/{database}"

    # Resolve the SQL file paths
    store_cad_path = os.path.join(os.path.dirname(__file__), "queries", "data_store_cad.sql")
    store_sales_path = os.path.join(os.path.dirname(__file__), "queries", "data_store_sales.sql")

    # Read the queries from the SQL files
    with open(store_cad_path, "r", encoding="utf-8") as f:
        store_cad_query = f.read()
    with open(store_sales_path, "r", encoding="utf-8") as f:
        store_sales_query = f.read()

    # Create the engine and load the data; the connection pool is released
    # even when one of the queries fails.
    engine = create_engine(connection_string)
    try:
        store_cad = pd.read_sql(store_cad_query, con=engine)
        store_sales = pd.read_sql(store_sales_query, con=engine, parse_dates=['DATE'])
    finally:
        engine.dispose()

    return store_cad, store_sales

def main():
    """
    Print the average ticket (TM) per store and business category.

    The sales are restricted to 2019-10-01 through 2019-12-31 (inclusive),
    joined to the store registry on ``STORE_CODE``, summed per store and
    category, and TM is computed as total sales value divided by total sales
    quantity, rounded to two decimals.
    """
    store_cad, store_sales = get_sql_data()

    # Keep only the requested period
    store_sales = store_sales[(store_sales['DATE'] >= '2019-10-01') & (store_sales['DATE'] <= '2019-12-31')]

    # Join the DataFrames on STORE_CODE
    df_merged = store_cad.merge(store_sales, on='STORE_CODE', how='inner')

    # Sum sales value and quantity per store and category. Scalar aggregation
    # names (instead of one-element lists) keep the resulting column labels
    # flat; list-valued specs would produce two-level (MultiIndex) columns.
    df_grouped = df_merged.groupby([
        'STORE_NAME',
        'BUSINESS_NAME'
    ]).agg({
        'SALES_VALUE': 'sum',
        'SALES_QTY': 'sum'
    }).reset_index()

    # Average ticket
    df_grouped['TM'] = (df_grouped['SALES_VALUE'] / df_grouped['SALES_QTY']).round(2)

    # Keep and rename only the columns needed for the report
    df_result = df_grouped[['STORE_NAME', 'BUSINESS_NAME', 'TM']].rename(
        columns={
            'STORE_NAME': 'Loja',
            'BUSINESS_NAME': 'Categoria'
        }
    )

    print(df_result)


if __name__ == "__main__":
    main()
49 changes: 49 additions & 0 deletions cases/03/data_viz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os
from sqlalchemy import create_engine
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt

def get_imdb_data():
    """
    Run ``queries/IMDB_movies.sql`` and return its result as a DataFrame.

    Connection settings come from the environment, after loading a ``.env``
    file.
    """
    # Pull the connection settings from the environment
    load_dotenv()
    user, password, host, port, database = (
        os.getenv(name) for name in ("USER", "PASSWORD", "HOST", "PORT", "DATABASE")
    )

    # Connection engine built from the assembled URL
    engine = create_engine(f"mysql+pymysql://{user}:{password}@{host}:{port}/{database}")

    # The query lives in the "queries" directory next to this module
    module_dir = os.path.dirname(__file__)
    with open(os.path.join(module_dir, "queries", "IMDB_movies.sql"), "r", encoding="utf-8") as sql_file:
        query_text = sql_file.read()

    # Execute the query and hand back the rows
    return pd.read_sql(query_text, con=engine)

def main():
    """Show a scatter plot of movie revenue against Metascore."""
    movies = get_imdb_data()

    _, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(movies['RevenueMillions'], movies['Metascore'], alpha=0.5, color='darkblue')

    ax.set_title('Relação entre Receita e Avaliação dos Filmes (IMDB)')
    ax.set_xlabel('Receita (em Milhões)')
    ax.set_ylabel('Avaliação (Metascore)')
    ax.grid(True, linestyle='--', alpha=0.7)

    # Open the interactive chart window
    plt.show()


if __name__ == "__main__":
    main()
12 changes: 12 additions & 0 deletions cases/03/queries/IMDB_movies.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- Movies with a known revenue whose vote count is above the table average.
SELECT
    movies.*
FROM
    `looqbox-challenge`.IMDB_movies AS movies
WHERE
    movies.Votes > (
        SELECT
            AVG(Votes)
        FROM
            `looqbox-challenge`.IMDB_movies
    )
    AND movies.RevenueMillions IS NOT NULL
14 changes: 14 additions & 0 deletions cases/sql_test/01.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Products whose value rank is within the top 10 (ties share a rank).
SELECT
    ranked.*
FROM (
    SELECT
        *,
        RANK() OVER (ORDER BY PRODUCT_VAL DESC) AS rank_value
    FROM
        `looqbox-challenge`.data_product
) AS ranked
WHERE
    ranked.rank_value <= 10
8 changes: 8 additions & 0 deletions cases/sql_test/02.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Unique department/section pairs for the BEBIDAS and PADARIA departments.
SELECT
    DEP_NAME,
    SECTION_NAME
FROM
    `looqbox-challenge`.data_product
WHERE
    DEP_NAME IN ('BEBIDAS', 'PADARIA')
GROUP BY
    DEP_NAME,
    SECTION_NAME
ORDER BY
    DEP_NAME
13 changes: 13 additions & 0 deletions cases/sql_test/03.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Total sales value per business for sales dated from 2019-01-01 up to,
-- but not including, 2019-04-01.
SELECT
    BUSINESS_NAME,
    SUM(SALES_VALUE) AS TOTAL_SALES
FROM
    `looqbox-challenge`.data_store_cad AS store
INNER JOIN
    `looqbox-challenge`.data_product_sales AS sales
    ON store.STORE_CODE = sales.STORE_CODE
WHERE
    sales.DATE >= '2019-01-01'
    AND sales.DATE < '2019-04-01'
GROUP BY
    BUSINESS_NAME
Binary file added imdb_scatter_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading