Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# NOTE(review): these look like live database credentials committed in
# plaintext. Confirm they are intentionally public; otherwise rotate them and
# keep .env out of version control.
DB_HOST="35.199.115.174"
DB_USER="looqbox-challenge"
DB_PASSWORD="looq-challenge"
DB_NAME="looqbox-challenge"
DB_PORT=3306

97 changes: 97 additions & 0 deletions IMDB/IMDB_diretores.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from db_connection import engine


def get_movies():
    """Fetch the director and rating of every movie that has a director.

    Returns:
        A DataFrame with the ``Director`` and ``Rating`` columns read from
        the ``IMDB_movies`` table through the imported ``engine``.
    """
    sql = """
SELECT
Director,
Rating
FROM `looqbox-challenge`.IMDB_movies
WHERE Director IS NOT NULL;
"""
    return pd.read_sql(sql, engine)


def calculate_top_directors(df, min_movies=3, top_n=15):
    """Rank directors by mean rating, keeping only those with enough films.

    Args:
        df: DataFrame with ``Director`` and ``Rating`` columns, one row per
            movie.
        min_movies: Minimum number of *rated* movies a director needs to be
            ranked.
        top_n: Maximum number of directors to return.

    Returns:
        A DataFrame with ``Director``, ``Avg_Rating`` and ``Count`` columns,
        sorted by ``Avg_Rating`` descending and truncated to ``top_n`` rows.
    """
    stats = df.groupby("Director").agg(
        Avg_Rating=("Rating", "mean"),
        Count=("Rating", "count"),
    )
    # Apply the threshold to the same count that is reported: "count" skips
    # missing ratings, so a director can never pass the filter while showing
    # fewer than ``min_movies`` rated films.
    eligible = stats[stats["Count"] >= min_movies].reset_index()
    return eligible.sort_values("Avg_Rating", ascending=False).head(top_n)


def plot_top_directors(df_directors, baseline=5):
    """Draw a horizontal bar chart of directors by average IMDb rating.

    Bars are ordered by ascending ``Avg_Rating``, coloured along a
    grey-to-green gradient, and labelled with the rating and film count. A
    dashed line marks the mean of the plotted averages.

    Args:
        df_directors: DataFrame with ``Director``, ``Avg_Rating`` and
            ``Count`` columns, one row per director.
        baseline: Rating at which the x-axis and every bar start.

    Nothing is drawn when ``df_directors`` has no rows.
    """
    if df_directors.empty:
        # No rows means no maximum rating to derive the x-limits from.
        return

    looqbox_cmap = LinearSegmentedColormap.from_list(
        "looqbox", ["#B0B0B0", "#3DBE6E"]
    )

    df_directors = df_directors.sort_values("Avg_Rating", ascending=True)

    n = len(df_directors)
    # Spread the bars evenly over the gradient; with a single bar the
    # original ``n - 1`` denominator would be zero.
    steps = max(n - 1, 1)
    colors = [looqbox_cmap(i / steps) for i in range(n)]

    fig, ax = plt.subplots(figsize=(11, 7))

    # Bars start at ``baseline`` rather than 0 so small rating differences
    # stay visible.
    bars = ax.barh(
        df_directors["Director"],
        df_directors["Avg_Rating"] - baseline,
        color=colors,
        edgecolor="white",
        linewidth=0.5,
        left=baseline,
    )

    for bar, rating, count in zip(
        bars, df_directors["Avg_Rating"], df_directors["Count"]
    ):
        # Place the label just past the right-hand end of the bar.
        ax.text(
            bar.get_x() + bar.get_width() + 0.02,
            bar.get_y() + bar.get_height() / 2,
            f"{rating:.2f} ({count} filmes)",
            va="center",
            fontsize=9,
            color="#444444",
        )

    avg = df_directors["Avg_Rating"].mean()
    ax.axvline(
        avg,
        color="#e74c3c",
        linestyle="--",
        linewidth=1.5,
        label=f"Group Average: {avg:.2f}",
    )

    # Leave room on the right for the text labels.
    ax.set_xlim(baseline, df_directors["Avg_Rating"].max() + 0.5)
    ax.set_title(
        "Most Consistent Directors by IMDb Rating",
        fontsize=14, fontweight="bold", pad=15
    )
    ax.set_xlabel("Average IMDb Rating", fontsize=11)
    ax.set_ylabel("Director", fontsize=11)
    ax.legend(fontsize=10)
    ax.grid(axis="x", alpha=0.3)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    plt.tight_layout()
    plt.show()

def main():
    """Load the movies, print the director ranking, then chart it."""
    ranking = calculate_top_directors(get_movies())
    print(ranking.to_string(index=False))
    plot_top_directors(ranking)


# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()
101 changes: 101 additions & 0 deletions IMDB/IMDB_genero.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import pandas as pd
import matplotlib.pyplot as plt
from db_connection import engine
from matplotlib.colors import LinearSegmentedColormap

def get_movies():
    """Fetch title, genre and rating of every movie that has a genre.

    Returns:
        A DataFrame with the ``Title``, ``Genre`` and ``Rating`` columns read
        from the ``IMDB_movies`` table through the imported ``engine``.
    """
    sql = """
SELECT
Title,
Genre,
Rating
FROM `looqbox-challenge`.IMDB_movies
WHERE Genre IS NOT NULL;
"""
    return pd.read_sql(sql, engine)


def expand_genres(df):
    """Return one row per (movie, genre) pair without touching the input.

    The comma-separated ``Genre`` string of each row is split, the row is
    repeated once per genre, and surrounding whitespace is stripped from each
    genre name. Repeated rows keep the index label of the row they came from.

    Args:
        df: DataFrame with a string ``Genre`` column.

    Returns:
        A new DataFrame; ``df`` itself is left unchanged.
    """
    # ``assign`` builds a new frame, so the caller's ``Genre`` column is not
    # overwritten with lists as a side effect.
    return (
        df.assign(Genre=df["Genre"].str.split(","))
        .explode("Genre")
        .assign(Genre=lambda rows: rows["Genre"].str.strip())
    )


def calculate_avg_by_genre(df):
    """Summarise ratings per genre, best-rated genre first.

    Args:
        df: DataFrame with ``Genre``, ``Rating`` and ``Title`` columns.

    Returns:
        A DataFrame with ``Genre``, ``Avg_Rating`` (mean rating) and
        ``Count`` (number of titles) sorted by ``Avg_Rating`` descending.
    """
    per_genre = df.groupby("Genre").agg(
        Avg_Rating=("Rating", "mean"),
        Count=("Title", "count"),
    )
    summary = per_genre.reset_index()
    return summary.sort_values("Avg_Rating", ascending=False)


def plot_rating_by_genre(df_genre, baseline=5):
    """Draw a horizontal bar chart of the average IMDb rating per genre.

    Rows are plotted in the order given, with the first row at the top and
    receiving the green end of the grey-to-green gradient. Each bar is
    labelled with the rating and film count, and a dashed line marks the mean
    of the plotted averages.

    Args:
        df_genre: DataFrame with ``Genre``, ``Avg_Rating`` and ``Count``
            columns, one row per genre.
        baseline: Rating at which the x-axis and every bar start.

    Nothing is drawn when ``df_genre`` has no rows.
    """
    if df_genre.empty:
        # No rows means no maximum rating to derive the x-limits from.
        return

    fig, ax = plt.subplots(figsize=(11, 8))

    looqbox_cmap = LinearSegmentedColormap.from_list(
        "looqbox", ["#B0B0B0", "#3DBE6E"]
    )

    n = len(df_genre)
    # Spread the bars evenly over the gradient; with a single bar the
    # original ``n - 1`` denominator would be zero.
    steps = max(n - 1, 1)
    colors = [looqbox_cmap(i / steps) for i in range(n)]
    # Reverse so the first row gets the green end of the gradient.
    colors = colors[::-1]

    # Bars start at ``baseline`` rather than 0 so small rating differences
    # stay visible.
    bars = ax.barh(
        df_genre["Genre"],
        df_genre["Avg_Rating"] - baseline,
        color=colors,
        edgecolor="white",
        linewidth=0.5,
        left=baseline,
    )

    for bar, rating, count in zip(
        bars, df_genre["Avg_Rating"], df_genre["Count"]
    ):
        # Place the label just past the right-hand end of the bar.
        ax.text(
            bar.get_x() + bar.get_width() + 0.02,
            bar.get_y() + bar.get_height() / 2,
            f"{rating:.2f} ({count} filmes)",
            va="center",
            fontsize=9,
            color="#444444",
        )

    avg = df_genre["Avg_Rating"].mean()
    ax.axvline(
        avg,
        color="#e74c3c",
        linestyle="--",
        linewidth=1.5,
        label=f"Overall Average: {avg:.2f}",
    )

    # Leave room on the right for the text labels.
    ax.set_xlim(baseline, df_genre["Avg_Rating"].max() + 0.5)
    ax.set_title("Average IMDb Rating by Genre", fontsize=14, fontweight="bold", pad=15)
    ax.set_xlabel("Average Rating", fontsize=11)
    ax.set_ylabel("Genre", fontsize=11)
    ax.legend(fontsize=10)
    # Put the first row at the top of the chart.
    ax.invert_yaxis()
    ax.grid(axis="x", alpha=0.3)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    plt.tight_layout()
    plt.show()


def main():
    """Load the movies, print the per-genre averages, then chart them."""
    per_genre_rows = expand_genres(get_movies())
    genre_summary = calculate_avg_by_genre(per_genre_rows)
    print(genre_summary.to_string(index=False))
    plot_rating_by_genre(genre_summary)


# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()
108 changes: 108 additions & 0 deletions IMDB/IMDB_metascore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from db_connection import engine


def get_movies():
    """Fetch every movie that has a Metascore.

    Returns:
        A DataFrame with the ``Title``, ``Rating``, ``Metascore`` and
        ``Genre`` columns read from the ``IMDB_movies`` table through the
        imported ``engine``.
    """
    sql = """
SELECT
Title,
Rating,
Metascore,
Genre
FROM `looqbox-challenge`.IMDB_movies
WHERE Metascore IS NOT NULL;
"""
    return pd.read_sql(sql, engine)


def calculate_difference(df):
    """Add the scaled rating and its gap to the Metascore, in place.

    ``Rating`` is multiplied by 10 and stored as ``Rating_scaled``;
    ``Difference`` is that scaled rating minus ``Metascore``.

    Args:
        df: DataFrame with ``Rating`` and ``Metascore`` columns. It is
            modified in place.

    Returns:
        The same ``df`` object, now carrying the two new columns.
    """
    scaled_rating = df["Rating"] * 10
    df["Rating_scaled"] = scaled_rating
    df["Difference"] = scaled_rating - df["Metascore"]
    return df


def get_looqbox_cmap():
    """Build the two-colour "looqbox" gradient running from grey to green."""
    gradient_stops = ["#B0B0B0", "#3DBE6E"]
    return LinearSegmentedColormap.from_list("looqbox", gradient_stops)


def plot_divergence(df, top_n=20):
    """Chart the movies where audience and critics disagree the most.

    The ``top_n`` rows with the largest absolute ``Difference`` are drawn as
    horizontal bars ordered by ascending ``Difference`` and coloured along
    the looqbox gradient.

    Args:
        df: DataFrame with ``Title`` and ``Difference`` columns.
        top_n: Number of largest disagreements to plot.

    Nothing is drawn when ``df`` has no rows.
    """
    if df.empty:
        # With no rows the annotation positions below would be NaN.
        return

    top = (
        df.reindex(df["Difference"].abs().sort_values(ascending=False).index)
        .head(top_n)
        .sort_values("Difference")
    )

    cmap = get_looqbox_cmap()
    lowest = top["Difference"].min()
    span = top["Difference"].max() - lowest
    if span > 0:
        colors = [cmap((v - lowest) / span) for v in top["Difference"]]
    else:
        # Every bar has the same value (e.g. a single movie): normalising
        # would be 0/0 and give NaN colour positions, so use the midpoint.
        colors = [cmap(0.5)] * len(top)

    fig, ax = plt.subplots(figsize=(11, 8))

    bars = ax.barh(
        top["Title"],
        top["Difference"],
        color=colors,
        edgecolor="white",
        linewidth=0.5,
    )

    for bar, val in zip(bars, top["Difference"]):
        x = bar.get_width()
        # Put the label just beyond the bar's end, on whichever side of zero
        # the bar extends to.
        ax.text(
            x + (0.5 if x >= 0 else -0.5),
            bar.get_y() + bar.get_height() / 2,
            f"{val:+.1f}",
            va="center",
            ha="left" if x >= 0 else "right",
            fontsize=9,
            color="#444444",
        )

    ax.axvline(0, color="#444444", linewidth=1)

    # Side captions, placed below the lowest bar (y = -2 in data units).
    ax.text(
        top["Difference"].max() * 0.3, -2,
        "► Público gostou mais",
        color="#3DBE6E", fontsize=10, fontweight="bold"
    )
    ax.text(
        top["Difference"].min() * 0.9, -2,
        "◄ Crítica gostou mais",
        color="#B0B0B0", fontsize=10, fontweight="bold"
    )

    ax.set_title(
        "Audience vs Critics: Who Liked It More?",
        fontsize=14, fontweight="bold", pad=15
    )
    ax.set_xlabel("Difference (Audience Score - Critics Score)", fontsize=11)
    ax.grid(axis="x", alpha=0.3)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    plt.tight_layout()
    plt.show()


def main():
    """Load the movies, print the ten biggest disagreements, then chart them."""
    scored = calculate_difference(get_movies())

    print("Top 10 Biggest Disagreements:\n")
    # Order rows by the size of the gap, regardless of its sign.
    by_gap = scored["Difference"].abs().sort_values(ascending=False).index
    ranked = scored.reindex(by_gap)
    report = ranked[["Title", "Rating", "Metascore", "Difference"]].head(10)
    print(report.to_string(index=False))

    plot_divergence(scored)


# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()
Loading