-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapporagent.html
More file actions
1041 lines (922 loc) · 57.9 KB
/
Copy pathapporagent.html
File metadata and controls
1041 lines (922 loc) · 57.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>App or Agent Quiz</title>
<style>
/* ---------- Celebration overlay ---------- */

/* A single confetti piece: a 10x10 fixed-position square that starts just
   above the top edge of the viewport. No horizontal position, colour or
   animation duration is set here; presumably the script supplies them inline
   per piece (verify against the script). */
.confetti {
  position: fixed;
  top: -10px;
  z-index: 9999;
  width: 10px;
  height: 10px;
  animation: confetti-fall linear forwards;
}

/* Drop one viewport height while making a full turn and fading out. */
@keyframes confetti-fall {
  100% {
    opacity: 0;
    transform: translateY(100vh) rotate(360deg);
  }
}

/* Centred message card. It starts collapsed (scale 0) and is brought to full
   size by the celebration-pop animation, whose end state is kept (forwards). */
.celebration-message {
  position: fixed;
  left: 50%;
  top: 50%;
  z-index: 10000;
  padding: 40px 60px;
  text-align: center;
  background: white;
  border-radius: 20px;
  box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
  transform: translate(-50%, -50%) scale(0);
  animation: celebration-pop 0.5s ease-out forwards;
}

/* Grow from nothing, overshoot to 110%, then settle at 100%. The translate is
   repeated in every frame so the card stays centred while scaling. */
@keyframes celebration-pop {
  from {
    transform: translate(-50%, -50%) scale(0);
  }
  50% {
    transform: translate(-50%, -50%) scale(1.1);
  }
  to {
    transform: translate(-50%, -50%) scale(1);
  }
}

.celebration-message h2 {
  margin-bottom: 10px;
  color: #667eea;
  font-size: 2.5em;
}

.celebration-message p {
  color: #4a5568;
  font-size: 1.2em;
}
/* ---------- Base layout ---------- */

/* Global reset: remove default spacing and size every element by its border box. */
* {
  box-sizing: border-box;
  padding: 0;
  margin: 0;
}

/* Page shell: system font stack on a diagonal purple gradient that always
   fills at least the full viewport height. */
body {
  min-height: 100vh;
  padding: 20px;
  color: #2d3748;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
}

/* Centred content column. */
.container {
  margin: 0 auto;
  max-width: 900px;
}
/* ---------- Page header, score cards and progress bar ---------- */

header {
  padding: 40px 20px;
  margin-bottom: 40px;
  color: white;
  text-align: center;
}

h1 {
  margin-bottom: 10px;
  font-size: 3em;
  text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.2);
}

.subtitle {
  margin-bottom: 20px;
  font-size: 1.2em;
  opacity: 0.95;
}

/* Row of score cards; wraps onto extra lines when the row is too narrow. */
.stats {
  display: flex;
  flex-wrap: wrap;
  justify-content: center;
  gap: 20px;
  margin-top: 20px;
}

/* Translucent "frosted glass" card. */
.stat-card {
  padding: 15px 25px;
  background: rgba(255, 255, 255, 0.2);
  border: 1px solid rgba(255, 255, 255, 0.3);
  border-radius: 12px;
  backdrop-filter: blur(10px);
}

/* The number sits on its own line above its label. */
.stat-number {
  display: block;
  font-size: 2em;
  font-weight: 700;
}

.stat-label {
  font-size: 0.9em;
  opacity: 0.9;
}

/* Progress track; overflow is hidden so the fill is clipped to the rounded ends. */
.progress-bar {
  height: 8px;
  margin-top: 20px;
  overflow: hidden;
  background: rgba(255, 255, 255, 0.3);
  border-radius: 10px;
}

/* Progress fill: starts empty and animates any change of width. */
.progress-fill {
  width: 0%;
  height: 100%;
  background: white;
  border-radius: 10px;
  transition: width 0.5s ease;
}
/* ---------- Question card ---------- */

/* White card with a neutral grey left stripe; the stripe colour is replaced
   by one of the .answered-* state classes below. */
.quiz-card {
  padding: 30px;
  margin-bottom: 20px;
  overflow-wrap: break-word; /* Prevents long text from overflowing */
  background: white;
  border-left: 6px solid #cbd5e0;
  border-radius: 16px;
  box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2);
  transition: all 0.3s ease;
}

/* State stripes: green, red and purple respectively. */
.quiz-card.answered-correct {
  border-left-color: #48bb78;
}

.quiz-card.answered-wrong {
  border-left-color: #f56565;
}

.quiz-card.answered-tricky {
  border-left-color: #9f7aea;
}

/* Lift the card slightly and deepen its shadow on hover. */
.quiz-card:hover {
  box-shadow: 0 15px 40px rgba(0, 0, 0, 0.25);
  transform: translateY(-4px);
}

/* Small uppercase label above the title. */
.spec-label {
  margin-bottom: 12px;
  color: #667eea;
  font-size: 0.85em;
  font-weight: 700;
  letter-spacing: 0.5px;
  text-transform: uppercase;
}

.quiz-title {
  margin-bottom: 15px;
  color: #2d3748;
  font-size: 1.5em;
  font-weight: 700;
  line-height: 1.3;
}

.quiz-context {
  margin-bottom: 25px;
  color: #4a5568;
  font-size: 1.05em;
  line-height: 1.6;
}

/* Purple call-out box. */
.tricky-note {
  padding: 12px;
  margin-bottom: 20px;
  color: #6b46c1;
  font-weight: 600;
  background: #faf5ff;
  border-left: 4px solid #9f7aea;
  border-radius: 8px;
}
/* ---------- Answer choice buttons ---------- */

/* Row holding the choice buttons (stacked vertically by the mobile media query). */
.choice-buttons {
  display: flex;
  gap: 15px;
  margin-bottom: 20px;
}

/* Equal-width button whose content is centred as a flex row. */
.choice-btn {
  flex: 1;
  padding: 18px 24px;
  border: 3px solid #e2e8f0;
  background: white;
  border-radius: 12px;
  cursor: pointer;
  font-size: 1.1em;
  font-weight: 600;
  transition: all 0.3s ease;
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 10px;
}

/* Lift and highlight on hover (enabled buttons only) and on focus. */
.choice-btn:hover:not(:disabled),
.choice-btn:focus {
  transform: translateY(-2px);
  box-shadow: 0 6px 20px rgba(0, 0, 0, 0.15);
  border-color: #667eea;
}

/* Keyboard focus ring. The outline used to be removed outright, leaving only
   shadow and border-colour cues, which are dropped in forced-colours mode.
   Browsers without :focus-visible ignore this rule and show their default ring. */
.choice-btn:focus-visible {
  outline: 3px solid #434190;
  outline-offset: 2px;
}

.choice-btn:disabled {
  cursor: not-allowed;
  opacity: 0.6;
}

/* Result states. These rules come after the hover/focus rule and have the
   same specificity, so their border colour wins while the button is focused. */
.choice-btn.selected-correct {
  background: #c6f6d5;
  border-color: #48bb78;
  color: #22543d;
}

.choice-btn.selected-wrong {
  background: #fed7d7;
  border-color: #f56565;
  color: #742a2a;
}

.choice-btn.not-selected {
  opacity: 0.4;
}
/* ---------- Reveal button ---------- */

/* Full-width purple button. */
.reveal-btn {
  width: 100%;
  padding: 18px 24px;
  border: 3px solid #9f7aea;
  background: #faf5ff;
  border-radius: 12px;
  cursor: pointer;
  font-size: 1.1em;
  font-weight: 600;
  color: #6b46c1;
  transition: all 0.3s ease;
  margin-bottom: 20px;
}

/* Lift, glow and darken slightly on hover and on focus. */
.reveal-btn:hover,
.reveal-btn:focus {
  transform: translateY(-2px);
  box-shadow: 0 6px 20px rgba(159, 122, 234, 0.3);
  background: #f3e8ff;
}

/* Keyboard focus ring. The outline used to be removed outright; the remaining
   cues (a soft shadow and a near-identical background tint) are faint and are
   dropped in forced-colours mode. Browsers without :focus-visible ignore this
   rule and show their default ring. */
.reveal-btn:focus-visible {
  outline: 3px solid #6b46c1;
  outline-offset: 2px;
}
/* ---------- Collapsible answer panel ---------- */

/* Collapsed by default: zero height with the content clipped. Height is
   animated through max-height because a transition to "auto" is not possible. */
.answer-section {
  max-height: 0;
  overflow: hidden;
  transition: max-height 0.5s ease;
}

/* Expanded state. max-height is only an upper bound for the transition, but
   anything taller is clipped by overflow: hidden. The previous 800px cap cut
   off long answers on narrow screens, where the text wraps onto many lines,
   so the bound is now set well above any realistic answer height. */
.answer-section.visible {
  max-height: 3000px;
  margin-top: 20px;
}
/* ---------- Answer panel content ---------- */

.answer-content {
  padding: 25px;
  background: #f7fafc;
  border-left: 4px solid #667eea;
  border-radius: 12px;
}

/* Pill-shaped result badge; the modifier classes below set its colours. */
.result-badge {
  display: inline-block;
  padding: 8px 16px;
  margin-bottom: 15px;
  font-size: 0.9em;
  font-weight: 700;
  border-radius: 20px;
}

.result-badge.correct {
  color: #22543d;
  background: #c6f6d5;
}

.result-badge.wrong {
  color: #742a2a;
  background: #fed7d7;
}

.result-badge.tricky {
  color: #6b46c1;
  background: #e9d8fd;
}

.answer-label {
  margin-bottom: 10px;
  color: #2d3748;
  font-size: 1.1em;
  font-weight: 700;
}

.answer-text {
  margin-bottom: 20px;
  color: #4a5568;
  line-height: 1.7;
}

/* Implementation box with a neutral grey stripe by default. */
.implementation-box {
  padding: 20px;
  margin-bottom: 15px;
  background: white;
  border-left: 4px solid #e2e8f0;
  border-radius: 8px;
}

/* Both variants restate the neutral stripe explicitly. */
.implementation-box.app-impl,
.implementation-box.agent-impl {
  border-left-color: #e2e8f0;
}

/* Green stripe on the box matching the card's data-answer value, and on both
   boxes once the card carries the answered-tricky class. */
.quiz-card[data-answer="app"] .implementation-box.app-impl,
.quiz-card[data-answer="agent"] .implementation-box.agent-impl,
.quiz-card.answered-tricky .implementation-box.app-impl,
.quiz-card.answered-tricky .implementation-box.agent-impl {
  border-left-color: #48bb78;
}

.impl-title {
  margin-bottom: 8px;
  color: #2d3748;
  font-weight: 700;
}

.impl-text {
  color: #4a5568;
  line-height: 1.6;
}

/* Orange call-out box. */
.key-indicator {
  padding: 18px;
  margin-top: 15px;
  background: #fff5e6;
  border-left: 4px solid #f6ad55;
  border-radius: 8px;
}

.key-indicator-label {
  margin-bottom: 8px;
  color: #c05621;
  font-size: 0.95em;
  font-weight: 700;
}

.key-indicator-text {
  color: #744210;
  line-height: 1.6;
}
/* Keyboard focus ring for the load-more button.
   The button sets border and box-shadow in its inline style attribute, and
   inline declarations beat any selector. The old rule's border-color and
   box-shadow replacements therefore never applied, and its only real effect
   was "outline: none", which left keyboard users with no focus indicator.
   outline is not set inline, so this rule does take effect. White is used
   because the button sits directly on the purple page gradient. */
#load-more-btn:focus-visible {
  outline: 3px solid #fff;
  outline-offset: 3px;
}
/* ---------- Viewports up to 768px wide ---------- */
@media (max-width: 768px) {
  /* Stack the choice buttons vertically instead of side by side. */
  .choice-buttons {
    flex-direction: column;
  }

  /* Tighter card padding. */
  .quiz-card {
    padding: 20px;
  }

  /* Smaller main heading. */
  h1 {
    font-size: 2em;
  }
}
/* ---------- Page footer ---------- */

footer {
  padding-bottom: 30px;
  margin-top: 50px;
  color: rgba(255, 255, 255, 0.8);
  font-size: 0.9em;
  text-align: center;
}

footer p {
  line-height: 1.6;
}

/* Footer links: the native underline is replaced by a translucent bottom border. */
footer a {
  padding: 2px 1px;
  color: white;
  font-weight: 600;
  text-decoration: none;
  border-bottom: 1px solid rgba(255, 255, 255, 0.5);
  transition: all 0.2s ease;
}

/* On hover the border turns solid white and a faint highlight appears behind the text. */
footer a:hover {
  border-bottom-color: white;
  background-color: rgba(255, 255, 255, 0.1);
}
</style>
</head>
<body>
<div class="container">
<!-- Start screen: the user picks a quiz length here. -->
<div id="mode-selection" style="text-align: center; padding: 60px 20px;">
  <!-- NOTE(review): the glyph before the title is mis-encoded in this copy and cannot be
       recovered with certainty; restore the intended emoji from the original file. -->
  <h1 style="color: white; font-size: 3em; margin-bottom: 20px; text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.2);">π€ App or Agent Quiz</h1>
  <p style="color: white; font-size: 1.3em; margin-bottom: 40px; opacity: 0.95;">Read the spec. Decide how to build it.</p>
  <div style="display: flex; gap: 20px; justify-content: center; flex-wrap: wrap; max-width: 600px; margin: 0 auto;">
    <!-- Quick mode: calls startQuiz(10).
         A button may only contain phrasing content, so its rows are block-level
         spans rather than divs. Icon rows are decorative and hidden from
         assistive technology. -->
    <button type="button" onclick="startQuiz(10)" style="
      flex: 1;
      min-width: 250px;
      padding: 30px;
      background: white;
      border: none;
      border-radius: 16px;
      cursor: pointer;
      transition: all 0.3s ease;
      box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2);
    ">
      <!-- NOTE(review): this icon glyph is also mis-encoded; restore it from the original file. -->
      <span aria-hidden="true" style="display: block; font-size: 2em; margin-bottom: 10px;">β</span>
      <span style="display: block; font-size: 1.3em; font-weight: 700; color: #667eea; margin-bottom: 8px;">Quick Practice</span>
      <span style="display: block; color: #4a5568; font-size: 0.95em;">10 questions • ~5 minutes</span>
    </button>
    <!-- Full mode: no inline handler here. Presumably the script attaches the click
         handler and replaces the "Loading..." placeholder; verify against the script. -->
    <button type="button" id="full-challenge-btn" style="
      flex: 1;
      min-width: 250px;
      padding: 30px;
      background: white;
      border: none;
      border-radius: 16px;
      cursor: pointer;
      transition: all 0.3s ease;
      box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2);
    ">
      <span aria-hidden="true" style="display: block; font-size: 2em; margin-bottom: 10px;">🎯</span>
      <span style="display: block; font-size: 1.3em; font-weight: 700; color: #667eea; margin-bottom: 8px;">Full Challenge</span>
      <span id="full-challenge-details" style="display: block; color: #4a5568; font-size: 0.95em;">Loading...</span>
    </button>
  </div>
</div>
<!-- Quiz header with score cards and progress bar; hidden in the initial markup.
     The counters and the progress fill start at 0 here and are presumably
     updated by the script through their ids (verify against the script). -->
<header style="display: none;">
  <!-- NOTE(review): the glyph before the title looks mis-encoded in this copy. -->
  <h1>π€ App or Agent Quiz</h1>
  <div class="subtitle">Read the spec. Decide how to build it.</div>

  <div class="stats">
    <div class="stat-card">
      <span class="stat-number" id="score">0</span>
      <span class="stat-label">Correct</span>
    </div>
    <div class="stat-card">
      <span class="stat-number" id="answered">0</span>
      <span class="stat-label">Answered</span>
    </div>
    <div class="stat-card">
      <span class="stat-number" id="total-count">0</span>
      <span class="stat-label">Total</span>
    </div>
  </div>

  <div class="progress-bar">
    <div class="progress-fill" id="progress-fill"></div>
  </div>
</header>
<div id="quiz-container"></div>
<!-- Hidden in the initial markup; its button calls loadMoreQuestions(). -->
<div id="load-more-container" style="text-align: center; margin: 40px 0; display: none;">
  <!-- An explicit type keeps this a plain action button even if it is later placed inside a form. -->
  <button type="button" id="load-more-btn" onclick="loadMoreQuestions()" style="
    padding: 20px 40px;
    background: white;
    border: 3px solid #667eea;
    border-radius: 12px;
    font-size: 1.2em;
    font-weight: 600;
    color: #667eea;
    cursor: pointer;
    transition: all 0.3s ease;
    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.2);
  ">
    🎯 Continue with full quiz
  </button>
</div>
</div>
<!-- Page footer: author credit and source link. Both links open in a new tab
     and use rel="noopener noreferrer". -->
<footer>
  <p>
    A project by
    <a href="https://github.com/bm-github" target="_blank" rel="noopener noreferrer">bm-github</a>.
    <br>
    View the source code on
    <a href="https://github.com/bm-github/apporagent" target="_blank" rel="noopener noreferrer">GitHub</a>.
  </p>
</footer>
<script>
let quizData = [{
spec: "Build a tool that extracts structured data from receipts",
context: "Users upload photos of receipts. The tool should extract merchant name, date, total amount, and line items into a structured format.",
answer: "app",
reasoning: "This is a straightforward transformation task: image β structured data. There's no decision-making, no goal-seeking, no uncertainty about what to do next. It performs OCR (likely using a dedicated OCR service like Textract or Tesseract, not an expensive vision model), parses the text, and outputs JSON.",
appApproach: "Use OCR service β parse text with regex/rules β validate format β return structured JSON",
agentApproach: "Not needed. The task doesn't require planning, iteration, or decisions about what to do.",
keyIndicator: "π One-way transformation with a clear input and output. No 'what should I do next?' decisions."
}, {
spec: "Build a system to manage customer support tickets from start to resolution",
context: "When a ticket comes in, the system should gather necessary information, determine which team should handle it, escalate when needed, and track through to resolution.",
answer: "agent",
reasoning: "This requires multi-step decision-making: What information is missing? Should I ask the customer or check internal systems? Which team is best suited? Is this urgent? Has enough time passed to escalate? Is the issue actually resolved? The system needs to form a plan, adapt based on responses, and determine when its goal (resolved ticket) is achieved.",
appApproach: "Could build a simple ticket router that classifies and assigns based on keywords, but won't handle the full end-to-end flow described in the spec.",
agentApproach: "System maintains context across interactions β decides what info to gather β routes intelligently β monitors progress β escalates based on reasoning β verifies resolution",
keyIndicator: "π Goal-seeking behaviour ('get this ticket resolved'), multi-step decisions, adapts based on context and responses."
}, {
spec: "Create a tool that generates email subject lines",
context: "Given email body content, generate 3-5 subject line options that are attention-grabbing and relevant.",
answer: "app",
reasoning: "This is a single-step transformation: email body β subject lines. The LLM generates options based on the content, but there's no reasoning about strategy, no decisions about whether the email should even be sent, no consideration of recipient preferences. It's a function call that returns output.",
appApproach: "Send email body to LLM with prompt β receive subject line options β return to user",
agentApproach: "Not needed unless the spec required reasoning like 'should this email be sent now or later?' or 'which recipient segment should get which subject line?'",
keyIndicator: "π Stateless transformation. No memory of previous emails, no strategic decisions, just input β output."
}, {
spec: "Build a system that monitors production and handles incidents",
context: "Watch metrics and logs. When something looks wrong, diagnose the issue, attempt fixes, and escalate if needed. Track incidents through resolution.",
answer: "agent",
reasoning: "This requires hypothesis formation ('is this a DB issue or network issue?'), deciding what to investigate next, running diagnostic commands, interpreting results, trying different solutions, and determining when the issue is actually resolved. It's iterative problem-solving with uncertain outcomes.",
appApproach: "Could build alerts and runbooks, but that's just triggering predefined responses, not the adaptive diagnosis described in the spec.",
agentApproach: "Detect anomaly β form hypotheses β decide which logs/metrics to check β interpret findings β try solution β verify success β iterate if needed β escalate if stuck",
keyIndicator: "π Diagnostic reasoning, iterative experimentation, 'done' criteria that the system determines itself."
}, {
spec: "Create a documentation search tool",
context: "Engineers should be able to ask questions and get relevant documentation snippets. Handle queries like 'how do I authenticate?' or 'what's the rate limit?'",
answer: "app",
reasoning: "This is semantic search with retrieval: question β find relevant docs β return snippets. While it uses an LLM for embeddings and possibly reranking, there's no ongoing decision-making. It doesn't ask clarifying questions, doesn't form a learning plan, doesn't decide the engineer needs something different than what they asked for.",
appApproach: "Query β generate embedding β search vector DB β retrieve and rank relevant chunks β return to user",
agentApproach: "Would involve reasoning like: 'They asked about auth, but based on their role and recent searches, they probably need the OAuth flow specifically, not API keys. Let me ask a clarifying question...'",
keyIndicator: "π Match and retrieve. If it just finds and returns, it's an app. If it reasons about what the user really needs, it's an agent."
}, {
spec: "Build a code review assistant",
context: "When a PR is opened, review the code and provide feedback on potential issues, best practices, and suggestions for improvement.",
answer: "tricky",
reasoning: "This depends on the depth of review required. If it's checking linting rules, common bug patterns, and style guides β that's an App (pattern matching). If it's evaluating architectural decisions, understanding trade-offs, considering team context, and reasoning about 'is this the right approach for the problem?' β that's an Agent.",
appApproach: "Run linters β check against known bug patterns β flag style violations β surface results",
agentApproach: "Understand the PR's purpose β evaluate if the approach fits the problem β reason about trade-offs β consider broader codebase context β provide architectural feedback β ask questions if intent is unclear",
keyIndicator: "π Pattern matching = App. Architectural reasoning and trade-off evaluation = Agent. The spec's phrase 'best practices and suggestions' could go either way!"
}, {
spec: "Build a weekly competitor intelligence report generator",
context: "Every week, produce a report on what our competitors are doing: new features, pricing changes, market moves, hiring, etc.",
answer: "agent",
reasoning: "This requires deciding what to investigate, which sources to check, evaluating credibility, determining what's worth including, synthesising findings, and knowing when enough information has been gathered. It's research with judgement calls at every step.",
appApproach: "Could scrape specific URLs on a schedule and dump changes, but that's just monitoring, not the intelligence analysis the spec describes.",
agentApproach: "Determine what to investigate this week β decide which sources to check β evaluate credibility β ask follow-up questions based on findings β synthesise into coherent narrative β determine what's actionable",
keyIndicator: "π Information foraging with judgement ('what's important?'), synthesis, and knowing when to stop researching."
}, {
spec: "Create a meeting summarisation tool",
context: "Record meetings and generate concise summaries with key decisions, action items, and important discussion points.",
answer: "app",
reasoning: "This is transcription β summarisation. The LLM condenses the transcript and extracts structured information (decisions, action items). There's no planning involved, no decisions about what to do, no iteration. The output is deterministic given the input.",
appApproach: "Transcribe audio β send transcript to LLM with prompt β extract key points, decisions, action items β format and return",
agentApproach: "Not needed unless the spec required follow-up like 'determine if action items from last meeting were completed' or 'decide which attendees need which information'.",
keyIndicator: "π One-pass transformation of audio β text β summary. No decisions about 'what should happen next?'"
}, {
spec: "Build an email inbox organiser",
context: "Automatically organise incoming emails into folders, mark priority emails, and surface what's important.",
answer: "tricky",
reasoning: "This could be either depending on implementation. If it's rule-based classification (sender, keywords β folder) β App. If it understands your priorities, learns from your behaviour, reasons about urgency based on context ('this looks routine but mentions tomorrow's deadline') β Agent.",
appApproach: "Classify emails by sender/subject/keywords β apply rules β file into folders β mark anything from VIP list as priority",
agentApproach: "Learn user's priorities over time β reason about context and urgency β understand 'this email seems unimportant but actually requires action' β adapt based on user feedback",
keyIndicator: "π Rule-based sorting = App. Reasoning about priorities and context = Agent. The word 'important' in the spec is the tell β who decides what's important?"
}, {
spec: "Create a test data generator for our API",
context: "Given our API schema, generate realistic test data that satisfies all constraints and relationships between entities.",
answer: "app",
reasoning: "This is schema β valid data generation. While it might use an LLM to create realistic-looking names, addresses, etc., there's no decision-making or goal-seeking. It's a function: read schema, generate data that satisfies constraints, return.",
appApproach: "Parse schema β identify constraints β generate random data that satisfies them β validate β return",
agentApproach: "Not needed. The task has a clear algorithm: generate valid data. No uncertainty about what to do.",
keyIndicator: "π Deterministic generation task with clear constraints. No 'figure out what to do' aspect."
}, {
spec: "Build a system that optimises our cloud infrastructure costs",
context: "Analyse our cloud spending and make changes to reduce costs while maintaining performance. Should handle rightsizing, reserved instances, spot instances, etc.",
answer: "agent",
reasoning: "This requires analysing usage patterns, forming hypotheses about optimisation opportunities, evaluating trade-offs (cost vs. performance vs. reliability), making changes, monitoring impact, and iterating. It's multi-objective optimisation with reasoning at each step.",
appApproach: "Could generate a report with recommendations, but actually 'making changes' and 'optimising' requires the decision-making the spec describes.",
agentApproach: "Analyse spending β identify opportunities β evaluate trade-offs β propose changes β implement β monitor impact β adjust if needed β iterate",
keyIndicator: "π Multi-objective optimisation with trade-offs, experimentation, and iteration. The word 'optimise' often signals agent behaviour."
}, {
spec: "Create a sentiment analysis tool for customer reviews",
context: "Analyse customer reviews and classify them as positive, negative, or neutral. Include confidence scores.",
answer: "app",
reasoning: "This is classification: review text β sentiment label. While you might use an LLM, there's no decision-making beyond the classification itself. It's a stateless transformation that outputs a label and confidence score.",
appApproach: "Send review to LLM or classification model β receive sentiment and confidence β return result",
agentApproach: "Not needed. Classification tasks are transformations, not goal-seeking behaviour.",
keyIndicator: "π Fixed classification task. Input β output, no decisions about what to do with the result."
}, {
spec: "Build a password strength checker",
context: "Users enter a password and get feedback on its strength with specific recommendations for improvement (add numbers, special characters, etc.).",
answer: "app",
reasoning: "This is pure validation: password string β strength score + suggestions. It applies deterministic rules (length checks, character variety, common password database lookups). No decisions about what to do next, just evaluate and return feedback.",
appApproach: "Check password against rules β compare to breach databases β calculate strength score β generate specific improvement suggestions β return results",
agentApproach: "Not needed. The evaluation criteria are fixed and the output is purely based on the input string.",
keyIndicator: "π Rule-based validation with deterministic output. No context or goals beyond the single evaluation."
}, {
spec: "Create a personal research assistant for investment decisions",
context: "Help users research potential investments by gathering data from multiple sources, analysing trends, checking fundamentals, and providing a reasoned recommendation.",
answer: "agent",
reasoning: "This requires deciding what to research, which sources are credible, what metrics matter for this specific investment, reasoning about risk vs. reward, and synthesising findings into a recommendation. Each research path depends on what was learned previously.",
appApproach: "Could pull predetermined metrics from APIs and display them, but that's just data retrieval, not the research and reasoning the spec describes.",
agentApproach: "Determine research strategy β gather data from multiple sources β evaluate credibility β identify what's missing β dig deeper based on findings β reason about risks and opportunities β synthesise recommendation",
keyIndicator: "π Multi-step research with adaptive strategy. The system decides what to investigate based on what it learns."
}, {
spec: "Build a log parsing and formatting tool",
context: "Take raw application logs in various formats and convert them to a standardised JSON structure with parsed timestamps, log levels, and messages.",
answer: "app",
reasoning: "This is format conversion: raw logs β structured JSON. It applies parsing rules, regex patterns, and format transformations. Even if it uses an LLM to handle unusual formats, there's no decision-making beyond the transformation itself.",
appApproach: "Detect log format β apply parsing rules β extract fields with regex β normalise timestamps β validate structure β output JSON",
agentApproach: "Not needed. This is a stateless transformation with clear input/output expectations.",
keyIndicator: "π Format transformation task. No reasoning about what to do with the logs, just parse and structure."
}, {
spec: "Create a system that onboards new engineers",
context: "Guide new hires through setup, documentation, first tasks, and check-ins. Adapt the pace based on their progress and background. Ensure they're ready to contribute.",
answer: "agent",
reasoning: "This requires assessing the engineer's background, deciding what they need to learn first, monitoring their progress, adjusting the pace, determining when they're ready for more complex tasks, and knowing when onboarding is complete. It's goal-seeking behaviour with constant adaptation.",
appApproach: "Could provide a static checklist of onboarding steps, but that's not the adaptive guidance the spec describes.",
agentApproach: "Assess background β create personalised plan β assign appropriate tasks β monitor progress β adjust difficulty β provide help when stuck β determine readiness β iterate until fully onboarded",
keyIndicator: "π Goal-driven ('get this person productive') with continuous assessment and adaptation based on individual progress."
}, {
spec: "Build a duplicate detection system for user submissions",
context: "When users submit content (posts, tickets, reports), identify if it's substantially similar to existing submissions and flag potential duplicates.",
answer: "app",
reasoning: "This is similarity matching: new submission β find near-duplicates in existing data. It uses embeddings or fuzzy matching to compare content, but there's no reasoning about what to do with duplicates or decision-making beyond the matching itself.",
appApproach: "Generate embedding for new submission β search for similar embeddings β calculate similarity scores β return matches above threshold",
agentApproach: "Not needed unless the spec required decisions like 'should these be merged?' or 'which is the canonical version?'",
keyIndicator: "π Similarity search and retrieval. If it just finds and flags matches, it's an app."
}, {
spec: "Create a content moderation system",
context: "Review user-generated content for policy violations. Should catch obvious issues automatically, but escalate ambiguous cases for human review with relevant context.",
answer: "tricky",
reasoning: "This depends on the level of judgement required. If it's checking against clear rules (banned words, explicit content detection) β App. If it's reasoning about context, understanding nuance, evaluating edge cases, and deciding what's truly policy-violating β Agent.",
appApproach: "Run content through classifiers β check banned word lists β detect explicit content β flag matches β escalate based on confidence thresholds",
agentApproach: "Understand content context β reason about intent β evaluate nuance and edge cases β determine severity β decide if escalation needed β provide reasoning for human reviewers",
keyIndicator: "π Pattern matching and classification = App. Contextual reasoning and judgement calls = Agent. The word 'ambiguous' suggests some agent-like reasoning is expected."
}, {
spec: "Build a dynamic pricing calculator",
context: "Calculate product prices based on current demand, inventory levels, competitor prices, and customer segment. Adjust in real-time to optimise revenue.",
answer: "agent",
reasoning: "This requires continuously monitoring multiple signals, reasoning about trade-offs (higher price vs. conversion rate), experimenting with different price points, learning what works, and adapting strategy. It's optimisation with multi-factor decision-making.",
appApproach: "Could apply a fixed formula based on inputs, but 'optimise revenue' and real-time adaptation requires the strategic reasoning described.",
agentApproach: "Monitor demand signals β evaluate competitor pricing β assess inventory β reason about customer sensitivity β test price points β measure outcomes β adapt strategy β optimise for revenue",
keyIndicator: "π Continuous optimisation with experimentation and learning. The word 'optimise' combined with multiple variables signals agent behaviour."
}, {
spec: "Create a changelog generator from git commits",
context: "Scan git history and generate a human-readable changelog with features, fixes, and breaking changes categorised appropriately.",
answer: "app",
reasoning: "This is git history β formatted changelog. It parses commit messages (possibly using conventional commit patterns), categorises changes, and formats output. While an LLM might improve the descriptions, there's no decision-making or iterationβjust transformation.",
appApproach: "Parse git commits β categorise by type (feat/fix/breaking) β group by category β format descriptions β generate markdown",
agentApproach: "Not needed. The task is extracting and reformatting existing information.",
keyIndicator: "π Data extraction and formatting. Input (commits) β output (changelog), no strategic decisions."
}, {
spec: "Build a system that manages our technical debt",
context: "Track technical debt across the codebase, prioritise what to address based on impact and effort, and create remediation plans. Monitor progress and adjust priorities.",
answer: "agent",
reasoning: "This requires identifying debt, evaluating impact, reasoning about trade-offs (fix now vs. later), prioritising based on team capacity, creating actionable plans, monitoring whether fixes actually worked, and re-prioritising. It's ongoing judgement and adaptation.",
appApproach: "Could identify code issues with static analysis, but the prioritisation, planning, and adaptive management requires the reasoning described.",
agentApproach: "Identify debt β evaluate business impact β assess effort required β reason about priorities β create remediation plan β monitor progress β adjust based on outcomes β iterate",
keyIndicator: "π Continuous prioritisation with trade-off evaluation and adaptive planning. Multiple feedback loops."
}, {
spec: "Create an API response caching layer",
context: "Cache API responses intelligently based on request patterns, invalidate when data changes, and optimise cache hit rates.",
answer: "tricky",
reasoning: "This could be either. If it's rule-based (cache for X minutes, invalidate on updates) β App. If it's learning patterns, reasoning about what should be cached, predicting cache utility, and adapting strategy β Agent.",
appApproach: "Cache responses with TTL β invalidate on write operations β return cached data on hits β standard cache eviction policies",
agentApproach: "Learn request patterns β predict cache value β reason about when to preload β adapt TTL based on data change frequency β optimise strategy based on hit rates",
keyIndicator: "π Rule-based caching = App. Learning and adaptive optimisation = Agent. The word 'intelligently' and 'optimise' suggest more than basic caching."
}, {
spec: "Build an SQL query generator from natural language",
context: "Users describe what data they want in plain English, and the system generates the appropriate SQL query to retrieve it from our database.",
answer: "app",
reasoning: "This is natural language β SQL translation. While it uses an LLM, there's no iteration or decision-making beyond the translation. It converts intent to query syntax and returns it. The user executes it.",
appApproach: "Parse natural language intent β understand database schema β generate SQL query β validate syntax β return query",
agentApproach: "Would involve executing queries, checking results, refining if wrong, reasoning about performance, and iterating until the right data is retrieved.",
keyIndicator: "π One-way translation task. If it just generates the query without executing and verifying results, it's an app."
},
{
spec: "Build a tool to process employee expense reports",
context: "Employees submit receipts and other details for reimbursement. The tool should extract key data, check it against company policy for violations (e.g., spending limits), and flag any non-compliant items.",
answer: "app",
reasoning: "This is a data extraction and validation pipeline. It follows a fixed set of rules (the company policy) to check each expense. There is no long-term goal or complex decision-making; it's a series of deterministic checks: extract -> compare to rule -> pass/fail.",
appApproach: "Use OCR to read receipts β parse amounts and dates β compare against predefined policy rules (e.g., 'meal limit < Β£40') β validate against project codes β flag non-compliant entries for review.",
agentApproach: "Not needed unless the spec required it to *negotiate* with an employee about an out-of-policy expense or *decide* if an exception is warranted based on context (e.g., 'this was an important client dinner').",
keyIndicator: "π Rule-based validation. The logic ('is this expense valid?') is a series of deterministic checks, not strategic reasoning."
},
{
spec: "Create a system to actively manage a user's investment portfolio",
context: "Based on a user's stated risk tolerance and financial goals (e.g., 'retire in 20 years'), the system should select investments, execute trades, rebalance the portfolio over time, and adjust its strategy based on market conditions and news.",
answer: "agent",
reasoning: "This is a classic long-running, goal-seeking task. It requires continuous monitoring, reasoning about trade-offs (risk vs. reward), forming a plan (buy/sell/hold), and adapting that plan based on new, unpredictable information. The goal is to 'optimise' the portfolio, not just perform a single calculation.",
appApproach: "Could build a simple calculator that suggests a static asset allocation based on a quiz, but it would not 'actively manage' or 'adjust strategy' as the spec requires.",
agentApproach: "Define user goal β create initial investment plan β continuously monitor market data & news β reason about potential impacts β decide whether to rebalance, buy, or sell β execute trades β report on progress β repeat.",
keyIndicator: "π Continuous, goal-driven optimisation with adaptive planning. The system decides *what to do* and *when to do it* to achieve a long-term objective."
},
{
spec: "Build a system to detect and handle potentially fraudulent financial transactions",
context: "Monitor real-time transactions for a bank. When a suspicious transaction is identified, the system should investigate further and decide whether to block the transaction and alert the user.",
answer: "tricky",
reasoning: "This hinges on the depth of 'investigate' and 'decide'. A simple system (App) would just use a machine learning model to flag transactions that match known fraud patterns. A more advanced system (Agent) would actively gather more context to make a better, reasoned decision, much like a human fraud analyst.",
appApproach: "A classification model scores each transaction based on features (amount, location, time, merchant type). If the score exceeds a fixed threshold, the transaction is automatically blocked. This is pattern matching.",
agentApproach: "Detect an anomaly β form a hypothesis ('is this person unexpectedly travelling?') β decide what extra data to check (e.g., user's past travel history, recent login locations) β reason about the combined evidence β make a final decision to block or allow.",
keyIndicator: "π Pattern matching vs. investigative reasoning. Simply 'detecting' is often an app; 'investigating' to make a high-stakes decision is often an agent. The spec includes both."
},
{
spec: "Build a sanctions screening tool for customer onboarding",
context: "When a new customer signs up, the tool must check their name and details against global sanctions lists (e.g., OFAC). The system should handle slight variations in names and flag potential matches for review.",
answer: "app",
reasoning: "This is a classic data matching and retrieval task. It takes an input (a name), compares it against a fixed dataset (the sanctions lists) using fuzzy logic, and outputs a result (match/no match/potential match). There is no planning, no goal-seeking, and no multi-step reasoning.",
appApproach: "Normalise the input name β use a fuzzy matching algorithm (like Levenshtein distance) to compare against a database of sanctioned individuals β return any matches that exceed a predefined similarity threshold.",
agentApproach: "Not needed unless the spec required the system to *independently investigate* an ambiguous match by searching for news articles or corporate records to determine if it's the same person, which is beyond simple screening.",
keyIndicator: "π Data matching and retrieval against a fixed dataset. If the primary action is 'check this against a list,' it's an app."
},
{
spec: "Build a system to investigate and triage Anti-Money Laundering (AML) alerts",
context: "Our transaction monitoring system generates thousands of alerts for suspicious activity. This system needs to investigate each alert to determine if it represents a genuine risk that requires a full-blown Suspicious Activity Report (SAR).",
answer: "agent",
reasoning: "An alert is just a starting point. 'Investigating' requires a multi-step, goal-oriented process: forming a hypothesis, deciding what evidence to gather next (e.g., transaction history, related accounts, customer's profile), synthesising the findings, and making a reasoned judgement. This is not a single transformation; it's an iterative inquiry.",
appApproach: "Could build a simple dashboard that pulls pre-defined data points when an alert is clicked, but it wouldn't decide what to look for or make a recommendation. The human would still be the one investigating.",
agentApproach: "Receive alert β plan investigation steps β pull customer's full transaction history β analyse for patterns (e.g., structuring, pass-through activity) β check for links to other high-risk accounts β synthesise a narrative β recommend 'close case' or 'escalate for SAR filing'.",
keyIndicator: "π Investigative reasoning and evidence gathering. The system has to decide *what to look for next* to achieve its goal of triaging the alert."
},
{
spec: "Create a credit risk assessment tool for loan applications",
context: "Evaluate a loan applicant's data to determine their creditworthiness and the likelihood they will default on the loan. The output should be a risk score and a recommendation to approve or deny the application.",
answer: "tricky",
reasoning: "This is a classic 'it depends on the complexity' problem. For a simple consumer loan, this is an App that just runs a quantitative model. For a complex commercial loan, a system that acts more like a human underwriter, reasoning about qualitative factors, would be an Agent.",
appApproach: "Take quantitative inputs (credit score, income, debt-to-income ratio) β feed them into a pre-trained statistical model or a fixed scorecard β output a risk score and a deterministic approve/deny decision based on a threshold.",
agentApproach: "For a business loan, the system would analyse the company's business plan, market trends, and management team experience. It might reason that 'although their current cash flow is tight, their new contract makes them a good risk' or identify missing information and decide to ask for more documents.",
keyIndicator: "π Scorecard calculation vs. holistic judgement. 'Assessing risk' can be a simple, quantitative calculation (App) or a complex, context-aware evaluation that weighs qualitative evidence (Agent)."
},
{
spec: "Create a system that coordinates deployments across microservices",
context: "When deploying updates, determine deployment order based on dependencies, monitor health checks, rollback if issues detected, and ensure zero-downtime transitions.",
answer: "agent",
reasoning: "This requires planning deployment order, monitoring during execution, detecting problems, deciding whether to continue or rollback, potentially retrying, and determining when the deployment is truly successful. It's orchestration with continuous decision-making.",
appApproach: "Could follow a static deployment pipeline, but the adaptive monitoring, decision-making, and rollback logic described requires agent reasoning.",
agentApproach: "Analyse dependencies β plan deployment order β execute in stages β monitor health β detect issues β decide continue/rollback β retry if needed β verify success",
keyIndicator: "π Orchestration with continuous monitoring and adaptive decision-making. Multiple 'what should I do?' decision points."
}];
// Quiz-wide counters shared by the answer handlers and updateStats():
//   score    - incremented for each correct choice and for each revealed "tricky" card
//   answered - incremented once per card the player has resolved
let score = 0,
  answered = 0;
/**
 * Returns a randomly ordered copy of `array` (Fisher–Yates shuffle).
 * The input itself is left untouched.
 *
 * @param {Array} array - items to shuffle
 * @returns {Array} a new array holding the same items in random order
 */
function shuffleArray(array) {
  const copy = [...array];
  let last = copy.length - 1;
  // Walk from the end towards the front, swapping each slot with a randomly
  // chosen slot at or before it.
  while (last > 0) {
    const pick = Math.floor(Math.random() * (last + 1));
    const held = copy[last];
    copy[last] = copy[pick];
    copy[pick] = held;
    last -= 1;
  }
  return copy;
}
/**
 * Shows the "load more" container when all three conditions hold:
 *   - `initialChoice` is 10 (presumably the short-quiz option chosen at start;
 *     it is set outside this block - confirm against startQuiz),
 *   - every question currently in `quizData` has been answered,
 *   - `fullQuizData` holds more questions than `quizData`.
 * Does nothing otherwise; it never hides the container.
 */
function checkIfShouldShowLoadMore() {
  if (initialChoice !== 10) {
    return;
  }
  if (answered !== quizData.length) {
    return;
  }
  if (!(quizData.length < fullQuizData.length)) {
    return;
  }
  const loadMore = document.getElementById('load-more-container');
  loadMore.style.display = 'block';
}
/**
 * Builds the DOM card for one quiz question.
 *
 * The card carries `data-index` and `data-answer` so the answer handlers can
 * find it and check the player's choice. A question whose answer is 'tricky'
 * gets a single "Reveal Answer" button; every other question gets App/Agent
 * choice buttons. The explanation section is rendered up front; the handlers
 * later add the `visible` class to it.
 *
 * Click handlers are attached with addEventListener instead of inline
 * `onclick` attributes, so the card does not depend on the handlers being
 * reachable by name from markup and stays compatible with a strict CSP.
 *
 * NOTE(review): the item fields are interpolated as HTML, not text. This is
 * only safe while the quiz data is trusted, author-written content.
 *
 * @param {Object} item - quiz entry with `spec`, `context`, `answer`,
 *   `reasoning`, `appApproach`, `agentApproach` and `keyIndicator` strings
 * @param {number} index - position of the entry, used as the card's id
 * @returns {HTMLDivElement} the detached card element
 */
function createQuizCard(item, index) {
  const card = document.createElement('div');
  card.className = 'quiz-card';
  card.dataset.index = index;
  card.dataset.answer = item.answer;

  const isTricky = item.answer === 'tricky';
  const trickyNote = isTricky
    ? '<div class="tricky-note">⚠️ This one could go either way depending on implementation!</div>'
    : '';
  // Explicit type="button" keeps these from acting as submit buttons should
  // the quiz ever be placed inside a <form>.
  const buttonsHTML = isTricky
    ? `
    <button class="reveal-btn" type="button">
      🔍 Reveal Answer
    </button>
  `
    : `
    <div class="choice-buttons">
      <button class="choice-btn" data-choice="app" type="button">
        <span>🔧</span> <span>App</span>
      </button>
      <button class="choice-btn" data-choice="agent" type="button">
        <span>🤖</span> <span>Agent</span>
      </button>
    </div>
  `;

  card.innerHTML = `
    <div class="spec-label">📋 Product Spec</div>
    <div class="quiz-title">${item.spec}</div>
    <div class="quiz-context">${item.context}</div>
    ${trickyNote}
    ${buttonsHTML}
    <div class="answer-section">
      <div class="answer-content">
        <div class="result-message"></div>
        <div class="answer-label">Why:</div>
        <div class="answer-text">${item.reasoning}</div>
        <div class="implementation-box app-impl">
          <div class="impl-title">🔧 App Approach:</div>
          <div class="impl-text">${item.appApproach}</div>
        </div>
        <div class="implementation-box agent-impl">
          <div class="impl-title">🤖 Agent Approach:</div>
          <div class="impl-text">${item.agentApproach}</div>
        </div>
        <div class="key-indicator">
          <div class="key-indicator-label">🎯 Key Indicator:</div>
          <div class="key-indicator-text">${item.keyIndicator}</div>
        </div>
      </div>
    </div>
  `;

  // Wire up whichever controls this card actually contains.
  const revealBtn = card.querySelector('.reveal-btn');
  if (revealBtn) {
    revealBtn.addEventListener('click', () => revealTrickyAnswer(index));
  }
  card.querySelectorAll('.choice-btn').forEach(btn => {
    btn.addEventListener('click', () => selectAnswer(index, btn.dataset.choice));
  });

  return card;
}
/**
 * Handles the "Reveal Answer" button on a tricky card.
 *
 * Tricky questions have no wrong answer: revealing one counts as answered and
 * also earns a point. The card is marked `answered-tricky`, the button is
 * disabled, the explanation is shown and the stats are refreshed. Calling this
 * again for the same card, or for an index with no rendered card, is a no-op.
 *
 * @param {number} index - value of the card's `data-index`
 */
function revealTrickyAnswer(index) {
  // Scope the lookup to quiz cards so an unrelated element that happens to
  // carry the same data-index cannot be picked up.
  const card = document.querySelector(`.quiz-card[data-index="${index}"]`);
  if (!card || card.classList.contains('answered-tricky')) {
    return;
  }

  const answerSection = card.querySelector('.answer-section');
  const resultMessage = card.querySelector('.result-message');
  const revealBtn = card.querySelector('.reveal-btn');

  answered++;
  score++;
  card.classList.add('answered-tricky');

  revealBtn.disabled = true;
  revealBtn.style.opacity = '0.5';
  revealBtn.style.cursor = 'not-allowed';

  resultMessage.innerHTML = '<div class="result-badge tricky">✨ Both approaches can work!</div>';
  answerSection.classList.add('visible');
  updateStats();

  // Scroll after a short delay rather than immediately (presumably so the
  // newly visible section has been laid out first).
  setTimeout(() => {
    answerSection.scrollIntoView({
      behavior: 'smooth',
      block: 'nearest'
    });
  }, 100);
}
/**
 * Records the player's App/Agent choice for one card and reveals the answer.
 *
 * Compares `choice` with the card's `data-answer`, updates the `answered` and
 * `score` counters, marks the card `answered-correct` or `answered-wrong`,
 * disables and styles both choice buttons, shows the result badge and the
 * explanation, then refreshes the stats. A card that has already been
 * answered, or an index with no rendered card, is ignored.
 *
 * @param {number} index - value of the card's `data-index`
 * @param {string} choice - the player's pick, 'app' or 'agent'
 */
function selectAnswer(index, choice) {
  // Scope the lookup to quiz cards so an unrelated element that happens to
  // carry the same data-index cannot be picked up.
  const card = document.querySelector(`.quiz-card[data-index="${index}"]`);
  if (!card) {
    return;
  }
  if (card.classList.contains('answered-correct') || card.classList.contains('answered-wrong')) {
    return;
  }

  const correctAnswer = card.dataset.answer;
  const answerSection = card.querySelector('.answer-section');
  const resultMessage = card.querySelector('.result-message');
  const isCorrect = choice === correctAnswer;

  answered++;
  if (isCorrect) {
    score++;
  }
  card.classList.add(isCorrect ? 'answered-correct' : 'answered-wrong');

  card.querySelectorAll('.choice-btn').forEach(btn => {
    btn.disabled = true;
    // Prefer an explicit data-choice attribute when the markup provides one;
    // otherwise fall back to reading the button label.
    const btnType = btn.dataset.choice || (btn.textContent.includes('App') ? 'app' : 'agent');
    if (btnType === choice) {
      btn.classList.add(isCorrect ? 'selected-correct' : 'selected-wrong');
    } else {
      btn.classList.add('not-selected');
    }
  });

  resultMessage.innerHTML = isCorrect
    ? '<div class="result-badge correct">✅ Correct!</div>'
    : `<div class="result-badge wrong">❌ Not quite — this is an ${correctAnswer}</div>`;

  answerSection.classList.add('visible');
  updateStats();

  // Scroll after a short delay rather than immediately (presumably so the
  // newly visible section has been laid out first).
  setTimeout(() => {
    answerSection.scrollIntoView({
      behavior: 'smooth',
      block: 'nearest'
    });
  }, 100);
}
/**
 * Refreshes the scoreboard from the current counters.
 *
 * Writes `score`, `answered` and the number of loaded questions into their
 * elements and sets the progress bar width to the answered percentage (0%
 * when no questions are loaded). Once every loaded question is answered it
 * triggers the celebration on a perfect score and then checks whether the
 * "load more" control should be shown.
 */
function updateStats() {
  const totalCount = quizData.length;
  const writeText = (id, value) => {
    document.getElementById(id).textContent = value;
  };

  writeText('score', score);
  writeText('answered', answered);
  writeText('total-count', totalCount);

  let progress = 0;
  if (totalCount > 0) {
    progress = (answered / totalCount) * 100;
  }
  document.getElementById('progress-fill').style.width = `${progress}%`;

  const quizFinished = answered > 0 && answered === totalCount;
  if (!quizFinished) {
    return;
  }
  if (score === totalCount) {
    triggerCelebration();
  }
  checkIfShouldShowLoadMore();
}
/**
 * Rebuilds the quiz from scratch: empties `#quiz-container` and appends one
 * card per entry in `quizData`, using the entry's position as its index.
 */
function renderQuiz() {
  const container = document.getElementById('quiz-container');
  container.innerHTML = '';
  for (const [position, entry] of quizData.entries()) {
    const card = createQuizCard(entry, position);
    container.appendChild(card);
  }
}
/**
 * Plays the perfect-score celebration.
 *
 * Appends a `celebration-message` banner to the page, plays its
 * `celebration-pop` animation in reverse after 3 seconds and removes it
 * 300 ms later. It also adds 100 `confetti` elements, one every 30 ms, each
 * with a random horizontal position, colour and animation duration (2-5 s);
 * every piece is removed 5 seconds after it is added.
 * The animations themselves are presumably defined in the page's stylesheet.
 */
function triggerCelebration() {
  const message = document.createElement('div');
  message.className = 'celebration-message';
  message.innerHTML = `
    <h2>🎉 Perfect Score! 🎉</h2>
    <p>You've mastered App or Agent thinking!</p>
  `;
  document.body.appendChild(message);

  setTimeout(() => {
    message.style.animation = 'celebration-pop 0.3s ease-in reverse';
    setTimeout(() => message.remove(), 300);
  }, 3000);

  const colors = ['#667eea', '#764ba2', '#48bb78', '#ed8936', '#f56565', '#9f7aea'];
  const launchConfetti = () => {
    const confetti = document.createElement('div');
    confetti.className = 'confetti';
    confetti.style.left = Math.random() * 100 + '%';
    confetti.style.backgroundColor = colors[Math.floor(Math.random() * colors.length)];
    confetti.style.animationDuration = (Math.random() * 3 + 2) + 's';
    confetti.style.animationDelay = '0s';
    document.body.appendChild(confetti);
    setTimeout(() => confetti.remove(), 5000);
  };
  // Stagger the pieces so they are added over about three seconds.
  for (let i = 0; i < 100; i++) {
    setTimeout(launchConfetti, i * 30);
  }
}
/**
 * Extends the running quiz to the full question set.
 *
 * Replaces `quizData` with a copy of all of `fullQuizData`, appends cards for
 * the entries beyond those already shown (existing cards are left in place),
 * refreshes the stats and hides the "load more" container.
 * Assumes the questions already shown are the leading entries of
 * `fullQuizData` - confirm against the code that first fills `quizData`.
 */
function loadMoreQuestions() {
  const alreadyShown = quizData.length;
  const total = fullQuizData.length;
  quizData = fullQuizData.slice(0, total);

  const container = document.getElementById('quiz-container');
  let position = alreadyShown;
  while (position < total) {
    container.appendChild(createQuizCard(quizData[position], position));
    position += 1;
  }

  updateStats();
  document.getElementById('load-more-container').style.display = 'none';
}
function startQuiz(questionCount) {
document.getElementById('mode-selection').style.display = 'none';