From 914674f5a6927e444bb3fbf74215c09f30b722b7 Mon Sep 17 00:00:00 2001 From: Vamsi Dath Date: Fri, 24 Apr 2026 21:32:51 -0500 Subject: [PATCH 01/13] WIP --- .../urban-workflows/src/NotebookConvertor.ts | 919 ++++++++++++++++++ .../src/adapters/grammarDetection.ts | 40 + .../components/menus/top/UpMenu.module.css | 7 +- .../src/components/menus/top/UpMenu.tsx | 69 +- 4 files changed, 1032 insertions(+), 3 deletions(-) create mode 100644 utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts create mode 100644 utk_curio/frontend/urban-workflows/src/adapters/grammarDetection.ts diff --git a/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts b/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts new file mode 100644 index 00000000..78ea2459 --- /dev/null +++ b/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts @@ -0,0 +1,919 @@ +import { v4 as uuid } from "uuid"; +import { isUtkSpec, isVegaLiteSpec, tryParseJsonObject } from "./adapters/grammarDetection"; +import { NodeType } from "./constants"; + +type JsonValue = string | number | boolean | null | JsonObject | JsonValue[]; +interface JsonObject { + [key: string]: JsonValue; +} + +export interface NotebookCell { + cell_type: "code" | "markdown" | string; + source: string; + metadata?: Record; +} + +export interface Notebook { + cells: NotebookCell[]; + metadata: Record; + nbformat: number; + nbformat_minor: number; +} + +interface TrillNode { + id: string; + type: string; + x: number; + y: number; + content?: string; + in?: string; + out?: string; +} + +interface TrillEdge { + id: string; + source: string; + target: string; + sourceHandle?: string; + targetHandle?: string; +} + +interface TrillDataflow { + nodes: TrillNode[]; + edges: TrillEdge[]; + name: string; + task: string; + timestamp: number; + provenance_id: string; +} + +export interface TrillSpec { + dataflow: TrillDataflow; +} + +interface InputConnection { + source: string; + sourceHandle: string; + bidirectional: boolean; +} 
+ +interface OutputConnection { + target: string; + targetHandle: string; + bidirectional: boolean; +} + +interface GraphNodeInfo { + node: TrillNode; + dependencies: Set; + dependents: Set; + inputs: Record; + outputs: Record; +} + +interface TrillMeta { + id?: string; + type?: string; + in?: string; + out?: string; +} + +export class TrillNotebookConverter { + private executionGraph: Record = {}; + + public trillToNotebook(trillJson: TrillSpec): Notebook { + const nodes = trillJson.dataflow?.nodes ?? []; + const edges = trillJson.dataflow?.edges ?? []; + + this.buildExecutionGraph(nodes, edges); + + const executionOrder = this.topologicalSort(); + const cells: NotebookCell[] = []; + + for (const nodeId of executionOrder) { + const node = this.executionGraph[nodeId]?.node; + if (!node) { + continue; + } + + const cell = this.generateCellForNode(node); + if (cell) { + cells.push(cell); + } + } + + return { + cells, + metadata: { + kernelspec: { + display_name: "Python 3", + language: "python", + name: "python3", + }, + language_info: { + name: "python", + }, + }, + nbformat: 4, + nbformat_minor: 4, + }; + } + + public notebookToTrill(notebook: Partial | Record): TrillSpec { + const rawCells = Array.isArray((notebook as { cells?: unknown[] }).cells) + ? ((notebook as { cells: unknown[] }).cells ?? []) + : []; + + const nodes: TrillNode[] = []; + const edges: TrillEdge[] = []; + const nodeInputs: Record = {}; + const producedByVar: Record = {}; + + let previousNodeId: string | null = null; + const position = { x: 100, y: 100 }; + const importedWorkflowId = uuid(); + + rawCells.forEach((rawCell) => { + const cell = rawCell as Record; + if (cell.cell_type !== "code") { + return; + } + + // Ignore runtime notebook artifacts (outputs/errors/execution state) on import. + void cell.outputs; + void cell.execution_count; + + const source = cell.source; + const code = Array.isArray(source) ? source.join("") : String(source ?? 
""); + + const trillMeta = this.extractTrillVariable(code); + + const nodeId = + trillMeta?.id ?? `notebook_cell_${uuid()}`; + const inferredNodeType = this.inferNodeType(code); + const nodeType = + trillMeta?.type ?? inferredNodeType; + const nodeIn = + trillMeta?.in ?? "DEFAULT"; + const nodeOut = + trillMeta?.out ?? "DEFAULT"; + + const codeWithoutMeta = this.removeTrillVariable(code); + const inputVars = this.extractInputVariables(codeWithoutMeta); + const producedVars = this.extractProducedVariables(codeWithoutMeta); + + const cleanCodeBody = this.unwrapCurioNodeExecution(codeWithoutMeta); + const cleanCode = + nodeType === NodeType.VIS_VEGA + ? this.normalizeVegaSpecForCurio(this.extractVegaLiteSpecCode(cleanCodeBody)) + : cleanCodeBody; + + const node: TrillNode = { + id: nodeId, + type: nodeType, + x: position.x, + y: position.y, + content: cleanCode.trim(), + in: nodeIn, + out: nodeOut, + }; + + nodes.push(node); + nodeInputs[nodeId] = inputVars; + for (const producedVar of producedVars) { + producedByVar[producedVar] = nodeId; + } + + previousNodeId = nodeId; + position.y += 150; + }); + + const targetInputCount: Record = {}; + const edgeKeys = new Set(); + + for (const node of nodes) { + const inputs = nodeInputs[node.id] ?? []; + for (const inputVar of inputs) { + const sourceNodeId = producedByVar[inputVar]; + if (!sourceNodeId || sourceNodeId === node.id) { + continue; + } + + const edgeKey = `${sourceNodeId}->${node.id}::${inputVar}`; + if (edgeKeys.has(edgeKey)) { + continue; + } + edgeKeys.add(edgeKey); + + let targetHandle = "in"; + if (node.type === NodeType.MERGE_FLOW) { + const count = targetInputCount[node.id] ?? 
0; + targetHandle = `in_${count}`; + targetInputCount[node.id] = count + 1; + } + + edges.push({ + id: `edge_${uuid()}`, + source: sourceNodeId, + sourceHandle: "out", + target: node.id, + targetHandle, + }); + } + } + + if (edges.length === 0) { + let linearPreviousId: string | null = null; + for (const node of nodes) { + if (linearPreviousId) { + edges.push({ + id: `edge_${uuid()}`, + source: linearPreviousId, + sourceHandle: "out", + target: node.id, + targetHandle: "in", + }); + } + linearPreviousId = node.id; + } + } + + return { + dataflow: { + nodes, + edges, + name: "Imported Notebook", + task: "", + timestamp: Date.now(), + provenance_id: importedWorkflowId, + }, + }; + } + + private buildExecutionGraph(nodes: TrillNode[], edges: TrillEdge[]): void { + this.executionGraph = {}; + + for (const node of nodes) { + this.executionGraph[node.id] = { + node, + dependencies: new Set(), + dependents: new Set(), + inputs: {}, + outputs: {}, + }; + } + + for (const edge of edges) { + const source = edge.source; + const target = edge.target; + + if (!this.executionGraph[source] || !this.executionGraph[target]) { + continue; + } + + const sourceInfo = this.executionGraph[source]; + const targetInfo = this.executionGraph[target]; + + targetInfo.dependencies.add(source); + sourceInfo.dependents.add(target); + + const targetHandle = edge.targetHandle ?? "in"; + const sourceHandle = edge.sourceHandle ?? 
"out"; + + if (!targetInfo.inputs[targetHandle]) { + targetInfo.inputs[targetHandle] = []; + } + + targetInfo.inputs[targetHandle].push({ + source, + sourceHandle, + bidirectional: false, + }); + + if (!sourceInfo.outputs[sourceHandle]) { + sourceInfo.outputs[sourceHandle] = []; + } + + sourceInfo.outputs[sourceHandle].push({ + target, + targetHandle, + bidirectional: false, + }); + } + } + + private topologicalSort(): string[] { + const visited = new Set(); + const visiting = new Set(); + const result: string[] = []; + + const visit = (nodeId: string): void => { + if (visiting.has(nodeId)) { + throw new Error("Circular dependency detected"); + } + + if (visited.has(nodeId)) { + return; + } + + visiting.add(nodeId); + + for (const dep of this.executionGraph[nodeId].dependencies) { + visit(dep); + } + + visiting.delete(nodeId); + visited.add(nodeId); + result.push(nodeId); + }; + + for (const nodeId of Object.keys(this.executionGraph)) { + visit(nodeId); + } + + return result; + } + + private generateCellForNode(node: TrillNode): NotebookCell | null { + const nodeType = node.type; + const nodeId = node.id; + const nodeInfo = this.executionGraph[nodeId]; + + if (!nodeInfo) { + return null; + } + + let code = ""; + + if (nodeType === NodeType.DATA_LOADING) { + code = this.generateDataLoadingCode(node); + } else if (nodeType === NodeType.MERGE_FLOW) { + code = this.generateMergeFlowCode(node, nodeInfo); + } else if (nodeType === NodeType.DATA_POOL) { + code = this.generateDataPoolCode(node, nodeInfo); + } else if (nodeType === NodeType.DATA_SUMMARY) { + code = this.generateDataSummaryCode(node, nodeInfo); + } else if (nodeType === NodeType.VIS_VEGA) { + code = this.generateVegaVisualizationCode(node, nodeInfo); + } else if (nodeType === NodeType.VIS_UTK) { + code = this.generateUtkVisualizationCode(node, nodeInfo); + } else { + code = this.generateComputationCode(node, nodeInfo); + } + + const nodeMeta = `__trill_node__ = {\n "id": "${nodeId}",\n "type": 
"${nodeType}",\n "in": "${node.in ?? "DEFAULT"}",\n "out": "${node.out ?? "DEFAULT"}"\n}\n\n`; + + return { + cell_type: "code", + source: nodeMeta + code, + metadata: { + id: nodeId, + language: "python", + nodeId, + nodeType, + in: node.in ?? "DEFAULT", + out: node.out ?? "DEFAULT", + }, + }; + } + + private wrapNodeExecution(body: string, outputVars: string[], displayVar?: string): string { + const uniqueOutputVars = Array.from(new Set(outputVars)); + const primaryOutput = uniqueOutputVars[0] ?? "result"; + + const successAssignments = [ + `${primaryOutput} = _curio_output`, + ...uniqueOutputVars.slice(1).map((outputVar) => `${outputVar} = ${primaryOutput}`), + ].join("\n "); + + const fallbackAssignments = uniqueOutputVars + .map((outputVar) => `${outputVar} = None`) + .join("\n "); + + const displayBlock = displayVar + ? `\nfrom IPython.display import display\ndisplay(${displayVar})\n` + : ""; + + return `def _curio_node():\n\n${this.indent(body, 4)}\n\n_curio_output = _curio_node()\n\ntry:\n ${successAssignments}\nexcept NameError:\n ${fallbackAssignments}\n${displayBlock}`; + } + + private generateDataLoadingCode(node: TrillNode): string { + const code = node.content ?? ""; + const outputs = this.getOutputVariables(node.id); + return this.wrapNodeExecution(code, outputs); + } + + private generateComputationCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const code = node.content ?? 
""; + const inputs = this.getInputVariables(nodeInfo); + const outputs = this.getOutputVariables(node.id); + + const inputLines = inputs.map((value, index) => `input_${index} = ${value}`).join("\n"); + + let argBlock = ""; + if (inputs.length === 1) { + argBlock = "arg = input_0\n"; + } else if (inputs.length > 1) { + argBlock = `arg = [${inputs.map((_, index) => `input_${index}`).join(", ")}]\n`; + } + + const body = `${inputLines}\n${argBlock}\n${code}\n`; + return this.wrapNodeExecution(body, outputs); + } + + private generateMergeFlowCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const inputs = this.getInputVariables(nodeInfo); + const outputs = this.getOutputVariables(node.id); + + const joinedInputs = inputs.join(",\n"); + const indentedInputs = this.indent(joinedInputs, 4); + + const body = `\ninputs = [\n${indentedInputs}\n]\n\nmerged_inputs = [i for i in inputs if i is not None]\n\nreturn merged_inputs\n`; + return this.wrapNodeExecution(body, outputs); + } + + private generateDataPoolCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const inputs = this.getInputVariables(nodeInfo); + const outputs = this.getOutputVariables(node.id); + const source = inputs.length > 0 ? inputs[0] : "None"; + + const body = `\nreturn ${source}\n`; + return this.wrapNodeExecution(body, outputs); + } + + private generateDataSummaryCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const code = node.content ?? ""; + const inputs = this.getInputVariables(nodeInfo); + const outputs = this.getOutputVariables(node.id); + + const inputLines = inputs.map((value, index) => `input_${index} = ${value}`).join("\n"); + + let argBlock = ""; + if (inputs.length === 1) { + argBlock = "arg = input_0\n"; + } else if (inputs.length > 1) { + argBlock = `arg = [${inputs.map((_, index) => `input_${index}`).join(", ")}]\n`; + } + + const body = `${inputLines}\n${argBlock}\n${code}\n`; + const primaryOutput = outputs[0] ?? 
"_curio_output"; + return this.wrapNodeExecution(body, outputs, primaryOutput); + } + + private generateVegaVisualizationCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const code = this.normalizeVegaSpecForNotebook(node.content ?? ""); + const inputs = this.getInputVariables(nodeInfo); + + const inputVar = inputs.length > 0 ? inputs[0] : "None"; + const outputs = this.getOutputVariables(node.id); + + const body = `\ninput_data = ${inputVar}\n\nspec = ${code.trim()}\n\nvalues = input_data\nif hasattr(input_data, "to_dict"):\n values = input_data.to_dict(orient="records")\n\nif isinstance(spec, dict):\n spec["data"] = {"values": values}\n\nfrom IPython.display import display\ndisplay({"application/vnd.vegalite.v5+json": spec, "text/plain": spec}, raw=True)\n\nreturn input_data\n`; + return this.wrapNodeExecution(body, outputs); + } + + private generateUtkVisualizationCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const code = this.ensureUtkImport(node.content ?? ""); + const inputs = this.getInputVariables(nodeInfo); + const outputs = this.getOutputVariables(node.id); + + const inputLines = inputs.map((value, index) => `input_${index} = ${value}`).join("\n"); + + let argBlock = ""; + if (inputs.length === 1) { + argBlock = "arg = input_0\n"; + } else if (inputs.length > 1) { + argBlock = `arg = [${inputs.map((_, index) => `input_${index}`).join(", ")}]\n`; + } + + const body = `${inputLines}\n${argBlock}\n${code}\n`; + return this.wrapNodeExecution(body, outputs); + } + + private inferNodeType(code: string): NodeType { + const codeWithoutMeta = this.removeTrillVariable(code); + + const parsedFullSpec = tryParseJsonObject(codeWithoutMeta.trim()); + if (parsedFullSpec) { + if (isUtkSpec(parsedFullSpec)) { + return NodeType.VIS_UTK; + } + + if (isVegaLiteSpec(parsedFullSpec)) { + return NodeType.VIS_VEGA; + } + } + + const parsedVegaSpec = tryParseJsonObject(this.extractVegaLiteSpecCode(codeWithoutMeta)); + if (parsedVegaSpec && 
isVegaLiteSpec(parsedVegaSpec)) { + return NodeType.VIS_VEGA; + } + + const utkPattern = /(^|\n)\s*(?:from\s+utk\s+import\s+|import\s+utk\b)|\butk\s*\./; + + if (utkPattern.test(codeWithoutMeta)) { + return NodeType.VIS_UTK; + } + + const vegaPattern = /application\/vnd\.vegalite\.v5\+json|\$schema\s*:\s*["']https:\/\/vega\.github\.io\/schema\/vega-lite\//; + if (vegaPattern.test(codeWithoutMeta)) { + return NodeType.VIS_VEGA; + } + + return NodeType.COMPUTATION_ANALYSIS; + } + + private ensureUtkImport(code: string): string { + const utkPattern = /(^|\n)\s*(?:from\s+utk\s+import\s+|import\s+utk\b)|\butk\s*\./; + if (utkPattern.test(code)) { + return code; + } + + const trimmedCode = code.trim(); + if (!trimmedCode) { + return "import utk"; + } + + return `import utk\n\n${trimmedCode}`; + } + + private normalizeVegaSpecForNotebook(specCode: string): string { + return this.replaceKeywordsOutsideStrings(specCode, { + true: "True", + false: "False", + null: "None", + }); + } + + private normalizeVegaSpecForCurio(specCode: string): string { + return this.replaceKeywordsOutsideStrings(specCode, { + True: "true", + False: "false", + None: "null", + }); + } + + private replaceKeywordsOutsideStrings(text: string, replacements: Record): string { + const keys = Object.keys(replacements).sort((a, b) => b.length - a.length); + let result = ""; + let index = 0; + let inString = false; + let quoteChar = ""; + + const isIdentifierChar = (char: string | undefined): boolean => { + if (!char) { + return false; + } + + const code = char.charCodeAt(0); + return ( + (code >= 65 && code <= 90) || + (code >= 97 && code <= 122) || + (code >= 48 && code <= 57) || + char === "_" + ); + }; + + while (index < text.length) { + const char = text[index]; + + if (inString) { + result += char; + + if (char === "\\") { + index += 1; + if (index < text.length) { + result += text[index]; + } + } else if (char === quoteChar) { + inString = false; + quoteChar = ""; + } + + index += 1; + continue; + } + 
+ if (char === '"' || char === "'") { + inString = true; + quoteChar = char; + result += char; + index += 1; + continue; + } + + let replaced = false; + + for (const key of keys) { + if (!text.startsWith(key, index)) { + continue; + } + + const prev = index > 0 ? text[index - 1] : undefined; + const next = index + key.length < text.length ? text[index + key.length] : undefined; + + if (isIdentifierChar(prev) || isIdentifierChar(next)) { + continue; + } + + result += replacements[key]; + index += key.length; + replaced = true; + break; + } + + if (!replaced) { + result += char; + index += 1; + } + } + + return result; + } + + private indent(text: string, spaces: number): string { + const prefix = " ".repeat(spaces); + return text + .split("\n") + .map((line) => (line.trim().length > 0 ? `${prefix}${line}` : line)) + .join("\n"); + } + + private getOutputVariable(nodeId: string, sourceHandle: string = "out"): string { + const nodeType = this.executionGraph[nodeId].node.type; + const safeId = this.sanitizeId(nodeId); + + let baseOutput = ""; + + if (nodeType === NodeType.DATA_LOADING) { + baseOutput = `data_${safeId}`; + } else if (nodeType === NodeType.MERGE_FLOW) { + baseOutput = `merged_${safeId}`; + } else if (nodeType === NodeType.DATA_POOL) { + baseOutput = `pool_${safeId}`; + } else { + baseOutput = `result_${safeId}`; + } + + if (!sourceHandle || sourceHandle === "out") { + return baseOutput; + } + + return `${baseOutput}_${this.sanitizeId(sourceHandle)}`; + } + + private getOutputVariables(nodeId: string): string[] { + const outputHandles = new Set(["out"]); + const nodeInfo = this.executionGraph[nodeId]; + + for (const outputHandle of Object.keys(nodeInfo.outputs)) { + outputHandles.add(outputHandle); + } + + return Array.from(outputHandles).map((handle) => this.getOutputVariable(nodeId, handle)); + } + + private getInputVariables(nodeInfo: GraphNodeInfo): string[] { + const variables: string[] = []; + + for (const connections of 
Object.values(nodeInfo.inputs)) { + for (const inputInfo of connections) { + if (!inputInfo.bidirectional) { + variables.push(this.getOutputVariable(inputInfo.source, inputInfo.sourceHandle)); + } + } + } + + return variables; + } + + private sanitizeId(nodeId: string): string { + return nodeId.replace(/[^a-zA-Z0-9]/g, "_"); + } + + private extractTrillVariable(code: string): TrillMeta | null { + const pattern = /__trill_node__\s*=\s*(\{[\s\S]*?\})/; + const match = code.match(pattern); + + if (!match || !match[1]) { + return null; + } + + try { + // Accept Python-style single quotes in older notebook exports. + const normalized = match[1].replace(/'/g, '"'); + const parsed = JSON.parse(normalized) as TrillMeta; + return parsed; + } catch { + return null; + } + } + + private removeTrillVariable(code: string): string { + const pattern = /__trill_node__\s*=\s*\{[\s\S]*?\}\n?/; + return code.replace(pattern, ""); + } + + private unwrapCurioNodeExecution(code: string): string { + const functionMarker = "def _curio_node():"; + const outputMarker = "_curio_output = _curio_node()"; + const tryMarker = "try:"; + const exceptMarker = "except NameError:"; + + const functionStart = code.indexOf(functionMarker); + const outputStart = code.indexOf(outputMarker); + const tryStart = code.indexOf(tryMarker, outputStart >= 0 ? outputStart : 0); + const exceptStart = code.indexOf(exceptMarker, tryStart >= 0 ? 
tryStart : 0); + + if (functionStart < 0 || outputStart < 0 || tryStart < 0 || exceptStart < 0) { + return code; + } + + const bodyStart = code.indexOf("\n\n", functionStart); + const bodyEnd = code.lastIndexOf("\n\n", outputStart); + + if (bodyStart < 0 || bodyEnd < 0 || bodyEnd <= bodyStart) { + return code; + } + + const body = code.slice(bodyStart + 2, bodyEnd); + const deindentedBody = this.deindent(body, 4).trimEnd(); + return this.stripGeneratedNodePrelude(deindentedBody).trim(); + } + + private stripGeneratedNodePrelude(code: string): string { + const lines = code.split("\n"); + let index = 0; + + while (index < lines.length) { + const line = lines[index].trim(); + + if (line.startsWith("input_") && line.includes(" = ")) { + index += 1; + continue; + } + + if (line.startsWith("arg = ")) { + index += 1; + continue; + } + + break; + } + + let end = lines.length; + + while (end > index) { + const line = lines[end - 1].trim(); + + if (line === "return input_data") { + end -= 1; + continue; + } + + if (line === "") { + end -= 1; + continue; + } + + break; + } + + return lines.slice(index, end).join("\n").trimEnd(); + } + + private extractVegaLiteSpecCode(code: string): string { + const specAssignMarker = "spec ="; + const specAssignStart = code.indexOf(specAssignMarker); + + if (specAssignStart < 0) { + return code; + } + + const objectStart = code.indexOf("{", specAssignStart); + if (objectStart < 0) { + return code; + } + + let depth = 0; + let objectEnd = -1; + + for (let index = objectStart; index < code.length; index += 1) { + const char = code[index]; + + if (char === "{") { + depth += 1; + } else if (char === "}") { + depth -= 1; + if (depth === 0) { + objectEnd = index; + break; + } + } + } + + if (objectEnd < 0) { + return code; + } + + return code.slice(objectStart, objectEnd + 1).trim(); + } + + private extractInputVariables(code: string): string[] { + const variables: string[] = []; + const lines = code.split("\n"); + + for (const rawLine of lines) { 
+ const line = rawLine.trim(); + + if (line.startsWith("input_") && line.includes(" = ")) { + const rhs = line.slice(line.indexOf("=") + 1).trim(); + if (this.isSimpleVariableName(rhs)) { + variables.push(rhs); + } + } + + if (line.startsWith("input_data = ")) { + const rhs = line.slice("input_data = ".length).trim(); + if (this.isSimpleVariableName(rhs)) { + variables.push(rhs); + } + } + } + + return Array.from(new Set(variables)); + } + + private extractProducedVariables(code: string): string[] { + const variables: string[] = []; + const lines = code.split("\n"); + let primaryVar = ""; + + for (const rawLine of lines) { + const line = rawLine.trim(); + + if (line.endsWith("= _curio_output")) { + const lhs = line.slice(0, line.indexOf("=")).trim(); + if (this.isSimpleVariableName(lhs)) { + primaryVar = lhs; + variables.push(lhs); + } + continue; + } + + if (primaryVar && line.endsWith(`= ${primaryVar}`)) { + const lhs = line.slice(0, line.indexOf("=")).trim(); + if (this.isSimpleVariableName(lhs)) { + variables.push(lhs); + } + } + } + + return Array.from(new Set(variables)); + } + + private isSimpleVariableName(value: string): boolean { + if (!value) { + return false; + } + + const first = value.charCodeAt(0); + const startsWithLetterOrUnderscore = + value[0] === "_" || + (first >= 65 && first <= 90) || + (first >= 97 && first <= 122); + + if (!startsWithLetterOrUnderscore) { + return false; + } + + for (let i = 1; i < value.length; i += 1) { + const char = value[i]; + const code = value.charCodeAt(i); + const isAlphaNum = + (code >= 65 && code <= 90) || + (code >= 97 && code <= 122) || + (code >= 48 && code <= 57); + if (!(isAlphaNum || char === "_")) { + return false; + } + } + + return true; + } + + private deindent(text: string, spaces: number): string { + const prefix = " ".repeat(spaces); + return text + .split("\n") + .map((line) => (line.startsWith(prefix) ? 
line.slice(spaces) : line)) + .join("\n"); + } + + public serializeNotebook(notebook: Notebook): string { + return JSON.stringify(notebook, null, 2); + } + + public serializeTrill(trillJson: TrillSpec): string { + return JSON.stringify(trillJson, null, 2); + } +} diff --git a/utk_curio/frontend/urban-workflows/src/adapters/grammarDetection.ts b/utk_curio/frontend/urban-workflows/src/adapters/grammarDetection.ts new file mode 100644 index 00000000..374ffe68 --- /dev/null +++ b/utk_curio/frontend/urban-workflows/src/adapters/grammarDetection.ts @@ -0,0 +1,40 @@ +type JsonRecord = Record; + +function isRecord(value: unknown): value is JsonRecord { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +export function tryParseJsonObject(text: string): JsonRecord | null { + try { + const parsed = JSON.parse(text); + return isRecord(parsed) ? parsed : null; + } catch { + return null; + } +} + +export function isVegaLiteSpec(spec: unknown): boolean { + if (!isRecord(spec)) { + return false; + } + + const schema = spec["$schema"]; + if (typeof schema === "string" && schema.includes("vega-lite")) { + return true; + } + + return "mark" in spec && "encoding" in spec; +} + +export function isUtkSpec(spec: unknown): boolean { + if (!isRecord(spec)) { + return false; + } + + const hasGrid = isRecord(spec.grid); + const hasComponents = Array.isArray(spec.components); + const hasKnots = Array.isArray(spec.knots); + const hasMapStyle = Array.isArray(spec.map_style); + + return hasGrid && hasComponents && hasKnots && hasMapStyle; +} diff --git a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css index c872b3f6..4a859039 100644 --- a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css +++ b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css @@ -51,7 +51,7 @@ background-color: #1E1F23; box-shadow: 
0 2px 5px rgba(0, 0, 0, 0.2); z-index: 100; - width: 200px; + width: 400px; border: 1px solid rgba(0, 0, 0, 0.2); border-radius: 0; } @@ -123,3 +123,8 @@ z-index: 200; } +.gridContainer { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 2px; +} diff --git a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx index 8da8c313..395a5783 100644 --- a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx +++ b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx @@ -4,11 +4,12 @@ import { useNodeActionsContext, useFlowContext } from "../../../providers/FlowPr import { useReactFlow } from "reactflow"; import { useCode } from "../../../hook/useCode"; import { TrillGenerator } from "../../../TrillGenerator"; +import { TrillNotebookConverter } from "../../../NotebookConvertor"; import styles from "./UpMenu.module.css"; import clsx from 'clsx'; import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; import { - faDatabase, faFileImport, faFileExport, faRobot, + faDatabase, faFileImport, faFileExport, faRobot, faBookOpen, faShareSquare, faTableColumns, faUpRightAndDownLeftFromCenter, faDownLeftAndUpRightToCenter, faCubes, faSitemap, faCircleQuestion } from "@fortawesome/free-solid-svg-icons"; @@ -37,6 +38,7 @@ export default function UpMenu({ const menuBarRef = useRef(null); const loadTrillInputRef = useRef(null); + const loadNotebookInputRef = useRef(null); const { workflowNameRef, workflowName, setWorkflowName, setAllMinimized, allMinimized, expandStatus, setExpandStatus } = useNodeActionsContext(); const { packages } = useFlowContext(); @@ -89,6 +91,27 @@ export default function UpMenu({ setActiveMenu(null); }; + const exportNotebook = () => { + const trill_spec = TrillGenerator.generateTrill(getNodes(), getEdges(), workflowNameRef.current, "", packages); + const converter = new TrillNotebookConverter(); + const notebook = 
converter.trillToNotebook(trill_spec); + const notebookContent = converter.serializeNotebook(notebook); + + const blob = new Blob([notebookContent], { type: 'application/json' }); + + const link = document.createElement('a'); + link.href = URL.createObjectURL(blob); + link.download = workflowNameRef.current + '.ipynb'; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + + // For now, we will just log the specification. (TEMORARy) + console.log("Exporting notebook with specification:", trill_spec); + + setActiveMenu(null); + } + const handleFileUpload = (e: any) => { const file = e.target.files[0]; if (file && file.type === 'application/json') { @@ -108,12 +131,39 @@ export default function UpMenu({ } }; + const handleNotebookUpload = (e: any) => { + const file = e.target.files[0]; + if (!file) { + return; + } + + const reader = new FileReader(); + reader.onload = (event: any) => { + try { + const jsonContent = JSON.parse(event.target.result); + const converter = new TrillNotebookConverter(); + const trillSpec = converter.notebookToTrill(jsonContent); + loadTrill(trillSpec); + } catch (err) { + console.error('Invalid notebook file:', err); + } + }; + reader.onerror = (event: any) => console.error('Error reading notebook file:', event.target.error); + reader.readAsText(file); + }; + const loadTrillFile = () => { setActiveMenu(null); // Defer the click so the input is not unmounted before the dialog opens setTimeout(() => loadTrillInputRef.current?.click(), 0); }; + const loadNotebookFile = () => { + setActiveMenu(null); + // Defer the click so the input is not unmounted before the dialog opens + setTimeout(() => loadNotebookInputRef.current?.click(), 0); + }; + useEffect(() => { const handleClickOutside = (event: MouseEvent) => { if (menuBarRef.current && !menuBarRef.current.contains(event.target as Node)) { @@ -164,7 +214,7 @@ export default function UpMenu({
{activeMenu === 'file' && ( -
+
@@ -173,6 +223,14 @@ export default function UpMenu({
+
+ + +
+
+ + +
)}
@@ -273,6 +331,13 @@ export default function UpMenu({ style={{ display: 'none' }} onChange={handleFileUpload} /> + Date: Fri, 24 Apr 2026 22:55:29 -0500 Subject: [PATCH 02/13] Modified to include changes from useFlowContext --- .../src/components/menus/top/UpMenu.tsx | 523 +++++++++++++----- 1 file changed, 382 insertions(+), 141 deletions(-) diff --git a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx index 395a5783..cada8f13 100644 --- a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx +++ b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx @@ -1,27 +1,48 @@ -import React, { useState, useRef, useEffect } from "react"; -import { TrillProvenanceWindow, DatasetsWindow, PackageManagerWindow } from "components/menus"; -import { useNodeActionsContext, useFlowContext } from "../../../providers/FlowProvider"; +import React, { useEffect, useRef, useState } from "react"; +import { + DatasetsWindow, + PackageManagerWindow, + TrillProvenanceWindow, +} from "components/menus"; +import { + useFlowContext, + useNodeActionsContext, +} from "../../../providers/FlowProvider"; import { useReactFlow } from "reactflow"; import { useCode } from "../../../hook/useCode"; import { TrillGenerator } from "../../../TrillGenerator"; import { TrillNotebookConverter } from "../../../NotebookConvertor"; import styles from "./UpMenu.module.css"; -import clsx from 'clsx'; +import clsx from "clsx"; import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; import { - faDatabase, faFileImport, faFileExport, faRobot, faBookOpen, faShareSquare, - faTableColumns, faUpRightAndDownLeftFromCenter, faDownLeftAndUpRightToCenter, - faCubes, faSitemap, faCircleQuestion + faCubes, + faDatabase, + faFileImport, + faFolderOpen, + faFloppyDisk, + faPlus, + faRobot, + faBookOpen, + faShareSquare, + faTableColumns, + faUpRightAndDownLeftFromCenter, + 
faDownLeftAndUpRightToCenter, + faSitemap, + faCircleQuestion, } from "@fortawesome/free-solid-svg-icons"; -import logo from 'assets/curio-2.png'; -import introJs from 'intro.js'; +import logo from "assets/curio-2.png"; +import { UserMenu } from "components/login/UserMenu"; +import introJs from "intro.js"; import "intro.js/introjs.css"; +import { useNavigate, Link } from "react-router-dom"; +import { useUserContext } from "../../../providers/UserProvider"; export default function UpMenu({ setDashBoardMode, setDashboardOn, dashboardOn, - setAIMode + setAIMode, }: { setDashBoardMode: (mode: boolean) => void; setDashboardOn: (mode: boolean) => void; @@ -34,39 +55,81 @@ export default function UpMenu({ const [datasetsOpen, setDatasetsOpen] = useState(false); const [packagesOpen, setPackagesOpen] = useState(false); const [activeMenu, setActiveMenu] = useState(null); + const [saving, setSaving] = useState(false); const [aiModeOn, setAiModeOn] = useState(false); const menuBarRef = useRef(null); const loadTrillInputRef = useRef(null); const loadNotebookInputRef = useRef(null); - - const { workflowNameRef, workflowName, setWorkflowName, setAllMinimized, allMinimized, expandStatus, setExpandStatus } = useNodeActionsContext(); - const { packages } = useFlowContext(); + const navigate = useNavigate(); + const { skipProjectPage } = useUserContext(); + const { + workflowNameRef, + projectDirty, + cleanCanvas, + saveCurrentProject, + saveAsNewProject, + discardProject, + packages, + } = useFlowContext(); + const { + workflowName, + setWorkflowName, + setAllMinimized, + allMinimized, + expandStatus, + setExpandStatus, + } = useNodeActionsContext(); const { getNodes, getEdges } = useReactFlow(); const { loadTrill } = useCode(); const toggleMenu = (menu: string) => { - setActiveMenu(prev => prev === menu ? null : menu); + setActiveMenu((prev) => (prev === menu ? 
null : menu)); + }; + + const closeTrillProvenanceModal = () => { + setTrillProvenanceOpen(false); + }; + + const openTrillProvenanceModal = () => { + setTrillProvenanceOpen(true); + setActiveMenu(null); }; - const closeTrillProvenanceModal = () => setTrillProvenanceOpen(false); - const openTrillProvenanceModal = () => { setTrillProvenanceOpen(true); setActiveMenu(null); }; + const closeDatasetsModal = () => { + setDatasetsOpen(false); + }; - const closeDatasetsModal = () => setDatasetsOpen(false); - const openDatasetsModal = () => { setDatasetsOpen(true); setActiveMenu(null); }; + const openDatasetsModal = () => { + setDatasetsOpen(true); + setActiveMenu(null); + }; - const handleNameChange = (e: any) => setWorkflowName(e.target.value); - const handleNameBlur = () => setIsEditing(false); - const handleKeyPress = (e: any) => { if (e.key === "Enter") setIsEditing(false); }; + const handleNameChange = (e: React.ChangeEvent) => { + setWorkflowName(e.target.value); + }; - const openTutorial = () => { setTutorialOpen(true); setActiveMenu(null); }; + const handleNameBlur = () => { + setIsEditing(false); + }; + + const handleKeyPress = (e: React.KeyboardEvent) => { + if (e.key === "Enter") { + setIsEditing(false); + } + }; + + const openTutorial = () => { + setTutorialOpen(true); + setActiveMenu(null); + }; const toggleExpand = () => { - if (expandStatus === 'expanded') { - setExpandStatus('minimized'); + if (expandStatus === "expanded") { + setExpandStatus("minimized"); setAllMinimized(allMinimized + 1); } else { - setExpandStatus('expanded'); + setExpandStatus("expanded"); setAllMinimized(0); } setActiveMenu(null); @@ -78,16 +141,41 @@ export default function UpMenu({ setAIMode(next); }; - const exportTrill = () => { - const trill_spec = TrillGenerator.generateTrill(getNodes(), getEdges(), workflowNameRef.current, "", packages); - const jsonString = JSON.stringify(trill_spec, null, 2); - const blob = new Blob([jsonString], { type: 'application/json' }); - const link = 
document.createElement('a'); - link.href = URL.createObjectURL(blob); - link.download = workflowNameRef.current + '.json'; - document.body.appendChild(link); - link.click(); - document.body.removeChild(link); + const handleNewWorkflow = () => { + if (projectDirty && !window.confirm("You have unsaved changes. Continue?")) { + return; + } + discardProject(); + cleanCanvas(); + setActiveMenu(null); + navigate("/dataflow/new"); + }; + + const handleSave = async () => { + setSaving(true); + try { + await saveCurrentProject(); + } catch (err: any) { + console.error("Save failed:", err); + alert(`Save failed: ${err?.message || "unknown error"}`); + setSaving(false); + return; + } + setSaving(false); + setActiveMenu(null); + }; + + const handleSaveAs = async () => { + const name = window.prompt("Project name:", workflowNameRef.current); + if (!name) return; + + setSaving(true); + try { + await saveAsNewProject(name); + } catch (err) { + console.error("Save As failed:", err); + } + setSaving(false); setActiveMenu(null); }; @@ -105,51 +193,61 @@ export default function UpMenu({ document.body.appendChild(link); link.click(); document.body.removeChild(link); - - // For now, we will just log the specification. 
(TEMORARy) console.log("Exporting notebook with specification:", trill_spec); setActiveMenu(null); - } + }; + + const handleFileUpload = (e: React.ChangeEvent) => { + const file = e.target.files?.[0]; - const handleFileUpload = (e: any) => { - const file = e.target.files[0]; - if (file && file.type === 'application/json') { + if (file && file.type === "application/json") { const reader = new FileReader(); - reader.onload = (e: any) => { + + reader.onload = (event: ProgressEvent) => { try { - const jsonContent = JSON.parse(e.target.result); + const jsonContent = JSON.parse(event.target?.result as string); loadTrill(jsonContent); } catch (err) { - console.error('Invalid JSON file:', err); + console.error("Invalid JSON file:", err); + } finally { + setActiveMenu(null); } }; - reader.onerror = (e: any) => console.error('Error reading file:', e.target.error); + + reader.onerror = (event: ProgressEvent) => { + console.error("Error reading file:", event.target?.error); + setActiveMenu(null); + }; + reader.readAsText(file); } else { - console.error('Please select a valid .json file.'); + console.error("Please select a valid .json file."); + setActiveMenu(null); } }; - const handleNotebookUpload = (e: any) => { - const file = e.target.files[0]; - if (!file) { - return; - } + const handleNotebookUpload = (e: React.ChangeEvent) => { + const file = e.target.files?.[0]; - const reader = new FileReader(); - reader.onload = (event: any) => { - try { - const jsonContent = JSON.parse(event.target.result); - const converter = new TrillNotebookConverter(); - const trillSpec = converter.notebookToTrill(jsonContent); - loadTrill(trillSpec); - } catch (err) { - console.error('Invalid notebook file:', err); - } - }; - reader.onerror = (event: any) => console.error('Error reading notebook file:', event.target.error); - reader.readAsText(file); + if (file && (file.type === "application/x-ipynb+json") || file?.name.endsWith(".ipynb")) { + const reader = new FileReader(); + reader.onload = 
(event: any) => { + try { + const jsonContent = JSON.parse(event.target.result); + const converter = new TrillNotebookConverter(); + const trillSpec = converter.notebookToTrill(jsonContent); + loadTrill(trillSpec); + } catch (err) { + console.error("Invalid notebook file:", err); + } + }; + reader.onerror = (event: any) => console.error("Error reading notebook file:", event.target.error); + reader.readAsText(file); + } else { + console.error("Please select a valid .ipynb file."); + } + setActiveMenu(null); }; const loadTrillFile = () => { @@ -166,87 +264,203 @@ export default function UpMenu({ useEffect(() => { const handleClickOutside = (event: MouseEvent) => { - if (menuBarRef.current && !menuBarRef.current.contains(event.target as Node)) { + if ( + menuBarRef.current && + !menuBarRef.current.contains(event.target as Node) + ) { setActiveMenu(null); } }; + if (activeMenu) { - document.addEventListener('click', handleClickOutside); + document.addEventListener("click", handleClickOutside); } else { - document.removeEventListener('click', handleClickOutside); + document.removeEventListener("click", handleClickOutside); } - return () => document.removeEventListener('click', handleClickOutside); + + return () => { + document.removeEventListener("click", handleClickOutside); + }; }, [activeMenu]); useEffect(() => { - if (tutorialOpen) { - const intro = introJs(); - intro.setOptions({ - steps: [ - { intro: "Welcome to Curio, a framework for urban analytics. Let's take a quick tour to help you get started." }, - { element: '#step-loading', intro: "This is a Data Loading Node. Here, you can create an array for basic datasets or import data from a file. Once loaded, add your code to convert the data into a DataFrame for further analysis." }, - { element: '#step-analysis', intro: "This is a Data Analysis Node. Use it to perform calculations and operations on your dataset, preparing it for visualization." 
}, - { element: '#step-transformation', intro: "The Data Transformation Node allows you to filter, segment, or restructure your data." }, - { element: '#step-cleaning', intro: "This is a Data Cleaning Node. Use it to refine your dataset by handling missing values, removing outliers, and generating identifiers for data quality purposes." }, - { element: '#step-pool', intro: "This is a Data Pool Node. It enables you to display your processed data in a structured grid format for easy review." }, - { element: '#step-utk', intro: "This is a UTK Node. It renders your data in an interactive 3D environment using UTK." }, - { element: '#step-vega', intro: "This is a Vega-Lite Node. Use it to visualize data in 2D formats (bar charts, scatter plots, and line graphs) using a JSON specification." }, - { element: '#step-image', intro: "The Image Node displays a gallery of images." }, - { element: '#step-merge', intro: "This is a Merge Flow Node. It allows you to combine multiple data streams into a single dataset. Red handles indicate a missing connection, while green handles show that a connection has been established. Note: each handle can only connect to one edge." }, - { element: '#step-final', intro: "That's it! Drag and drop nodes into your workspace and begin exploring your data with Curio." } - ], - showStepNumbers: false, - showProgress: false, - exitOnOverlayClick: false, - tooltipClass: "custom-intro-tooltip", - }); - intro.start(); - setTutorialOpen(false); - } + if (!tutorialOpen) return; + + const intro = introJs(); + intro.setOptions({ + steps: [ + { + intro: "Welcome to Curio, a framework for urban analytics. Let's take a quick tour to help you get started.", + }, + { + element: "#step-loading", + intro: "This is a Data Loading Node. Here, you can create an array for basic datasets or import data from a file. 
Once loaded, add your code to convert the data into a DataFrame for further analysis.", + }, + { + element: "#step-analysis", + intro: "This is a Data Analysis Node. Use it to perform calculations and operations on your dataset, preparing it for visualization.", + }, + { + element: "#step-transformation", + intro: "The Data Transformation Node allows you to filter, segment, or restructure your data.", + }, + { + element: "#step-cleaning", + intro: "This is a Data Cleaning Node. Use it to refine your dataset by handling missing values, removing outliers, and generating identifiers for data quality purposes.", + }, + { + element: "#step-pool", + intro: "This is a Data Pool Node. It enables you to display your processed data in a structured grid format for easy review.", + }, + { + element: "#step-utk", + intro: "This is a UTK Node. It renders your data in an interactive 3D environment using UTK.", + }, + { + element: "#step-vega", + intro: "This is a Vega-Lite Node. Use it to visualize data in 2D formats (bar charts, scatter plots, and line graphs) using a JSON specification.", + }, + { + element: "#step-image", + intro: "The Image Node displays a gallery of images.", + }, + { + element: "#step-merge", + intro: "This is a Merge Flow Node. It allows you to combine multiple data streams into a single dataset. Red handles indicate a missing connection, while green handles show that a connection has been established. Note: each handle can only connect to one edge.", + }, + { + element: "#step-final", + intro: "That's it! Drag and drop nodes into your workspace and begin exploring your data with Curio.", + }, + ], + showStepNumbers: false, + showProgress: false, + exitOnOverlayClick: false, + tooltipClass: "custom-intro-tooltip", + }); + intro.start(); + setTutorialOpen(false); }, [tutorialOpen]); return ( <> -
- Curio logo + { + (e.target as HTMLInputElement).value = ""; + }} + /> + { + (e.target as HTMLInputElement).value = ""; + }} + /> +
+ + Curio logo + {/* File */}
- - {activeMenu === 'file' && ( -
+ + {activeMenu === "file" && ( +
e.stopPropagation()}> + {!skipProjectPage && ( +
+ + +
+ )}
- -
-
- - -
-
- - -
-
- - +
+ {!skipProjectPage && ( + <> +
+ + +
+
+ + +
+
+
{ + navigate("/projects"); + setActiveMenu(null); + }} + > + + +
+ + )}
)}
{/* View */}
- - {activeMenu === 'view' && ( + + {activeMenu === "view" && (
-
{ setDashBoardMode(!dashboardOn); setDashboardOn(!dashboardOn); setActiveMenu(null); }}> +
{ + setDashBoardMode(!dashboardOn); + setDashboardOn(!dashboardOn); + setActiveMenu(null); + }} + > - +
- - + +
)} @@ -254,12 +468,20 @@ export default function UpMenu({ {/* Data */}
- - {activeMenu === 'data' && ( + + {activeMenu === "data" && (
-
{ setPackagesOpen(true); setActiveMenu(null); }}> +
{ + setPackagesOpen(true); + setActiveMenu(null); + }} + > - +
@@ -271,8 +493,13 @@ export default function UpMenu({ {/* Provenance */}
- - {activeMenu === 'provenance' && ( + + {activeMenu === "provenance" && (
@@ -284,8 +511,10 @@ export default function UpMenu({ {/* Help */}
- - {activeMenu === 'help' && ( + + {activeMenu === "help" && (
@@ -303,6 +532,26 @@ export default function UpMenu({ > + + {/* Curio Jupyter interoperability */} +
+ + {activeMenu === "notebook" && ( +
+
+ + +
+
+ + +
+
+ )} +
+
{/* Editable Workflow Name */} @@ -318,33 +567,25 @@ export default function UpMenu({ className={styles.input} /> ) : ( -

setIsEditing(true)}> +

setIsEditing(true)} + > {workflowName}

)}
- - - setPackagesOpen(false)} /> + setPackagesOpen(false)} + /> ); } From e30ec0666809d9b564d51946e5f02ccdd928da15 Mon Sep 17 00:00:00 2001 From: Vamsi Dath Date: Fri, 24 Apr 2026 22:57:29 -0500 Subject: [PATCH 03/13] Replaced use of reactflow with FlowProvider for nodes and edges --- .../urban-workflows/src/components/menus/top/UpMenu.tsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx index cada8f13..30270afb 100644 --- a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx +++ b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx @@ -8,7 +8,6 @@ import { useFlowContext, useNodeActionsContext, } from "../../../providers/FlowProvider"; -import { useReactFlow } from "reactflow"; import { useCode } from "../../../hook/useCode"; import { TrillGenerator } from "../../../TrillGenerator"; import { TrillNotebookConverter } from "../../../NotebookConvertor"; @@ -71,6 +70,8 @@ export default function UpMenu({ saveAsNewProject, discardProject, packages, + nodes, + edges, } = useFlowContext(); const { workflowName, @@ -80,7 +81,6 @@ export default function UpMenu({ expandStatus, setExpandStatus, } = useNodeActionsContext(); - const { getNodes, getEdges } = useReactFlow(); const { loadTrill } = useCode(); const toggleMenu = (menu: string) => { @@ -180,7 +180,7 @@ export default function UpMenu({ }; const exportNotebook = () => { - const trill_spec = TrillGenerator.generateTrill(getNodes(), getEdges(), workflowNameRef.current, "", packages); + const trill_spec = TrillGenerator.generateTrill(nodes, edges, workflowNameRef.current, "", packages); const converter = new TrillNotebookConverter(); const notebook = converter.trillToNotebook(trill_spec); const notebookContent = converter.serializeNotebook(notebook); From 1eb5e5e0b5680d055790c8671cbed55f0f62a11b Mon Sep 17 00:00:00 
2001 From: Vamsi Dath Date: Fri, 24 Apr 2026 23:53:58 -0500 Subject: [PATCH 04/13] removed reduntant class form style --- .../src/components/menus/top/UpMenu.module.css | 6 ------ 1 file changed, 6 deletions(-) diff --git a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css index cd579692..1fac69a9 100644 --- a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css +++ b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css @@ -154,9 +154,3 @@ top: 12px; z-index: 200; } - -.gridContainer { - display: grid; - grid-template-columns: 1fr 1fr; - gap: 2px; -} From 9d95cd33731fad755f2b3004aa371560dc3effd0 Mon Sep 17 00:00:00 2001 From: Vamsi Dath Date: Sat, 25 Apr 2026 00:19:14 -0500 Subject: [PATCH 05/13] tiny changes in Notebook button UI --- .../src/components/menus/top/UpMenu.module.css | 2 +- .../urban-workflows/src/components/menus/top/UpMenu.tsx | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css index 1fac69a9..b798c3de 100644 --- a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css +++ b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.module.css @@ -52,7 +52,7 @@ background-color: #1E1F23; box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2); z-index: 100; - width: 400px; + width: 200px; border: 1px solid rgba(0, 0, 0, 0.2); border-radius: 0; } diff --git a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx index 30270afb..70a65e6e 100644 --- a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx +++ b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx @@ -536,17 +536,17 @@ 
export default function UpMenu({ {/* Curio Jupyter interoperability */}
{activeMenu === "notebook" && (
- +
- +
)} From 07c8f30db18b3cb91eb587d3a7a0a086fed40265 Mon Sep 17 00:00:00 2001 From: Vamsi Dath Date: Sun, 26 Apr 2026 22:16:59 -0500 Subject: [PATCH 06/13] notebook examples folder created --- .../example3-original-notebook-NW.ipynb | 172 ++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 docs/examples/notebooks/example3-original-notebook-NW.ipynb diff --git a/docs/examples/notebooks/example3-original-notebook-NW.ipynb b/docs/examples/notebooks/example3-original-notebook-NW.ipynb new file mode 100644 index 00000000..30d4466e --- /dev/null +++ b/docs/examples/notebooks/example3-original-notebook-NW.ipynb @@ -0,0 +1,172 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"09c6e03f-117d-45c5-af30-8f71bc3e58b6\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n\n # computation analysis - clear\r\n\r\n import sys\r\n import os\r\n import re\r\n import torch\r\n\r\n _ROOT = os.getcwd()\r\n _CANDIDATES = [\r\n os.environ.get(\"CITYSURFACES_DIR\"),\r\n os.path.join(_ROOT, \"city-surfaces\"),\r\n os.path.join(_ROOT, \"CitySurfaces\"),\r\n ]\r\n CITYSURFACES_DIR = None\r\n for _p in _CANDIDATES:\r\n if _p and os.path.isfile(os.path.join(_p, \"config.py\")):\r\n CITYSURFACES_DIR = os.path.abspath(_p)\r\n break\r\n if CITYSURFACES_DIR is None:\r\n raise FileNotFoundError(\r\n \"CitySurfaces repo not found (need config.py). Clone \"\r\n \"https://github.com/VIDA-NYU/city-surfaces into ./city-surfaces/ under \"\r\n f\"your Curio launch directory, or set CITYSURFACES_DIR. 
cwd={_ROOT!r}\"\r\n )\r\n sys.path.insert(0, CITYSURFACES_DIR)\r\n\r\n # CitySurfaces calls logx.msg() during model init; runx requires initialize() first (normally done in val.py main).\r\n import tempfile\r\n from runx.logx import logx\r\n _log_dir = os.path.join(tempfile.gettempdir(), \"curio_citysurfaces_runx\")\r\n logx.initialize(logdir=_log_dir, tensorboard=False, hparams={}, global_rank=0)\r\n\r\n WEIGHTS_DIR = './data/dataset/CitySurfaces_weights'\r\n WEIGHTS_FILE = os.path.join(WEIGHTS_DIR, 'block_c_10classes.pth')\r\n NUM_CLASSES = 10\r\n DEVICE = \"cuda\"\r\n\r\n from config import cfg\r\n cfg.immutable(False)\r\n cfg.DATASET.NUM_CLASSES = NUM_CLASSES\r\n cfg.MODEL.BNFUNC = torch.nn.BatchNorm2d\r\n cfg.MODEL.HRNET_CHECKPOINT = os.path.join(WEIGHTS_DIR, 'hrnetv2_w48_imagenet_pretrained.pth')\r\n cfg.OPTIONS.INIT_DECODER = False\r\n # val.py sets this via assert_and_infer_cfg(); required for network/mynn.py interpolate branches\r\n _m = re.match(r'^([0-9]+\\.[0-9]+)', torch.__version__)\r\n cfg.OPTIONS.TORCH_VERSION = float(_m.group(1)) if _m else 2.0\r\n cfg.immutable(True)\r\n\r\n from network.ocrnet import HRNet_Mscale\r\n\r\n model = HRNet_Mscale(num_classes=NUM_CLASSES, criterion=None).to(DEVICE)\r\n\r\n # PyTorch 2.6+ defaults weights_only=True; CitySurfaces checkpoints need False (trusted local files).\r\n checkpoint = torch.load(WEIGHTS_FILE, map_location=DEVICE, weights_only=False)\r\n state_dict = checkpoint.get('state_dict', checkpoint)\r\n\r\n model_state = model.state_dict()\r\n new_state = {}\r\n for k in model_state:\r\n if k in state_dict and model_state[k].size() == state_dict[k].size():\r\n new_state[k] = state_dict[k]\r\n elif 'module.' + k in state_dict and model_state[k].size() == state_dict['module.' + k].size():\r\n new_state[k] = state_dict['module.' 
+ k]\r\n\r\n model_state.update(new_state)\r\n model.load_state_dict(model_state)\r\n model.eval()\r\n\r\n return \"Pretrained CitySurfaces model loaded (10 classes)\"\n\n\n_curio_output = _curio_node()\n\ntry:\n result_09c6e03f_117d_45c5_af30_8f71bc3e58b6 = _curio_output\nexcept NameError:\n result_09c6e03f_117d_45c5_af30_8f71bc3e58b6 = None\n", + "metadata": { + "id": "09c6e03f-117d-45c5-af30-8f71bc3e58b6", + "language": "python", + "nodeId": "09c6e03f-117d-45c5-af30-8f71bc3e58b6", + "nodeType": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"9def5617-0b4e-4afb-afaf-7a567af01f92\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import geopandas as gpd\r\n # Load neighborhood data\r\n boston = gpd.read_file('./data/dataset/Census2020_BlockGroups.shp').to_crs('EPSG:4326')\r\n return boston\n\n_curio_output = _curio_node()\n\ntry:\n data_9def5617_0b4e_4afb_afaf_7a567af01f92 = _curio_output\nexcept NameError:\n data_9def5617_0b4e_4afb_afaf_7a567af01f92 = None\n", + "metadata": { + "id": "9def5617-0b4e-4afb-afaf-7a567af01f92", + "language": "python", + "nodeId": "9def5617-0b4e-4afb-afaf-7a567af01f92", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"c66ae5dc-5727-4dba-9ad7-e0d312cbc1cb\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import pandas as pd\r\n df = pd.read_csv('./data/dataset/gsv/boston_gsv.csv', names=['status','id','lat','lon'])\r\n sample = df[df['status']=='OK'].sample(100, random_state=42)\r\n return sample\n\n_curio_output = _curio_node()\n\ntry:\n data_c66ae5dc_5727_4dba_9ad7_e0d312cbc1cb = _curio_output\nexcept NameError:\n data_c66ae5dc_5727_4dba_9ad7_e0d312cbc1cb = None\n", + "metadata": { + "id": "c66ae5dc-5727-4dba-9ad7-e0d312cbc1cb", + 
"language": "python", + "nodeId": "c66ae5dc-5727-4dba-9ad7-e0d312cbc1cb", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"aaff4f52-b04b-413f-9f83-7e12fb0acbf0\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_c66ae5dc_5727_4dba_9ad7_e0d312cbc1cb\n arg = input_0\n\n import sys\r\n import os\r\n import re\r\n import torch\r\n import torch.nn.functional as F\r\n import numpy as np\r\n from PIL import Image\r\n from io import BytesIO\r\n import base64\r\n\r\n sample = arg\r\n\r\n _ROOT = os.getcwd()\r\n _CANDIDATES = [\r\n os.environ.get(\"CITYSURFACES_DIR\"),\r\n os.path.join(_ROOT, \"city-surfaces\"),\r\n os.path.join(_ROOT, \"CitySurfaces\"),\r\n ]\r\n CITYSURFACES_DIR = None\r\n for _p in _CANDIDATES:\r\n if _p and os.path.isfile(os.path.join(_p, \"config.py\")):\r\n CITYSURFACES_DIR = os.path.abspath(_p)\r\n break\r\n if CITYSURFACES_DIR is None:\r\n raise FileNotFoundError(\r\n \"CitySurfaces repo not found (need config.py). Clone \"\r\n \"https://github.com/VIDA-NYU/city-surfaces into ./city-surfaces/ under \"\r\n f\"your Curio launch directory, or set CITYSURFACES_DIR. 
cwd={_ROOT!r}\"\r\n )\r\n sys.path.insert(0, CITYSURFACES_DIR)\r\n\r\n import tempfile\r\n from runx.logx import logx\r\n _log_dir = os.path.join(tempfile.gettempdir(), \"curio_citysurfaces_runx\")\r\n logx.initialize(logdir=_log_dir, tensorboard=False, hparams={}, global_rank=0)\r\n\r\n WEIGHTS_DIR = './data/dataset/CitySurfaces_weights'\r\n WEIGHTS_FILE = os.path.join(WEIGHTS_DIR, 'block_c_10classes.pth')\r\n NUM_CLASSES = 10\r\n DEVICE = 'cuda'\r\n IMAGE_SIZE = 320\r\n\r\n from config import cfg\r\n cfg.immutable(False)\r\n cfg.DATASET.NUM_CLASSES = NUM_CLASSES\r\n cfg.MODEL.BNFUNC = torch.nn.BatchNorm2d\r\n cfg.MODEL.HRNET_CHECKPOINT = os.path.join(WEIGHTS_DIR, 'hrnetv2_w48_imagenet_pretrained.pth')\r\n cfg.OPTIONS.INIT_DECODER = False\r\n _m = re.match(r'^([0-9]+\\.[0-9]+)', torch.__version__)\r\n cfg.OPTIONS.TORCH_VERSION = float(_m.group(1)) if _m else 2.0\r\n cfg.immutable(True)\r\n\r\n from network.ocrnet import HRNet_Mscale\r\n\r\n def compute_uncertainty(predictions):\r\n sorted_probs = np.sort(predictions, axis=1)\r\n highest_prob = sorted_probs[:, -1, :, :]\r\n second_highest_prob = sorted_probs[:, -2, :, :]\r\n uncertainty_margin = highest_prob - second_highest_prob\r\n return 1.0 - uncertainty_margin\r\n\r\n model = HRNet_Mscale(num_classes=NUM_CLASSES, criterion=None).to(DEVICE)\r\n\r\n checkpoint = torch.load(WEIGHTS_FILE, map_location=DEVICE, weights_only=False)\r\n state_dict = checkpoint.get('state_dict', checkpoint)\r\n model_state = model.state_dict()\r\n new_state = {}\r\n for k in model_state:\r\n if k in state_dict and model_state[k].size() == state_dict[k].size():\r\n new_state[k] = state_dict[k]\r\n elif 'module.' + k in state_dict and model_state[k].size() == state_dict['module.' + k].size():\r\n new_state[k] = state_dict['module.' 
+ k]\r\n model_state.update(new_state)\r\n model.load_state_dict(model_state)\r\n model.eval()\r\n\r\n MEAN = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(DEVICE)\r\n STD = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(DEVICE)\r\n\r\n color_map = {\r\n 0: (255, 127, 14), # concrete\r\n 1: (43, 160, 43), # bricks\r\n 2: (31, 119, 179), # granite\r\n 3: (153, 153, 153), # asphalt\r\n 4: (214, 39, 40), # mixed\r\n 5: (54, 54, 54), # road\r\n 6: (0, 0, 0), # background\r\n 7: (138, 0, 138), # granite block-stone\r\n 8: (240, 110, 170), # hexagonal\r\n 9: (139, 109, 48), # cobblestone\r\n }\r\n\r\n lats = []\r\n lons = []\r\n uncerts = []\r\n images = []\r\n predicted_images = []\r\n uncert_images = []\r\n for index, row in sample.iterrows():\r\n image_path = './data/dataset/gsv/boston/%s_left.jpg' % row['id']\r\n\r\n pil_image = Image.open(image_path).convert(\"RGB\").resize((IMAGE_SIZE, IMAGE_SIZE))\r\n\r\n image = np.array(pil_image, dtype=np.float32) / 255.0\r\n input_tensor = torch.from_numpy(image.reshape(1, IMAGE_SIZE, IMAGE_SIZE, 3)).permute((0, 3, 1, 2)).to(DEVICE)\r\n input_tensor = (input_tensor - MEAN) / STD\r\n\r\n with torch.no_grad():\r\n output = model({'images': input_tensor})\r\n logits = output['pred']\r\n predictions = F.softmax(logits, dim=1)\r\n\r\n pred_labels = torch.argmax(predictions, dim=1)\r\n pred_array = pred_labels.cpu().numpy()\r\n pred_array = pred_array.reshape((IMAGE_SIZE, IMAGE_SIZE))\r\n pred_pil = Image.new(\"RGB\", (pred_array.shape[1], pred_array.shape[0]))\r\n for i in range(pred_array.shape[0]):\r\n for j in range(pred_array.shape[1]):\r\n pred_pil.putpixel((j, i), color_map[pred_array[i, j]])\r\n\r\n buffered = BytesIO()\r\n pred_pil.save(buffered, format=\"PNG\")\r\n pred_str = base64.b64encode(buffered.getvalue()).decode('utf-8')\r\n\r\n uncertainty_margin = compute_uncertainty(predictions.cpu().detach().numpy())\r\n\r\n uncertainty_array = np.uint8(uncertainty_margin * 255)\r\n uncertainty_array = 
np.transpose(uncertainty_array, (1, 2, 0))\r\n uncertainty_array = np.squeeze(uncertainty_array, axis=2)\r\n uncertainty_pil = Image.fromarray(uncertainty_array)\r\n\r\n buffered = BytesIO()\r\n uncertainty_pil.save(buffered, format=\"PNG\")\r\n uncertainty_str = base64.b64encode(buffered.getvalue()).decode('utf-8')\r\n\r\n lats.append(row['lat'])\r\n lons.append(row['lon'])\r\n uncerts.append(float(np.average(uncertainty_margin)))\r\n\r\n buffered = BytesIO()\r\n pil_image.save(buffered, format=\"PNG\")\r\n img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')\r\n\r\n images.append(img_str)\r\n predicted_images.append(pred_str)\r\n uncert_images.append(uncertainty_str)\r\n\r\n return (lats, lons, uncerts, images, predicted_images, uncert_images)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_aaff4f52_b04b_413f_9f83_7e12fb0acbf0 = _curio_output\nexcept NameError:\n result_aaff4f52_b04b_413f_9f83_7e12fb0acbf0 = None\n", + "metadata": { + "id": "aaff4f52-b04b-413f-9f83-7e12fb0acbf0", + "language": "python", + "nodeId": "aaff4f52-b04b-413f-9f83-7e12fb0acbf0", + "nodeType": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"cf7bf5ef-5ce7-4e26-974b-fb782f84be19\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_aaff4f52_b04b_413f_9f83_7e12fb0acbf0\n arg = input_0\n\n import geopandas as gpd\r\n\r\n lats = arg[0]\r\n lons = arg[1]\r\n uncerts = arg[2]\r\n original_images = arg[3]\r\n predicted_images = arg[4]\r\n uncert_images = arg[5]\r\n\r\n image_content = list(zip(original_images, predicted_images, uncert_images))\r\n\r\n gdf = pd.DataFrame({'lat': lats, 'lon': lons, 'uncertainty': uncerts, 'image_content': image_content})\r\n\r\n gdf['image_id'] = gdf.index\r\n\r\n gdf = gpd.GeoDataFrame(\r\n gdf, geometry=gpd.points_from_xy(gdf.lon, gdf.lat), crs=\"EPSG:4326\"\r\n )\r\n\r\n gdf = 
gdf.sort_values(by='image_id', ascending=True)\r\n\r\n return gdf\n\n\n_curio_output = _curio_node()\n\ntry:\n result_cf7bf5ef_5ce7_4e26_974b_fb782f84be19 = _curio_output\nexcept NameError:\n result_cf7bf5ef_5ce7_4e26_974b_fb782f84be19 = None\n", + "metadata": { + "id": "cf7bf5ef-5ce7-4e26-974b-fb782f84be19", + "language": "python", + "nodeId": "cf7bf5ef-5ce7-4e26-974b-fb782f84be19", + "nodeType": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"7902bce6-4771-4f73-9ee1-d706fc22892f\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n inputs = [\n\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_7902bce6_4771_4f73_9ee1_d706fc22892f = _curio_output\nexcept NameError:\n merged_7902bce6_4771_4f73_9ee1_d706fc22892f = None\n", + "metadata": { + "id": "7902bce6-4771-4f73-9ee1-d706fc22892f", + "language": "python", + "nodeId": "7902bce6-4771-4f73-9ee1-d706fc22892f", + "nodeType": "MERGE_FLOW", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"55aa4581-9a68-4257-b2c7-63e3360737e3\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = merged_7902bce6_4771_4f73_9ee1_d706fc22892f\n arg = input_0\n\n import geopandas as gpd\r\n\r\n boston = arg[0]\r\n gdf = arg[1]\r\n\r\n def agg_to_list(series):\r\n return list(series)\r\n\r\n joined = gpd.sjoin(boston, gdf).groupby('GEOID20').agg({'uncertainty': 'mean', 'image_id': agg_to_list})\r\n boston = boston.set_index('GEOID20')\r\n boston.loc[joined.index,'uncertainty'] = joined['uncertainty']\r\n boston.loc[joined.index,'image_id'] = joined['image_id']\r\n\r\n filtered_boston = boston.loc[joined.index]\r\n\r\n filtered_boston = 
filtered_boston.rename(columns={'image_id': 'linked'})\r\n\r\n return filtered_boston\n\n\n_curio_output = _curio_node()\n\ntry:\n result_55aa4581_9a68_4257_b2c7_63e3360737e3 = _curio_output\nexcept NameError:\n result_55aa4581_9a68_4257_b2c7_63e3360737e3 = None\n", + "metadata": { + "id": "55aa4581-9a68-4257-b2c7-63e3360737e3", + "language": "python", + "nodeId": "55aa4581-9a68-4257-b2c7-63e3360737e3", + "nodeType": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"71ab6de4-b23c-42d9-a2fe-ce5141d285b2\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_55aa4581_9a68_4257_b2c7_63e3360737e3\n arg = input_0\n\n import geopandas as gpd\r\n\r\n filtered_boston = arg\r\n\r\n filtered_boston = filtered_boston.loc[:, [filtered_boston.geometry.name, 'uncertainty', 'linked']]\r\n\r\n filtered_boston = filtered_boston.set_crs(4326)\r\n filtered_boston = filtered_boston.to_crs(3395)\r\n\r\n filtered_boston.metadata = {\r\n 'name': 'boston'\r\n }\r\n\r\n return filtered_boston\n\n\n_curio_output = _curio_node()\n\ntry:\n result_71ab6de4_b23c_42d9_a2fe_ce5141d285b2 = _curio_output\nexcept NameError:\n result_71ab6de4_b23c_42d9_a2fe_ce5141d285b2 = None\n", + "metadata": { + "id": "71ab6de4-b23c-42d9-a2fe-ce5141d285b2", + "language": "python", + "nodeId": "71ab6de4-b23c-42d9-a2fe-ce5141d285b2", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ddae8fb9-82ee-4523-bf07-9184c7fc873f\",\n \"type\": \"VIS_UTK\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n\n import utk\n\n {\n \"components\": [\n {\n \"id\": \"grammar_map\",\n \"position\": {\n \"width\": [\n 1,\n 12\n ],\n \"height\": [\n 1,\n 4\n ]\n }\n }\n ],\n \"knots\": [],\n \"ex_knots\": [\n {\n \"id\": \"boston0\",\n \"out_name\": 
\"boston\",\n \"in_name\": \"uncertainty\"\n }\n ],\n \"grid\": {\n \"width\": 12,\n \"height\": 4\n },\n \"grammar\": false\n }\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ddae8fb9_82ee_4523_bf07_9184c7fc873f = _curio_output\nexcept NameError:\n result_ddae8fb9_82ee_4523_bf07_9184c7fc873f = None\n", + "metadata": { + "id": "ddae8fb9-82ee-4523-bf07-9184c7fc873f", + "language": "python", + "nodeId": "ddae8fb9-82ee-4523-bf07-9184c7fc873f", + "nodeType": "VIS_UTK", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"3c5db6ce-0082-47d7-80b7-1b9534b4726b\",\n \"type\": \"DATA_POOL\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n return None\n\n\n_curio_output = _curio_node()\n\ntry:\n pool_3c5db6ce_0082_47d7_80b7_1b9534b4726b = _curio_output\nexcept NameError:\n pool_3c5db6ce_0082_47d7_80b7_1b9534b4726b = None\n", + "metadata": { + "id": "3c5db6ce-0082-47d7-80b7-1b9534b4726b", + "language": "python", + "nodeId": "3c5db6ce-0082-47d7-80b7-1b9534b4726b", + "nodeType": "DATA_POOL", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"f7172aea-3e3d-4c58-b0bc-dc02db24a733\",\n \"type\": \"DATA_POOL\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n return None\n\n\n_curio_output = _curio_node()\n\ntry:\n pool_f7172aea_3e3d_4c58_b0bc_dc02db24a733 = _curio_output\nexcept NameError:\n pool_f7172aea_3e3d_4c58_b0bc_dc02db24a733 = None\n", + "metadata": { + "id": "f7172aea-3e3d-4c58-b0bc-dc02db24a733", + "language": "python", + "nodeId": "f7172aea-3e3d-4c58-b0bc-dc02db24a733", + "nodeType": "DATA_POOL", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"a61f234d-78b4-4c3f-9d22-15a2855967b3\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = 
pool_f7172aea_3e3d_4c58_b0bc_dc02db24a733\n arg = input_0\n\n df = pd.DataFrame(arg.drop(columns=arg.geometry.name))\r\n df = df[df['interacted'] == '1']\r\n df = df.sort_values(by='uncertainty', ascending=False)\r\n return df.head(20)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_a61f234d_78b4_4c3f_9d22_15a2855967b3 = _curio_output\nexcept NameError:\n result_a61f234d_78b4_4c3f_9d22_15a2855967b3 = None\n", + "metadata": { + "id": "a61f234d-78b4-4c3f-9d22-15a2855967b3", + "language": "python", + "nodeId": "a61f234d-78b4-4c3f-9d22-15a2855967b3", + "nodeType": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e2e0b5d8-a0dc-4860-9a08-203871b0d28f\",\n \"type\": \"VIS_IMAGE\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_a61f234d_78b4_4c3f_9d22_15a2855967b3\n arg = input_0\n\n\n\n\n_curio_output = _curio_node()\n\ntry:\n result_e2e0b5d8_a0dc_4860_9a08_203871b0d28f = _curio_output\nexcept NameError:\n result_e2e0b5d8_a0dc_4860_9a08_203871b0d28f = None\n", + "metadata": { + "id": "e2e0b5d8-a0dc-4860-9a08-203871b0d28f", + "language": "python", + "nodeId": "e2e0b5d8-a0dc-4860-9a08-203871b0d28f", + "nodeType": "VIS_IMAGE", + "in": "DEFAULT", + "out": "DEFAULT" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file From c7d7181cfa571bb93efb89b50aad4b451b90df5f Mon Sep 17 00:00:00 2001 From: Jaideep Nutalapati Date: Mon, 27 Apr 2026 12:30:41 -0500 Subject: [PATCH 07/13] Added new notebook examples for Curio pipeline --- .../example10-original-notebook-W.ipynb | 112 ++++++ .../example11-original-notebook-W.ipynb | 340 ++++++++++++++++++ .../example5-original-notebook-W.ipynb | 64 ++++ .../example7-original-notebook-W.ipynb | 52 +++ 
.../example8-original-notebook-W.ipynb | 304 ++++++++++++++++ .../example9-original-notebook-W.ipynb | 52 +++ 6 files changed, 924 insertions(+) create mode 100644 docs/examples/notebooks/example10-original-notebook-W.ipynb create mode 100644 docs/examples/notebooks/example11-original-notebook-W.ipynb create mode 100644 docs/examples/notebooks/example5-original-notebook-W.ipynb create mode 100644 docs/examples/notebooks/example7-original-notebook-W.ipynb create mode 100644 docs/examples/notebooks/example8-original-notebook-W.ipynb create mode 100644 docs/examples/notebooks/example9-original-notebook-W.ipynb diff --git a/docs/examples/notebooks/example10-original-notebook-W.ipynb b/docs/examples/notebooks/example10-original-notebook-W.ipynb new file mode 100644 index 00000000..7b87a3c1 --- /dev/null +++ b/docs/examples/notebooks/example10-original-notebook-W.ipynb @@ -0,0 +1,112 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"82537c44-8195-4cd3-a5fa-8a049d53d96e\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"/Users/jaideepnutalapati/Documents/GitHub/curio/docs/examples/data/Green_Roofs.csv\")\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_82537c44_8195_4cd3_a5fa_8a049d53d96e = _curio_output\nexcept NameError:\n data_82537c44_8195_4cd3_a5fa_8a049d53d96e = None\n", + "metadata": { + "id": "82537c44-8195-4cd3-a5fa-8a049d53d96e", + "language": "python", + "nodeId": "82537c44-8195-4cd3-a5fa-8a049d53d96e", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e5e7e21f-609d-496b-b231-659ee91ff9af\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_82537c44_8195_4cd3_a5fa_8a049d53d96e\n arg = input_0\n\n import pandas as pd\n\n df = arg\n df.fillna(0, 
inplace=True)\n\n return df\n\n\n_curio_output = _curio_node()\n\ntry:\n result_e5e7e21f_609d_496b_b231_659ee91ff9af = _curio_output\nexcept NameError:\n result_e5e7e21f_609d_496b_b231_659ee91ff9af = None\n", + "metadata": { + "id": "e5e7e21f-609d-496b-b231-659ee91ff9af", + "language": "python", + "nodeId": "e5e7e21f-609d-496b-b231-659ee91ff9af", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"4226b7ed-c8e7-4acf-873b-2d835d9c4a07\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_e5e7e21f_609d_496b_b231_659ee91ff9af\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"description\": \"Histogram of Total Roof Size of Buildings in Chicago (log-scaled)\",\n \"data\": {\n \"name\": \"data\"\n },\n \"transform\": [\n { \"filter\": \"datum.TOTAL_ROOF_SQFT > 0\" },\n {\n \"calculate\": \"log(datum.TOTAL_ROOF_SQFT) / log(10)\",\n \"as\": \"log_roof_size\"\n }\n ],\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\n \"field\": \"log_roof_size\",\n \"bin\": { \"maxbins\": 30 },\n \"axis\": {\n \"title\": \"Total Roof Size (sqft)\",\n \"values\": [3, 4, 5, 6],\n \"labelExpr\": \"'10^' + datum.value\"\n }\n },\n \"y\": {\n \"aggregate\": \"count\",\n \"type\": \"quantitative\",\n \"axis\": {\n \"title\": \"Number of Buildings\"\n }\n }\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_4226b7ed_c8e7_4acf_873b_2d835d9c4a07 = _curio_output\nexcept NameError:\n result_4226b7ed_c8e7_4acf_873b_2d835d9c4a07 = None\n", + "metadata": { + "id": 
"4226b7ed-c8e7-4acf-873b-2d835d9c4a07", + "language": "python", + "nodeId": "4226b7ed-c8e7-4acf-873b-2d835d9c4a07", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"92717c5a-2e65-4ca3-9818-d5f73c89f0a9\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import geopandas as gpd\n import pandas as pd\n from shapely.geometry import Point\n # Read the green roofs dataset\n green_roofs_df = pd.read_csv(\"/Users/jaideepnutalapati/Documents/GitHub/curio/docs/examples/data/Green_Roofs.csv\")\n\n # Create the dataset into geo dataframe using latitude and longitude columns\n geometry = [Point(xy) for xy in zip(green_roofs_df['LONGITUDE'], green_roofs_df['LATITUDE'])]\n green_roofs_df = gpd.GeoDataFrame(green_roofs_df, geometry=geometry, crs=4326)\n chicago = gpd.read_file(\"/Users/jaideepnutalapati/Documents/GitHub/curio/docs/examples/data/Green_Roofs/chicago.geojson\")\n\n # Joining the green roofs dataset with the chicago neighborhood geojson file\n joined = gpd.sjoin(green_roofs_df, chicago, predicate='within')\n return joined\n\n_curio_output = _curio_node()\n\ntry:\n data_92717c5a_2e65_4ca3_9818_d5f73c89f0a9 = _curio_output\nexcept NameError:\n data_92717c5a_2e65_4ca3_9818_d5f73c89f0a9 = None\n", + "metadata": { + "id": "92717c5a-2e65-4ca3-9818-d5f73c89f0a9", + "language": "python", + "nodeId": "92717c5a-2e65-4ca3-9818-d5f73c89f0a9", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"c528f5bc-5d28-4c72-9158-dfb4282c504f\",\n \"type\": \"DATA_POOL\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n return data_92717c5a_2e65_4ca3_9818_d5f73c89f0a9\n\n\n_curio_output = _curio_node()\n\ntry:\n pool_c528f5bc_5d28_4c72_9158_dfb4282c504f = _curio_output\nexcept NameError:\n 
pool_c528f5bc_5d28_4c72_9158_dfb4282c504f = None\n", + "metadata": { + "id": "c528f5bc-5d28-4c72-9158-dfb4282c504f", + "language": "python", + "nodeId": "c528f5bc-5d28-4c72-9158-dfb4282c504f", + "nodeType": "DATA_POOL", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"5c11ed5f-c993-4940-89d0-08d186e903f9\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = pool_c528f5bc_5d28_4c72_9158_dfb4282c504f\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"description\": \"Dot Density Map of Green Roof Locations in Chicago with Zoom & Pan\",\n \"width\": 500,\n \"height\": 600,\n \"title\": \"Green Roof Locations in Chicago\",\n \"mark\": \"circle\",\n \"selection\": {\n \"grid\": {\n \"type\": \"interval\",\n \"bind\": \"scales\"\n }\n },\n \"encoding\": {\n \"x\": {\n \"field\": \"LONGITUDE\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [-88.0, -87.5] },\n \"axis\": { \"title\": \"Longitude\" }\n },\n \"y\": {\n \"field\": \"LATITUDE\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [41.6, 42.1] },\n \"axis\": { \"title\": \"Latitude\" }\n },\n \"size\": {\n \"field\": \"VEGETATED_SQFT\",\n \"type\": \"quantitative\",\n \"legend\": { \"title\": \"Vegetated Sqft\" }\n },\n \"tooltip\": [\n { \"field\": \"VEGETATED_SQFT\", \"type\": \"quantitative\" },\n { \"field\": \"TOTAL_ROOF_SQFT\", \"type\": \"quantitative\" },\n { \"field\": \"zip\", \"type\": \"nominal\" }\n ]\n },\n \"config\": {\n \"view\": { \"stroke\": \"transparent\" }\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output 
= _curio_node()\n\ntry:\n result_5c11ed5f_c993_4940_89d0_08d186e903f9 = _curio_output\nexcept NameError:\n result_5c11ed5f_c993_4940_89d0_08d186e903f9 = None\n", + "metadata": { + "id": "5c11ed5f-c993-4940-89d0-08d186e903f9", + "language": "python", + "nodeId": "5c11ed5f-c993-4940-89d0-08d186e903f9", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"a9995883-f2ea-4b42-b74d-59ebc727afc6\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = pool_c528f5bc_5d28_4c72_9158_dfb4282c504f\n arg = input_0\n\n import geopandas as gpd\n\n joined = arg\n\n # filter out the top 10 zip codes from the joined dataframe on 'zip' by square feet\n top_10_largest = joined.groupby('zip')['VEGETATED_SQFT'].sum().reset_index().sort_values(by='VEGETATED_SQFT', ascending=False).head(10)\n\n return top_10_largest\n\n\n_curio_output = _curio_node()\n\ntry:\n result_a9995883_f2ea_4b42_b74d_59ebc727afc6 = _curio_output\nexcept NameError:\n result_a9995883_f2ea_4b42_b74d_59ebc727afc6 = None\n", + "metadata": { + "id": "a9995883-f2ea-4b42-b74d-59ebc727afc6", + "language": "python", + "nodeId": "a9995883-f2ea-4b42-b74d-59ebc727afc6", + "nodeType": "DATA_TRANSFORMATION", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"d23e2587-57bf-4db4-84fe-cdb7c2de638d\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_a9995883_f2ea_4b42_b74d_59ebc727afc6\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"description\": \"Top 10 largest zip codes by green roof area\",\n \"width\": 400,\n \"height\": 200,\n \"selection\": {\n \"zip_select\": {\n \"type\": \"multi\",\n \"fields\": [\"zip\"],\n \"toggle\": \"event.shiftKey\"\n }\n },\n \"mark\": { \"type\": \"bar\", \"stroke\": 
\"black\", \"color\": \"green\" },\n \"encoding\": {\n \"x\": {\n \"field\": \"VEGETATED_SQFT\",\n \"type\": \"quantitative\",\n \"axis\": {\n \"title\": \"Total Vegetated Roof Size (sqft)\",\n \"values\": [100000, 250000, 500000, 1000000],\n \"format\": \",d\"\n }\n },\n \"y\": {\n \"field\": \"zip\",\n \"type\": \"nominal\",\n \"sort\": \"-x\",\n \"axis\": { \"title\": \"Zip Code\" }\n },\n \"color\": {\n \"field\": \"zip\",\n \"type\": \"nominal\",\n \"scale\": { \"scheme\": \"category20\" }\n },\n \"opacity\": {\n \"condition\": { \"selection\": \"zip_select\", \"value\": 1 },\n \"value\": 0.3\n },\n \"tooltip\": [\n { \"field\": \"zip\", \"type\": \"nominal\" },\n { \"field\": \"VEGETATED_SQFT\", \"type\": \"quantitative\" }\n ]\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_d23e2587_57bf_4db4_84fe_cdb7c2de638d = _curio_output\nexcept NameError:\n result_d23e2587_57bf_4db4_84fe_cdb7c2de638d = None\n", + "metadata": { + "id": "d23e2587-57bf-4db4-84fe-cdb7c2de638d", + "language": "python", + "nodeId": "d23e2587-57bf-4db4-84fe-cdb7c2de638d", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example11-original-notebook-W.ipynb b/docs/examples/notebooks/example11-original-notebook-W.ipynb new file mode 100644 index 00000000..840c1203 --- /dev/null +++ b/docs/examples/notebooks/example11-original-notebook-W.ipynb @@ -0,0 +1,340 @@ +{ + 
"cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"7d41f188-ec81-4642-ba73-eeb46ae8ebe2\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n # Load the CSV directly\n df = pd.read_csv(\"data/Energy_Usage_5000.csv\")\n\n # Select relevant columns and clean missing values\n grouped_data = df[[\"BUILDING TYPE\", \"TOTAL KWH\", \"TOTAL THERMS\"]].dropna()\n\n # Return cleaned DataFrame\n return grouped_data\n\n_curio_output = _curio_node()\n\ntry:\n data_7d41f188_ec81_4642_ba73_eeb46ae8ebe2 = _curio_output\nexcept NameError:\n data_7d41f188_ec81_4642_ba73_eeb46ae8ebe2 = None\n", + "metadata": { + "id": "7d41f188-ec81-4642-ba73-eeb46ae8ebe2", + "language": "python", + "nodeId": "7d41f188-ec81-4642-ba73-eeb46ae8ebe2", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"9eafa9de-726a-404b-8c97-d3d5ec94e51d\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_7d41f188_ec81_4642_ba73_eeb46ae8ebe2\n arg = input_0\n\n def remove_outliers(df, column):\n Q1 = df[column].quantile(0.25)\n Q3 = df[column].quantile(0.75)\n IQR = Q3 - Q1\n return df[(df[column] >= Q1 - 1.5 * IQR) & (df[column] <= Q3 + 1.5 * IQR)]\n\n def clean(df):\n # Drop only if columns exist\n required_cols = ['CENSUS BLOCK', 'BUILDING TYPE', 'BUILDING_SUBTYPE']\n drop_cols = [col for col in required_cols if col in df.columns]\n\n df_cleaned = df.dropna(subset=drop_cols).copy()\n\n # Standard KWH/THERM fill\n kwh_columns = [col for col in df.columns if 'KWH' in col and '2010' in col and 'SQFT' not in col]\n therm_columns = [col for col in df.columns if 'THERM' in col and '2010' in col and 'SQFT' not in col]\n df_cleaned[kwh_columns] = df_cleaned[kwh_columns].fillna(df_cleaned[kwh_columns].median())\n df_cleaned[therm_columns] = 
df_cleaned[therm_columns].fillna(df_cleaned[therm_columns].median())\n\n for col in [\n 'TOTAL KWH', 'TOTAL THERMS',\n 'OCCUPIED UNITS PERCENTAGE', 'OCCUPIED UNITS',\n 'RENTER-OCCUPIED HOUSING UNITS'\n ]:\n if col in df_cleaned.columns:\n df_cleaned[col] = df_cleaned[col].fillna(df_cleaned[col].median())\n\n df_cleaned['ELECTRICITY ACCOUNTS'] = pd.to_numeric(df_cleaned.get('ELECTRICITY ACCOUNTS'), errors='coerce')\n df_cleaned['GAS ACCOUNTS'] = pd.to_numeric(df_cleaned.get('GAS ACCOUNTS'), errors='coerce')\n df_cleaned['ELECTRICITY'] = df_cleaned['TOTAL KWH']\n df_cleaned['GAS'] = df_cleaned['TOTAL THERMS']\n\n df_cleaned = df_cleaned.loc[:, df_cleaned.isnull().mean() < 0.2]\n\n if 'TERM APRIL 2010' in df.columns:\n df.rename(columns={'TERM APRIL 2010': 'THERM APRIL 2010'}, inplace=True)\n\n # Standardize community names\n if 'COMMUNITY AREA NAME' in df_cleaned.columns:\n df_cleaned['COMMUNITY AREA NAME'] = df_cleaned['COMMUNITY AREA NAME'].str.strip().str.upper()\n df_cleaned['COMMUNITY AREA NAME'] = df_cleaned['COMMUNITY AREA NAME'].replace({\n \"LAKEVIEW\": \"LAKE VIEW\",\n \"O'HARE\": \"OHARE\"\n })\n\n # Ensure total columns are present\n if 'TOTAL KWH' in df_cleaned.columns:\n df_cleaned['TOTAL KWH'] = df_cleaned['TOTAL KWH'].fillna(df_cleaned['TOTAL KWH'].median())\n if 'TOTAL THERMS' in df_cleaned.columns:\n df_cleaned['TOTAL THERMS'] = df_cleaned['TOTAL THERMS'].fillna(df_cleaned['TOTAL THERMS'].median())\n\n if 'AVERAGE BUILDING AGE' in df_cleaned.columns:\n df_cleaned['DECADE BUILT'] = (2010 - df_cleaned['AVERAGE BUILDING AGE']) // 10 * 10\n\n df_cleaned = remove_outliers(df_cleaned, 'TOTAL KWH')\n df_cleaned = remove_outliers(df_cleaned, 'TOTAL THERMS')\n\n\n\n return df_cleaned\n\n\n # Run cleaning and return\n return clean(arg)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_9eafa9de_726a_404b_8c97_d3d5ec94e51d = _curio_output\nexcept NameError:\n result_9eafa9de_726a_404b_8c97_d3d5ec94e51d = None\n", + "metadata": { + "id": 
"9eafa9de-726a-404b-8c97-d3d5ec94e51d", + "language": "python", + "nodeId": "9eafa9de-726a-404b-8c97-d3d5ec94e51d", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"fc8f5cba-2ecd-4702-b19f-d471b49104c8\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_9eafa9de_726a_404b_8c97_d3d5ec94e51d\n arg = input_0\n\n # Assume `arg` is the cleaned DataFrame from the previous card\n import pandas as pd\n\n energy_long = pd.melt(\n arg,\n id_vars='BUILDING TYPE',\n value_vars=['TOTAL KWH', 'TOTAL THERMS'],\n var_name='ENERGY TYPE',\n value_name='VALUE'\n )\n\n total_by_type = energy_long.groupby('BUILDING TYPE')['VALUE'].transform('sum')\n energy_long['PERCENTAGE'] = (energy_long['VALUE'] / total_by_type) * 100\n\n return energy_long\n\n\n_curio_output = _curio_node()\n\ntry:\n result_fc8f5cba_2ecd_4702_b19f_d471b49104c8 = _curio_output\nexcept NameError:\n result_fc8f5cba_2ecd_4702_b19f_d471b49104c8 = None\n", + "metadata": { + "id": "fc8f5cba-2ecd-4702-b19f-d471b49104c8", + "language": "python", + "nodeId": "fc8f5cba-2ecd-4702-b19f-d471b49104c8", + "nodeType": "DATA_TRANSFORMATION", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"53e8c833-202d-40d2-8ccf-d0b304566593\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_fc8f5cba_2ecd_4702_b19f_d471b49104c8\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"energy_transformed_1\" },\n \"mark\": \"rect\",\n \"encoding\": {\n \"x\": { \"field\": \"BUILDING TYPE\", \"type\": \"nominal\" },\n \"y\": { \"field\": \"ENERGY TYPE\", \"type\": \"nominal\" },\n \"color\": {\n \"field\": \"VALUE\",\n \"type\": \"quantitative\",\n \"scale\": { \"scheme\": \"viridis\" 
}\n },\n \"tooltip\": [\n { \"field\": \"BUILDING TYPE\" },\n { \"field\": \"ENERGY TYPE\" },\n { \"field\": \"VALUE\", \"format\": \".2f\" },\n { \"field\": \"PERCENTAGE\", \"format\": \".1f\" }\n ]\n },\n \"title\": \"Energy Consumption Heatmap (KWH + THERMS)\"\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_53e8c833_202d_40d2_8ccf_d0b304566593 = _curio_output\nexcept NameError:\n result_53e8c833_202d_40d2_8ccf_d0b304566593 = None\n", + "metadata": { + "id": "53e8c833-202d-40d2-8ccf-d0b304566593", + "language": "python", + "nodeId": "53e8c833-202d-40d2-8ccf-d0b304566593", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"22d68832-0b7b-4a7c-8f11-989d4780f56f\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_fc8f5cba_2ecd_4702_b19f_d471b49104c8\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"energy_transformed_1\" },\n \"mark\": \"circle\",\n \"encoding\": {\n \"x\": {\n \"field\": \"BUILDING TYPE\",\n \"type\": \"nominal\",\n \"axis\": { \"labelAngle\": -45 }\n },\n \"y\": { \"field\": \"VALUE\", \"type\": \"quantitative\" },\n \"color\": { \"field\": \"ENERGY TYPE\", \"type\": \"nominal\" },\n \"size\": { \"field\": \"VALUE\", \"type\": \"quantitative\" },\n \"tooltip\": [\n { \"field\": \"BUILDING TYPE\" },\n { \"field\": \"ENERGY TYPE\" },\n { \"field\": \"VALUE\", \"format\": \".2f\" }\n ]\n },\n \"title\": \"Dot Plot of Energy Usage by Building Type\"\n }\n\n values = input_data\n if 
hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_22d68832_0b7b_4a7c_8f11_989d4780f56f = _curio_output\nexcept NameError:\n result_22d68832_0b7b_4a7c_8f11_989d4780f56f = None\n", + "metadata": { + "id": "22d68832-0b7b-4a7c-8f11-989d4780f56f", + "language": "python", + "nodeId": "22d68832-0b7b-4a7c-8f11-989d4780f56f", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"7a1c0e5b-3c39-4727-a616-f4ca97fdbb44\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_9eafa9de_726a_404b_8c97_d3d5ec94e51d\n arg = input_0\n\n # Group by building type and compute average gas usage\n df_avg_gas = arg.groupby(\"BUILDING TYPE\")[\"TOTAL THERMS\"].mean().reset_index()\n df_avg_gas.rename(columns={\"TOTAL THERMS\": \"AVG TOTAL THERMS\"}, inplace=True)\n\n return df_avg_gas\n\n\n_curio_output = _curio_node()\n\ntry:\n result_7a1c0e5b_3c39_4727_a616_f4ca97fdbb44 = _curio_output\nexcept NameError:\n result_7a1c0e5b_3c39_4727_a616_f4ca97fdbb44 = None\n", + "metadata": { + "id": "7a1c0e5b-3c39-4727-a616-f4ca97fdbb44", + "language": "python", + "nodeId": "7a1c0e5b-3c39-4727-a616-f4ca97fdbb44", + "nodeType": "DATA_TRANSFORMATION", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ce74522c-d689-4cf9-bb41-e4f6a24882d4\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_7a1c0e5b_3c39_4727_a616_f4ca97fdbb44\n\n spec = {\n \"$schema\": 
\"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"avg_gas_by_building\" },\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\n \"field\": \"BUILDING TYPE\",\n \"type\": \"nominal\",\n \"axis\": { \"labelAngle\": -45 }\n },\n \"y\": { \"field\": \"AVG TOTAL THERMS\", \"type\": \"quantitative\" },\n \"tooltip\": [\n { \"field\": \"BUILDING TYPE\" },\n { \"field\": \"AVG TOTAL THERMS\", \"format\": \".2f\" }\n ],\n \"color\": { \"field\": \"BUILDING TYPE\", \"type\": \"nominal\" }\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ce74522c_d689_4cf9_bb41_e4f6a24882d4 = _curio_output\nexcept NameError:\n result_ce74522c_d689_4cf9_bb41_e4f6a24882d4 = None\n", + "metadata": { + "id": "ce74522c-d689-4cf9-bb41-e4f6a24882d4", + "language": "python", + "nodeId": "ce74522c-d689-4cf9-bb41-e4f6a24882d4", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"15aa8924-3c5a-42a3-a89e-456f675c469a\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"data/Energy_Usage_5000.csv\")\n\n # Standardize column names right away for consistency\n df.columns = [col.upper().strip() for col in df.columns]\n\n # Just return full dataset for now no filtering yet\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_15aa8924_3c5a_42a3_a89e_456f675c469a = _curio_output\nexcept NameError:\n data_15aa8924_3c5a_42a3_a89e_456f675c469a = None\n", + "metadata": { + "id": "15aa8924-3c5a-42a3-a89e-456f675c469a", + "language": "python", + "nodeId": 
"15aa8924-3c5a-42a3-a89e-456f675c469a", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"23a15771-4d9a-479f-911f-b6f758850574\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_15aa8924_3c5a_42a3_a89e_456f675c469a\n arg = input_0\n\n def remove_outliers(df, column):\n Q1 = df[column].quantile(0.25)\n Q3 = df[column].quantile(0.75)\n IQR = Q3 - Q1\n return df[(df[column] >= Q1 - 1.5 * IQR) & (df[column] <= Q3 + 1.5 * IQR)]\n\n def clean(df):\n # We assume column names are already uppercased by the data loading card\n required_cols = ['COMMUNITY AREA NAME', 'TOTAL KWH', 'TOTAL THERMS', 'BUILDING TYPE']\n df = df.dropna(subset=required_cols).copy()\n\n df['COMMUNITY AREA NAME'] = df['COMMUNITY AREA NAME'].str.strip().str.upper()\n df['TOTAL KWH'] = df['TOTAL KWH'].fillna(df['TOTAL KWH'].median())\n df['TOTAL THERMS'] = df['TOTAL THERMS'].fillna(df['TOTAL THERMS'].median())\n\n df = remove_outliers(df, 'TOTAL KWH')\n df = remove_outliers(df, 'TOTAL THERMS')\n\n return df\n\n return clean(arg)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_23a15771_4d9a_479f_911f_b6f758850574 = _curio_output\nexcept NameError:\n result_23a15771_4d9a_479f_911f_b6f758850574 = None\n", + "metadata": { + "id": "23a15771-4d9a-479f-911f-b6f758850574", + "language": "python", + "nodeId": "23a15771-4d9a-479f-911f-b6f758850574", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"2cd9e67d-4e07-4852-8a55-f6b50cd3658d\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_23a15771_4d9a_479f_911f_b6f758850574\n arg = input_0\n\n df_avg = arg[[\"COMMUNITY AREA NAME\", \"TOTAL KWH\", \"TOTAL THERMS\"]].dropna()\n\n agg_df = df_avg.groupby(\"COMMUNITY AREA 
NAME\").agg({\n \"TOTAL KWH\": \"mean\",\n \"TOTAL THERMS\": \"mean\"\n }).reset_index()\n\n agg_df[\"AVG ENERGY USE\"] = agg_df[\"TOTAL KWH\"] + agg_df[\"TOTAL THERMS\"]\n\n top10 = agg_df.sort_values(\"AVG ENERGY USE\", ascending=False).head(10)\n\n return top10\n\n\n_curio_output = _curio_node()\n\ntry:\n result_2cd9e67d_4e07_4852_8a55_f6b50cd3658d = _curio_output\nexcept NameError:\n result_2cd9e67d_4e07_4852_8a55_f6b50cd3658d = None\n", + "metadata": { + "id": "2cd9e67d-4e07-4852-8a55-f6b50cd3658d", + "language": "python", + "nodeId": "2cd9e67d-4e07-4852-8a55-f6b50cd3658d", + "nodeType": "DATA_TRANSFORMATION", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"4e7a7f96-4615-4099-bbb9-b5811b560361\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_2cd9e67d_4e07_4852_8a55_f6b50cd3658d\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"top10_avg_energy_by_community\" },\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\",\n \"sort\": \"-y\",\n \"axis\": { \"labelAngle\": -45 }\n },\n \"y\": {\n \"field\": \"AVG ENERGY USE\",\n \"type\": \"quantitative\",\n \"title\": \"Avg Energy Use (KWH + THERMS)\"\n },\n \"tooltip\": [\n { \"field\": \"COMMUNITY AREA NAME\" },\n { \"field\": \"AVG ENERGY USE\", \"format\": \".2f\" }\n ],\n \"color\": {\n \"field\": \"AVG ENERGY USE\",\n \"type\": \"quantitative\",\n \"scale\": { \"scheme\": \"blues\" }\n }\n },\n \"title\": \"Top 10 Communities by Avg Energy Consumption\"\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, 
raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_4e7a7f96_4615_4099_bbb9_b5811b560361 = _curio_output\nexcept NameError:\n result_4e7a7f96_4615_4099_bbb9_b5811b560361 = None\n", + "metadata": { + "id": "4e7a7f96-4615-4099-bbb9-b5811b560361", + "language": "python", + "nodeId": "4e7a7f96-4615-4099-bbb9-b5811b560361", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"0957871c-b1f3-4c89-84de-3dc8f88e49c7\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_23a15771_4d9a_479f_911f_b6f758850574\n arg = input_0\n\n df_scatter = arg[[\"TOTAL KWH\", \"TOTAL THERMS\", \"BUILDING TYPE\"]].dropna()\n df_scatter = df_scatter[(df_scatter[\"TOTAL KWH\"] > 0) & (df_scatter[\"TOTAL THERMS\"] > 0)]\n\n return df_scatter\n\n\n_curio_output = _curio_node()\n\ntry:\n result_0957871c_b1f3_4c89_84de_3dc8f88e49c7 = _curio_output\nexcept NameError:\n result_0957871c_b1f3_4c89_84de_3dc8f88e49c7 = None\n", + "metadata": { + "id": "0957871c-b1f3-4c89-84de-3dc8f88e49c7", + "language": "python", + "nodeId": "0957871c-b1f3-4c89-84de-3dc8f88e49c7", + "nodeType": "DATA_TRANSFORMATION", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"2ba732cf-7fa3-4a59-b41f-faa3d707994f\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_0957871c_b1f3_4c89_84de_3dc8f88e49c7\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"scatter_energy_usage\" },\n \"mark\": \"point\",\n \"encoding\": {\n \"x\": {\n \"field\": \"TOTAL KWH\",\n \"type\": \"quantitative\",\n \"scale\": { \"type\": \"log\" }\n },\n \"y\": {\n \"field\": \"TOTAL THERMS\",\n \"type\": \"quantitative\",\n \"scale\": { \"type\": \"log\" }\n },\n 
\"color\": { \"field\": \"BUILDING TYPE\", \"type\": \"nominal\" },\n \"tooltip\": [\n { \"field\": \"BUILDING TYPE\" },\n { \"field\": \"TOTAL KWH\" },\n { \"field\": \"TOTAL THERMS\" }\n ]\n },\n \"title\": \"Electricity vs Gas Usage by Building Type (Log Scale)\"\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_2ba732cf_7fa3_4a59_b41f_faa3d707994f = _curio_output\nexcept NameError:\n result_2ba732cf_7fa3_4a59_b41f_faa3d707994f = None\n", + "metadata": { + "id": "2ba732cf-7fa3-4a59-b41f-faa3d707994f", + "language": "python", + "nodeId": "2ba732cf-7fa3-4a59-b41f-faa3d707994f", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e54363aa-e9c8-45d2-a5d0-3938185d4445\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_23a15771_4d9a_479f_911f_b6f758850574\n arg = input_0\n\n df_strip = arg[[\"BUILDING TYPE\", \"TOTAL THERMS\"]].dropna()\n\n # Remove large outliers for visualization clarity\n df_strip = df_strip[df_strip[\"TOTAL THERMS\"] < 500_000]\n\n return df_strip\n\n\n_curio_output = _curio_node()\n\ntry:\n result_e54363aa_e9c8_45d2_a5d0_3938185d4445 = _curio_output\nexcept NameError:\n result_e54363aa_e9c8_45d2_a5d0_3938185d4445 = None\n", + "metadata": { + "id": "e54363aa-e9c8-45d2-a5d0-3938185d4445", + "language": "python", + "nodeId": "e54363aa-e9c8-45d2-a5d0-3938185d4445", + "nodeType": "DATA_TRANSFORMATION", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": 
\"0503ce30-db2c-4a6f-833f-459969113302\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_e54363aa_e9c8_45d2_a5d0_3938185d4445\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"df_strip\" },\n \"mark\": \"tick\",\n \"encoding\": {\n \"x\": { \"field\": \"TOTAL THERMS\", \"type\": \"quantitative\" },\n \"y\": { \"field\": \"BUILDING TYPE\", \"type\": \"nominal\" },\n \"color\": { \"field\": \"BUILDING TYPE\", \"type\": \"nominal\" },\n \"tooltip\": [{ \"field\": \"BUILDING TYPE\" }, { \"field\": \"TOTAL THERMS\" }]\n },\n \"title\": \"Gas Usage Spread by Building Type (Strip Plot)\"\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_0503ce30_db2c_4a6f_833f_459969113302 = _curio_output\nexcept NameError:\n result_0503ce30_db2c_4a6f_833f_459969113302 = None\n", + "metadata": { + "id": "0503ce30-db2c-4a6f-833f-459969113302", + "language": "python", + "nodeId": "0503ce30-db2c-4a6f-833f-459969113302", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e62ad390-9e0a-44aa-87f8-644c33974d04\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import pandas as pd\n df = pd.read_csv(\"/Users/jaideepnutalapati/Documents/GitHub/curio/docs/examples/data/Energy_Usage_5000.csv\")\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_e62ad390_9e0a_44aa_87f8_644c33974d04 = _curio_output\nexcept NameError:\n data_e62ad390_9e0a_44aa_87f8_644c33974d04 = None\n", + 
"metadata": { + "id": "e62ad390-9e0a-44aa-87f8-644c33974d04", + "language": "python", + "nodeId": "e62ad390-9e0a-44aa-87f8-644c33974d04", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"42ba84c5-edcf-49b5-ab3b-b5658e018f60\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_e62ad390_9e0a_44aa_87f8_644c33974d04\n arg = input_0\n\n # Filter for KWH month columns\n month_cols = [col for col in arg.columns if col.startswith(\"KWH \") and \"2010\" in col]\n required_cols = [\"COMMUNITY AREA NAME\"] + month_cols\n\n df = arg[required_cols].dropna()\n\n # Melt to long format\n df_long = pd.melt(\n df,\n id_vars=[\"COMMUNITY AREA NAME\"],\n value_vars=month_cols,\n var_name=\"Month\",\n value_name=\"KWH\"\n )\n\n # Extract month name (e.g., \"JANUARY\")\n df_long[\"Month\"] = df_long[\"Month\"].str.extract(r\"KWH (.+?) 2010\")[0].str.upper()\n df_long = df_long.dropna(subset=[\"Month\", \"KWH\", \"COMMUNITY AREA NAME\"])\n\n return df_long\n\n\n_curio_output = _curio_node()\n\ntry:\n result_42ba84c5_edcf_49b5_ab3b_b5658e018f60 = _curio_output\nexcept NameError:\n result_42ba84c5_edcf_49b5_ab3b_b5658e018f60 = None\n", + "metadata": { + "id": "42ba84c5-edcf-49b5-ab3b-b5658e018f60", + "language": "python", + "nodeId": "42ba84c5-edcf-49b5-ab3b-b5658e018f60", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e7753795-7c01-4773-a9a0-ffeb648cf9bf\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_42ba84c5_edcf_49b5_ab3b_b5658e018f60\n arg = input_0\n\n # Get top 20 communities by average KWH\n top_20 = arg.groupby(\"COMMUNITY AREA NAME\")[\"KWH\"].mean().sort_values(ascending=False).head(20).index\n\n # Filter the long-form data\n 
df_top20 = arg[arg[\"COMMUNITY AREA NAME\"].isin(top_20)].copy()\n return df_top20\n\n\n_curio_output = _curio_node()\n\ntry:\n result_e7753795_7c01_4773_a9a0_ffeb648cf9bf = _curio_output\nexcept NameError:\n result_e7753795_7c01_4773_a9a0_ffeb648cf9bf = None\n", + "metadata": { + "id": "e7753795-7c01-4773-a9a0-ffeb648cf9bf", + "language": "python", + "nodeId": "e7753795-7c01-4773-a9a0-ffeb648cf9bf", + "nodeType": "DATA_TRANSFORMATION", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"d537cc76-27b7-41b4-95dc-7ec65ec1ec42\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_e7753795_7c01_4773_a9a0_ffeb648cf9bf\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"params\": [\n {\n \"name\": \"commPick\",\n \"select\": {\n \"type\": \"point\",\n \"fields\": [\"COMMUNITY AREA NAME\"]\n }\n }\n ],\n \"vconcat\": [\n {\n \"title\": \"Click on a Line to Highlight a Community\",\n \"width\": 650,\n \"height\": 400,\n \"mark\": {\n \"type\": \"line\",\n \"interpolate\": \"monotone\"\n },\n \"encoding\": {\n \"x\": {\n \"field\": \"Month\",\n \"type\": \"nominal\",\n \"sort\": [\n \"JANUARY\",\n \"FEBRUARY\",\n \"MARCH\",\n \"APRIL\",\n \"MAY\",\n \"JUNE\",\n \"JULY\",\n \"AUGUST\",\n \"SEPTEMBER\",\n \"OCTOBER\",\n \"NOVEMBER\",\n \"DECEMBER\"\n ],\n \"axis\": { \"labelAngle\": 45 }\n },\n \"y\": {\n \"field\": \"KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Total KWH\",\n \"scale\": { \"zero\": False }\n },\n \"color\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\",\n \"scale\": { \"scheme\": \"category20\" },\n \"legend\": { \"columns\": 2 }\n },\n \"opacity\": {\n \"condition\": { \"param\": \"commPick\", \"value\": 1 },\n \"value\": 0.2\n },\n \"tooltip\": [\n { \"field\": \"COMMUNITY AREA NAME\", \"title\": \"Community\" },\n { \"field\": \"Month\" },\n { \"field\": \"KWH\", 
\"format\": \",.0f\" }\n ]\n }\n },\n {\n \"title\": \"Average KWH of Selected Community\",\n \"width\": 650,\n \"height\": 300,\n \"mark\": \"bar\",\n \"encoding\": {\n \"y\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\",\n \"sort\": \"-x\"\n },\n \"x\": {\n \"aggregate\": \"mean\",\n \"field\": \"KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Avg KWH\"\n },\n \"color\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\"\n }\n },\n \"transform\": [{ \"filter\": { \"param\": \"commPick\" } }]\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_d537cc76_27b7_41b4_95dc_7ec65ec1ec42 = _curio_output\nexcept NameError:\n result_d537cc76_27b7_41b4_95dc_7ec65ec1ec42 = None\n", + "metadata": { + "id": "d537cc76-27b7-41b4-95dc-7ec65ec1ec42", + "language": "python", + "nodeId": "d537cc76-27b7-41b4-95dc-7ec65ec1ec42", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e3b53a07-079f-402c-82c5-69d30fe06b24\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"/Users/jaideepnutalapati/Documents/GitHub/curio/docs/examples/data/Energy_Usage_5000.csv\")\n\n month_cols = [col for col in df.columns if col.startswith(\"KWH \") and \"2010\" in col]\n required_cols = [\"COMMUNITY AREA NAME\"] + month_cols\n\n df = df[required_cols].dropna()\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_e3b53a07_079f_402c_82c5_69d30fe06b24 = _curio_output\nexcept NameError:\n data_e3b53a07_079f_402c_82c5_69d30fe06b24 = None\n", 
+ "metadata": { + "id": "e3b53a07-079f-402c-82c5-69d30fe06b24", + "language": "python", + "nodeId": "e3b53a07-079f-402c-82c5-69d30fe06b24", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"03b3d67d-9bfd-466d-89c3-a616f6951f7d\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_e3b53a07_079f_402c_82c5_69d30fe06b24\n arg = input_0\n\n df_long = pd.melt(\n arg,\n id_vars=[\"COMMUNITY AREA NAME\"],\n value_vars=[col for col in arg.columns if \"KWH\" in col],\n var_name=\"Month\",\n value_name=\"KWH\"\n )\n\n df_long[\"Month\"] = df_long[\"Month\"].str.extract(r\"KWH (.+?) 2010\")[0].str.upper()\n df_long = df_long.dropna(subset=[\"Month\", \"KWH\", \"COMMUNITY AREA NAME\"])\n\n return df_long\n\n\n_curio_output = _curio_node()\n\ntry:\n result_03b3d67d_9bfd_466d_89c3_a616f6951f7d = _curio_output\nexcept NameError:\n result_03b3d67d_9bfd_466d_89c3_a616f6951f7d = None\n", + "metadata": { + "id": "03b3d67d-9bfd-466d-89c3-a616f6951f7d", + "language": "python", + "nodeId": "03b3d67d-9bfd-466d-89c3-a616f6951f7d", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"2deacd81-afd9-4521-bd35-636b30e7c755\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_03b3d67d_9bfd_466d_89c3_a616f6951f7d\n arg = input_0\n\n top20_names = arg.groupby(\"COMMUNITY AREA NAME\")[\"KWH\"].mean().sort_values(ascending=False).head(20).index\n df_top20 = arg[arg[\"COMMUNITY AREA NAME\"].isin(top20_names)].copy()\n\n return df_top20\n\n\n_curio_output = _curio_node()\n\ntry:\n result_2deacd81_afd9_4521_bd35_636b30e7c755 = _curio_output\nexcept NameError:\n result_2deacd81_afd9_4521_bd35_636b30e7c755 = None\n", + "metadata": { + "id": 
"2deacd81-afd9-4521-bd35-636b30e7c755", + "language": "python", + "nodeId": "2deacd81-afd9-4521-bd35-636b30e7c755", + "nodeType": "DATA_TRANSFORMATION", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"d7ba337c-dd7b-49f8-970b-e3114585c58b\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_2deacd81_afd9_4521_bd35_636b30e7c755\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"vconcat\": [\n {\n \"title\": \"Monthly Average Energy Usage (Brush to Select Months)\",\n \"params\": [\n {\n \"name\": \"monthBrush\",\n \"select\": {\n \"type\": \"interval\",\n \"encodings\": [\"x\"]\n }\n }\n ],\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\n \"field\": \"Month\",\n \"type\": \"ordinal\",\n \"scale\": {\n \"domain\": [\n \"JANUARY\",\n \"FEBRUARY\",\n \"MARCH\",\n \"APRIL\",\n \"MAY\",\n \"JUNE\",\n \"JULY\",\n \"AUGUST\",\n \"SEPTEMBER\",\n \"OCTOBER\",\n \"NOVEMBER\",\n \"DECEMBER\"\n ]\n },\n \"axis\": {\n \"labelAngle\": -40,\n \"labelFontSize\": 11\n }\n },\n \"y\": {\n \"aggregate\": \"mean\",\n \"field\": \"KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Avg KWH\"\n },\n \"tooltip\": [\n { \"field\": \"Month\" },\n { \"aggregate\": \"mean\", \"field\": \"KWH\", \"title\": \"Avg KWH\" }\n ],\n \"color\": {\n \"value\": \"#4C78A8\"\n }\n }\n },\n {\n \"title\": \"Avg KWH by Community (Filtered by Selected Months)\",\n \"transform\": [\n {\n \"filter\": { \"param\": \"monthBrush\" }\n }\n ],\n \"mark\": \"bar\",\n \"encoding\": {\n \"y\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\",\n \"sort\": \"-x\"\n },\n \"x\": {\n \"aggregate\": \"mean\",\n \"field\": \"KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Avg KWH\"\n },\n \"color\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\"\n },\n \"tooltip\": [\n { \"field\": \"COMMUNITY AREA NAME\" },\n { 
\"aggregate\": \"mean\", \"field\": \"KWH\", \"title\": \"Avg KWH\" }\n ]\n }\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_d7ba337c_dd7b_49f8_970b_e3114585c58b = _curio_output\nexcept NameError:\n result_d7ba337c_dd7b_49f8_970b_e3114585c58b = None\n", + "metadata": { + "id": "d7ba337c-dd7b-49f8-970b-e3114585c58b", + "language": "python", + "nodeId": "d7ba337c-dd7b-49f8-970b-e3114585c58b", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"f5b36cc1-63de-4c10-aca9-c28dd2f3ba3a\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"/Users/jaideepnutalapati/Documents/GitHub/curio/docs/examples/data/Energy_Usage_5000.csv\")\n\n columns_needed = [\"AVERAGE STORIES\", \"AVERAGE BUILDING AGE\", \"TOTAL KWH\"] + [col for col in df.columns if col.startswith(\"KWH \") and \"2010\" in col]\n\n df = df[columns_needed].dropna()\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_f5b36cc1_63de_4c10_aca9_c28dd2f3ba3a = _curio_output\nexcept NameError:\n data_f5b36cc1_63de_4c10_aca9_c28dd2f3ba3a = None\n", + "metadata": { + "id": "f5b36cc1-63de-4c10-aca9-c28dd2f3ba3a", + "language": "python", + "nodeId": "f5b36cc1-63de-4c10-aca9-c28dd2f3ba3a", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"5c3fa75a-a919-432b-9e95-83e6f1691c8d\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = 
data_f5b36cc1_63de_4c10_aca9_c28dd2f3ba3a\n arg = input_0\n\n def story_bracket(stories):\n if stories <= 1:\n return \"1 story\"\n elif stories == 2:\n return \"2 stories\"\n elif 3 <= stories <= 5:\n return \"3-5 stories\"\n elif 6 <= stories <= 10:\n return \"6-10 stories\"\n else:\n return \"11+ stories\"\n\n def age_bracket(age):\n if age <= 20:\n return \"0-20 yrs\"\n elif age <= 40:\n return \"21-40 yrs\"\n elif age <= 60:\n return \"41-60 yrs\"\n elif age <= 80:\n return \"61-80 yrs\"\n else:\n return \"81+ yrs\"\n\n df = arg.copy()\n\n df[\"STORY BRACKET\"] = df[\"AVERAGE STORIES\"].apply(story_bracket)\n df[\"AGE BRACKET\"] = df[\"AVERAGE BUILDING AGE\"].apply(age_bracket)\n return df\n\n\n_curio_output = _curio_node()\n\ntry:\n result_5c3fa75a_a919_432b_9e95_83e6f1691c8d = _curio_output\nexcept NameError:\n result_5c3fa75a_a919_432b_9e95_83e6f1691c8d = None\n", + "metadata": { + "id": "5c3fa75a-a919-432b-9e95-83e6f1691c8d", + "language": "python", + "nodeId": "5c3fa75a-a919-432b-9e95-83e6f1691c8d", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ffa68346-1c37-48f0-9284-7d34a397692f\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_5c3fa75a_a919_432b_9e95_83e6f1691c8d\n arg = input_0\n\n import pandas as pd\n\n df_long = pd.melt(\n arg,\n id_vars=[\"STORY BRACKET\", \"AGE BRACKET\", \"TOTAL KWH\"],\n value_vars=[col for col in arg.columns if col.startswith(\"KWH \")],\n var_name=\"Month\",\n value_name=\"KWH\"\n )\n\n df_long[\"Month\"] = df_long[\"Month\"].str.extract(r\"KWH (.+?) 
2010\")[0].str.upper()\n df_long = df_long.dropna(subset=[\"Month\", \"KWH\", \"STORY BRACKET\", \"AGE BRACKET\"])\n\n return df_long\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ffa68346_1c37_48f0_9284_7d34a397692f = _curio_output\nexcept NameError:\n result_ffa68346_1c37_48f0_9284_7d34a397692f = None\n", + "metadata": { + "id": "ffa68346-1c37-48f0-9284-7d34a397692f", + "language": "python", + "nodeId": "ffa68346-1c37-48f0-9284-7d34a397692f", + "nodeType": "DATA_TRANSFORMATION", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"82ef1d71-15c8-481c-a61b-eb172822f7a6\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_ffa68346_1c37_48f0_9284_7d34a397692f\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"params\": [\n {\n \"name\": \"storySelect\",\n \"bind\": {\n \"input\": \"select\",\n \"options\": [\n \"1 story\",\n \"2 stories\",\n \"3-5 stories\",\n \"6-10 stories\",\n \"11+ stories\"\n ]\n },\n \"value\": \"1 story\"\n }\n ],\n \"vconcat\": [\n {\n \"width\": 600,\n \"title\": {\n \"text\": \"Distribution of Total KWH by Age (Box Plot)\",\n \"align\": \"center\"\n },\n \"transform\": [{ \"filter\": \"datum['STORY BRACKET'] == storySelect\" }],\n \"mark\": \"boxplot\",\n \"encoding\": {\n \"x\": {\n \"field\": \"AGE BRACKET\",\n \"type\": \"nominal\",\n \"sort\": [\"0-20 yrs\", \"21-40 yrs\", \"41-60 yrs\", \"61-80 yrs\", \"81+ yrs\"]\n },\n \"y\": {\n \"field\": \"TOTAL KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Total KWH\"\n },\n \"color\": {\n \"field\": \"AGE BRACKET\",\n \"type\": \"nominal\",\n \"legend\": {\n \"orient\": \"right\",\n \"anchor\": \"middle\",\n \"direction\": \"vertical\"\n }\n },\n \"tooltip\": [{ \"field\": \"AGE BRACKET\" }, { \"field\": \"TOTAL KWH\" }]\n }\n },\n {\n \"width\": 600,\n \"title\": {\n \"text\": \"Monthly Avg KWH Trend by Age (for 
Selected Stories)\",\n \"align\": \"center\"\n },\n \"transform\": [{ \"filter\": \"datum['STORY BRACKET'] == storySelect\" }],\n \"mark\": { \"type\": \"line\", \"point\": True },\n \"encoding\": {\n \"x\": {\n \"field\": \"Month\",\n \"type\": \"ordinal\",\n \"sort\": [\n \"JANUARY\",\n \"FEBRUARY\",\n \"MARCH\",\n \"APRIL\",\n \"MAY\",\n \"JUNE\",\n \"JULY\",\n \"AUGUST\",\n \"SEPTEMBER\",\n \"OCTOBER\",\n \"NOVEMBER\",\n \"DECEMBER\"\n ]\n },\n \"y\": {\n \"aggregate\": \"mean\",\n \"field\": \"KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Avg Monthly KWH\"\n },\n \"color\": {\n \"field\": \"AGE BRACKET\",\n \"type\": \"nominal\",\n \"legend\": {\n \"orient\": \"right\",\n \"anchor\": \"middle\",\n \"direction\": \"vertical\"\n }\n },\n \"tooltip\": [\n { \"field\": \"Month\" },\n { \"aggregate\": \"mean\", \"field\": \"KWH\" },\n { \"field\": \"AGE BRACKET\" }\n ]\n }\n }\n ],\n \"config\": {\n \"concat\": { \"align\": \"center\" }\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_82ef1d71_15c8_481c_a61b_eb172822f7a6 = _curio_output\nexcept NameError:\n result_82ef1d71_15c8_481c_a61b_eb172822f7a6 = None\n", + "metadata": { + "id": "82ef1d71-15c8-481c-a61b-eb172822f7a6", + "language": "python", + "nodeId": "82ef1d71-15c8-481c-a61b-eb172822f7a6", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git 
a/docs/examples/notebooks/example5-original-notebook-W.ipynb b/docs/examples/notebooks/example5-original-notebook-W.ipynb new file mode 100644 index 00000000..739f0b70 --- /dev/null +++ b/docs/examples/notebooks/example5-original-notebook-W.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"92385949-b264-4108-abea-99df7a39b551\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n sensor = pd.read_csv('/Users/jaideepnutalapati/Documents/GitHub/curio/docs/examples/data/Flooding_Complaints_to_311_20260427.csv')\n\n return sensor\n\n_curio_output = _curio_node()\n\ntry:\n data_92385949_b264_4108_abea_99df7a39b551 = _curio_output\nexcept NameError:\n data_92385949_b264_4108_abea_99df7a39b551 = None\n", + "metadata": { + "id": "92385949-b264-4108-abea-99df7a39b551", + "language": "python", + "nodeId": "92385949-b264-4108-abea-99df7a39b551", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"d2ebbea0-a6c0-459e-8aa3-a16ce0983e79\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_92385949_b264_4108_abea_99df7a39b551\n arg = input_0\n\n def complaints_by_zip(df):\n\n grouped = df[\"ZIP_CODE\"].fillna(\"UNKNOWN\").value_counts().reset_index()\n grouped.columns = [\"ZIP_CODE\", \"Complaint_Count\"]\n return grouped\n\n return complaints_by_zip(arg)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_d2ebbea0_a6c0_459e_8aa3_a16ce0983e79 = _curio_output\nexcept NameError:\n result_d2ebbea0_a6c0_459e_8aa3_a16ce0983e79 = None\n", + "metadata": { + "id": "d2ebbea0-a6c0-459e-8aa3-a16ce0983e79", + "language": "python", + "nodeId": "d2ebbea0-a6c0-459e-8aa3-a16ce0983e79", + "nodeType": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": 
"code", + "source": "__trill_node__ = {\n \"id\": \"6a33a101-fb35-487c-8670-fae8dc7f3828\",\n \"type\": \"DATA_POOL\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n return result_d2ebbea0_a6c0_459e_8aa3_a16ce0983e79\n\n\n_curio_output = _curio_node()\n\ntry:\n pool_6a33a101_fb35_487c_8670_fae8dc7f3828 = _curio_output\nexcept NameError:\n pool_6a33a101_fb35_487c_8670_fae8dc7f3828 = None\n", + "metadata": { + "id": "6a33a101-fb35-487c-8670-fae8dc7f3828", + "language": "python", + "nodeId": "6a33a101-fb35-487c-8670-fae8dc7f3828", + "nodeType": "DATA_POOL", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"6864201f-4c0e-441b-ac7c-94e7eba7580e\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = pool_6a33a101_fb35_487c_8670_fae8dc7f3828\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"description\": \"A simple bar chart with embedded data.\",\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\"field\": \"ZIP_CODE\", \"type\": \"nominal\", \"title\": \"Zip Code\", \"axis\": {\"labelAngle\": 270}},\n \"y\": {\"field\": \"Complaint_Count\", \"type\": \"quantitative\", \"title\": \"Complaints\", \"axis\": {\"labelAngle\": 0}}\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_6864201f_4c0e_441b_ac7c_94e7eba7580e = _curio_output\nexcept NameError:\n result_6864201f_4c0e_441b_ac7c_94e7eba7580e = None\n", + "metadata": { + "id": "6864201f-4c0e-441b-ac7c-94e7eba7580e", + "language": "python", + "nodeId": "6864201f-4c0e-441b-ac7c-94e7eba7580e", + 
"nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example7-original-notebook-W.ipynb b/docs/examples/notebooks/example7-original-notebook-W.ipynb new file mode 100644 index 00000000..bde1c2d9 --- /dev/null +++ b/docs/examples/notebooks/example7-original-notebook-W.ipynb @@ -0,0 +1,52 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e1a123c1-8837-47a5-9b63-038e5ebcb530\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"/Users/jaideepnutalapati/Documents/GitHub/curio/docs/examples/data/Speed_Camera_Violations.csv\")\n df.dropna(inplace=True)\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_e1a123c1_8837_47a5_9b63_038e5ebcb530 = _curio_output\nexcept NameError:\n data_e1a123c1_8837_47a5_9b63_038e5ebcb530 = None\n", + "metadata": { + "id": "e1a123c1-8837-47a5-9b63-038e5ebcb530", + "language": "python", + "nodeId": "e1a123c1-8837-47a5-9b63-038e5ebcb530", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"cb340f83-0a4d-457a-be66-691672f330d3\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_e1a123c1_8837_47a5_9b63_038e5ebcb530\n arg = input_0\n\n import pandas as pd\n\n df = arg\n\n df['VIOLATION DATE'] = pd.to_datetime(df['VIOLATION DATE'], format='%m/%d/%Y')\n\n df['Year'] = df['VIOLATION DATE'].dt.year\n\n yr_sum = (df.groupby(['CAMERA ID', 'Year'])['VIOLATIONS']\n .sum()\n .reset_index()\n .rename(columns={'VIOLATIONS': 'avg_violations'}))\n\n top_ids = 
(df.groupby('CAMERA ID')['VIOLATIONS']\n .sum()\n .sort_values(ascending=False)\n .head(5)\n .index\n .tolist())\n\n yr_sum = yr_sum[yr_sum['CAMERA ID'].isin(top_ids)]\n\n camera_pos = (df.groupby('CAMERA ID')[['LATITUDE', 'LONGITUDE']]\n .mean()\n .reset_index())\n\n yr_sum = yr_sum.merge(camera_pos, on='CAMERA ID')\n\n return yr_sum\n\n\n_curio_output = _curio_node()\n\ntry:\n result_cb340f83_0a4d_457a_be66_691672f330d3 = _curio_output\nexcept NameError:\n result_cb340f83_0a4d_457a_be66_691672f330d3 = None\n", + "metadata": { + "id": "cb340f83-0a4d-457a-be66-691672f330d3", + "language": "python", + "nodeId": "cb340f83-0a4d-457a-be66-691672f330d3", + "nodeType": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"c2ba6e0e-e239-4167-a382-ffd1993cb3da\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_cb340f83_0a4d_457a_be66_691672f330d3\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"table\" },\n \"config\": { \"bar\": { \"continuousBandSize\": 18 } },\n \"hconcat\": [\n {\n \"width\": 320,\n \"height\": 260,\n \"selection\": { \"yrBrush\": { \"type\": \"interval\", \"encodings\": [\"x\"] } },\n \"mark\": { \"type\": \"bar\" },\n \"encoding\": {\n \"x\": { \"field\": \"Year\", \"type\": \"quantitative\", \"title\": \"Year\" },\n \"y\": {\n \"aggregate\": \"sum\",\n \"field\": \"avg_violations\",\n \"type\": \"quantitative\",\n \"title\": \"Total Violations\"\n },\n \"color\": {\n \"field\": \"CAMERA ID\",\n \"type\": \"nominal\",\n \"legend\": { \"title\": \"Camera ID\" }\n }\n }\n },\n {\n \"width\": 320,\n \"height\": 260,\n \"transform\": [\n { \"filter\": { \"selection\": \"yrBrush\" } },\n {\n \"aggregate\": [\n { \"op\": \"sum\", \"field\": \"avg_violations\", \"as\": \"total\" }\n ],\n \"groupby\": [\"Year\"]\n },\n { \"sort\": { 
\"field\": \"Year\" } }\n ],\n \"mark\": { \"type\": \"line\", \"point\": True },\n \"encoding\": {\n \"x\": {\n \"field\": \"Year\",\n \"type\": \"quantitative\",\n \"title\": \"Year (brush range)\"\n },\n \"y\": {\n \"field\": \"total\",\n \"type\": \"quantitative\",\n \"title\": \"Total Violations\"\n }\n }\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_c2ba6e0e_e239_4167_a382_ffd1993cb3da = _curio_output\nexcept NameError:\n result_c2ba6e0e_e239_4167_a382_ffd1993cb3da = None\n", + "metadata": { + "id": "c2ba6e0e-e239-4167-a382-ffd1993cb3da", + "language": "python", + "nodeId": "c2ba6e0e-e239-4167-a382-ffd1993cb3da", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example8-original-notebook-W.ipynb b/docs/examples/notebooks/example8-original-notebook-W.ipynb new file mode 100644 index 00000000..5bd89d8e --- /dev/null +++ b/docs/examples/notebooks/example8-original-notebook-W.ipynb @@ -0,0 +1,304 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"data/red-light-violation.csv\")\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed = _curio_output\nexcept 
NameError:\n data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed = None\n", + "metadata": { + "id": "6f4c2cd3-e83e-4e85-81de-3ec50986a2ed", + "language": "python", + "nodeId": "6f4c2cd3-e83e-4e85-81de-3ec50986a2ed", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"2704287e-a72c-454d-b6f2-7bca1e521397\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = arg.copy()\n df['VIOLATION DATE'] = pd.to_datetime(df['VIOLATION DATE'])\n df['Year'] = df['VIOLATION DATE'].dt.year\n df['Month'] = df['VIOLATION DATE'].dt.month\n\n def assign_season(month):\n if month in [12, 1, 2]:\n return \"Winter\"\n elif month in [3, 4, 5]:\n return \"Spring\"\n elif month in [6, 7, 8]:\n return \"Summer\"\n else:\n return \"Fall\"\n\n df['Season'] = df['Month'].apply(assign_season)\n\n df_trend = df.groupby(['VIOLATION DATE', 'Year', 'Season'])['VIOLATIONS'].sum().reset_index()\n df_trend['VIOLATION DATE'] = df_trend['VIOLATION DATE'].astype(str)\n\n return pd.DataFrame(df_trend)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_2704287e_a72c_454d_b6f2_7bca1e521397 = _curio_output\nexcept NameError:\n result_2704287e_a72c_454d_b6f2_7bca1e521397 = None\n", + "metadata": { + "id": "2704287e-a72c-454d-b6f2-7bca1e521397", + "language": "python", + "nodeId": "2704287e-a72c-454d-b6f2-7bca1e521397", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"04062f8c-289c-4588-84b2-21be45adf916\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_2704287e_a72c_454d_b6f2_7bca1e521397\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"width\": 750,\n \"height\": 
400,\n \"title\": \"Seasonal Violation Trend (Daily)\",\n \"mark\": {\n \"type\": \"line\",\n \"point\": True\n },\n \"encoding\": {\n \"x\": {\n \"field\": \"VIOLATION DATE\",\n \"type\": \"temporal\",\n \"title\": \"Date\",\n \"axis\": {\n \"format\": \"%Y %b\",\n \"labelAngle\": -45\n }\n },\n \"y\": {\n \"field\": \"VIOLATIONS\",\n \"type\": \"quantitative\",\n \"title\": \"Violations\"\n },\n \"color\": {\n \"field\": \"Season\",\n \"type\": \"nominal\",\n \"title\": \"Season\",\n \"scale\": {\n \"domain\": [\"Winter\", \"Spring\", \"Summer\", \"Fall\"],\n \"range\": [\"#1f77b4\", \"#2ca02c\", \"#ff7f0e\", \"#9467bd\"]\n }\n },\n \"tooltip\": [\n { \"field\": \"VIOLATION DATE\", \"type\": \"temporal\", \"title\": \"Date\" },\n { \"field\": \"Season\", \"type\": \"nominal\" },\n { \"field\": \"VIOLATIONS\", \"type\": \"quantitative\" }\n ]\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_04062f8c_289c_4588_84b2_21be45adf916 = _curio_output\nexcept NameError:\n result_04062f8c_289c_4588_84b2_21be45adf916 = None\n", + "metadata": { + "id": "04062f8c-289c-4588-84b2-21be45adf916", + "language": "python", + "nodeId": "04062f8c-289c-4588-84b2-21be45adf916", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"f4cb8452-3fbf-48b3-9a4e-42d0607428cb\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = arg.copy()\n df['VIOLATION DATE'] = pd.to_datetime(df['VIOLATION DATE'])\n df['Year'] = 
df['VIOLATION DATE'].dt.year\n df['Month'] = df['VIOLATION DATE'].dt.month\n\n heatmap_data = df.groupby(['Year', 'Month'])['VIOLATIONS'].sum().reset_index()\n\n return pd.DataFrame(heatmap_data)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_f4cb8452_3fbf_48b3_9a4e_42d0607428cb = _curio_output\nexcept NameError:\n result_f4cb8452_3fbf_48b3_9a4e_42d0607428cb = None\n", + "metadata": { + "id": "f4cb8452-3fbf-48b3-9a4e-42d0607428cb", + "language": "python", + "nodeId": "f4cb8452-3fbf-48b3-9a4e-42d0607428cb", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"624087f5-cd1f-4348-8b15-ea9eed203770\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n inputs = [\n result_2704287e_a72c_454d_b6f2_7bca1e521397,\n result_f4cb8452_3fbf_48b3_9a4e_42d0607428cb\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_624087f5_cd1f_4348_8b15_ea9eed203770 = _curio_output\nexcept NameError:\n merged_624087f5_cd1f_4348_8b15_ea9eed203770 = None\n", + "metadata": { + "id": "624087f5-cd1f-4348-8b15-ea9eed203770", + "language": "python", + "nodeId": "624087f5-cd1f-4348-8b15-ea9eed203770", + "nodeType": "MERGE_FLOW", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ab8d8046-b68f-419f-bb36-3454e576afd4\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = merged_624087f5_cd1f_4348_8b15_ea9eed203770\n arg = input_0\n\n import pandas as pd\n\n df_trend = pd.DataFrame(arg[0])\n heatmap_data = pd.DataFrame(arg[1])\n\n df_trend['VIOLATION DATE'] = pd.to_datetime(df_trend['VIOLATION DATE'])\n\n summary = heatmap_data.groupby('Year')['VIOLATIONS'].sum().reset_index()\n\n merged = df_trend.merge(summary, on='Year', 
how='left')\n\n final = merged[['VIOLATION DATE', 'VIOLATIONS_x', 'Season', 'VIOLATIONS_y']]\n\n final.columns = ['VIOLATION DATE', 'Daily Violations', 'Season', 'Yearly Total']\n\n final['VIOLATION DATE'] = final['VIOLATION DATE'].astype(str)\n\n array_data = final.to_dict(orient='records')\n\n shape = [final.shape[0], final.shape[1]]\n\n return pd.DataFrame(array_data)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ab8d8046_b68f_419f_bb36_3454e576afd4 = _curio_output\nexcept NameError:\n result_ab8d8046_b68f_419f_bb36_3454e576afd4 = None\n", + "metadata": { + "id": "ab8d8046-b68f-419f-bb36-3454e576afd4", + "language": "python", + "nodeId": "ab8d8046-b68f-419f-bb36-3454e576afd4", + "nodeType": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"3024166a-cd2c-460e-9063-d49f912774cc\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_ab8d8046_b68f_419f_bb36_3454e576afd4\n arg = input_0\n\n import pandas as pd\n\n df = arg.copy()\n\n df['VIOLATION DATE'] = pd.to_datetime(df['VIOLATION DATE'], errors='coerce')\n\n if 'Year' not in df.columns:\n df['Year'] = df['VIOLATION DATE'].dt.year\n if 'Month' not in df.columns:\n df['Month'] = df['VIOLATION DATE'].dt.month\n\n df['Daily Violations'] = pd.to_numeric(df['Daily Violations'], errors='coerce')\n df['Yearly Total'] = pd.to_numeric(df['Yearly Total'], errors='coerce')\n\n df['VIOLATION DATE'] = df['VIOLATION DATE'].astype(str)\n\n return pd.DataFrame(df)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_3024166a_cd2c_460e_9063_d49f912774cc = _curio_output\nexcept NameError:\n result_3024166a_cd2c_460e_9063_d49f912774cc = None\n", + "metadata": { + "id": "3024166a-cd2c-460e-9063-d49f912774cc", + "language": "python", + "nodeId": "3024166a-cd2c-460e-9063-d49f912774cc", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + 
"cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"b3beb8b4-d227-48af-b0c1-d9b99564888b\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_3024166a_cd2c_460e_9063_d49f912774cc\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"hconcat\": [\n {\n \"width\": 300,\n \"height\": 300,\n \"title\": \"Monthly Violations Heatmap\",\n \"params\": [\n {\n \"name\": \"yearFilter\",\n \"select\": {\n \"type\": \"point\",\n \"fields\": [\"Year\"],\n \"on\": \"click\"\n }\n }\n ],\n \"mark\": \"rect\",\n \"encoding\": {\n \"x\": {\n \"field\": \"Month\",\n \"type\": \"ordinal\",\n \"title\": \"Month\"\n },\n \"y\": {\n \"field\": \"Year\",\n \"type\": \"ordinal\",\n \"title\": \"Year\"\n },\n \"color\": {\n \"aggregate\": \"sum\",\n \"field\": \"Yearly Total\",\n \"type\": \"quantitative\",\n \"scale\": {\n \"scheme\": \"orangered\"\n },\n \"title\": \"Violations\"\n },\n \"tooltip\": [\n { \"field\": \"Year\", \"type\": \"ordinal\" },\n { \"field\": \"Month\", \"type\": \"ordinal\" },\n {\n \"aggregate\": \"sum\",\n \"field\": \"Yearly Total\",\n \"type\": \"quantitative\",\n \"title\": \"Total Violations\"\n }\n ]\n }\n },\n {\n \"width\": 600,\n \"height\": 300,\n \"title\": \"Seasonal Violation Trend (Daily)\",\n \"transform\": [\n {\n \"filter\": \"yearFilter.Year == null || datum.Year == yearFilter.Year\"\n }\n ],\n \"mark\": \"line\",\n \"encoding\": {\n \"x\": {\n \"field\": \"VIOLATION DATE\",\n \"type\": \"temporal\",\n \"title\": \"Date\"\n },\n \"y\": {\n \"field\": \"Daily Violations\",\n \"type\": \"quantitative\",\n \"title\": \"Violations\"\n },\n \"color\": {\n \"field\": \"Season\",\n \"type\": \"nominal\"\n },\n \"tooltip\": [\n { \"field\": \"VIOLATION DATE\", \"type\": \"temporal\" },\n { \"field\": \"Daily Violations\", \"type\": \"quantitative\" },\n { \"field\": \"Season\", \"type\": \"nominal\" }\n ]\n }\n }\n ]\n }\n\n values = 
input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_b3beb8b4_d227_48af_b0c1_d9b99564888b = _curio_output\nexcept NameError:\n result_b3beb8b4_d227_48af_b0c1_d9b99564888b = None\n", + "metadata": { + "id": "b3beb8b4-d227-48af-b0c1-d9b99564888b", + "language": "python", + "nodeId": "b3beb8b4-d227-48af-b0c1-d9b99564888b", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"1c3f8346-a156-4716-9b67-5e3cd0c4b256\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = arg\n\n df[\"VIOLATION DATE\"] = pd.to_datetime(df[\"VIOLATION DATE\"])\n\n df[\"Month\"] = df[\"VIOLATION DATE\"].dt.month\n df[\"Year\"] = df[\"VIOLATION DATE\"].dt.year\n\n def assign_season(month):\n if month in [12, 1, 2]:\n return \"Winter\"\n elif month in [3, 4, 5]:\n return \"Spring\"\n elif month in [6, 7, 8]:\n return \"Summer\"\n else:\n return \"Fall\"\n\n df[\"Season\"] = df[\"Month\"].apply(assign_season)\n\n df_seasonal = df.groupby([\"Year\", \"Season\"])[\"VIOLATIONS\"].sum().reset_index()\n\n return pd.DataFrame(df_seasonal)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_1c3f8346_a156_4716_9b67_5e3cd0c4b256 = _curio_output\nexcept NameError:\n result_1c3f8346_a156_4716_9b67_5e3cd0c4b256 = None\n", + "metadata": { + "id": "1c3f8346-a156-4716-9b67-5e3cd0c4b256", + "language": "python", + "nodeId": "1c3f8346-a156-4716-9b67-5e3cd0c4b256", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + 
"cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"9acbea5b-9bb7-4eea-84dd-3dc16e25e634\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_1c3f8346_a156_4716_9b67_5e3cd0c4b256\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"title\": \"Seasonal Red-Light Violations Over Time\",\n \"mark\": \"area\",\n \"encoding\": {\n \"x\": {\n \"field\": \"Year\",\n \"type\": \"ordinal\",\n \"title\": \"Year\"\n },\n \"y\": {\n \"field\": \"VIOLATIONS\",\n \"type\": \"quantitative\",\n \"title\": \"Total Violations\"\n },\n \"color\": {\n \"field\": \"Season\",\n \"type\": \"nominal\",\n \"title\": \"Season\"\n },\n \"tooltip\": [\n { \"field\": \"Year\", \"type\": \"ordinal\" },\n { \"field\": \"Season\", \"type\": \"nominal\" },\n { \"field\": \"VIOLATIONS\", \"type\": \"quantitative\" }\n ]\n },\n \"width\": 600,\n \"height\": 400\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_9acbea5b_9bb7_4eea_84dd_3dc16e25e634 = _curio_output\nexcept NameError:\n result_9acbea5b_9bb7_4eea_84dd_3dc16e25e634 = None\n", + "metadata": { + "id": "9acbea5b-9bb7-4eea-84dd-3dc16e25e634", + "language": "python", + "nodeId": "9acbea5b-9bb7-4eea-84dd-3dc16e25e634", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ad6d1689-5be3-4b18-a72a-ed54f74621ee\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as 
pd\n\n df = arg\n df[\"VIOLATION DATE\"] = pd.to_datetime(df[\"VIOLATION DATE\"])\n df[\"Year\"] = df[\"VIOLATION DATE\"].dt.year\n\n grouped = df.groupby([\"INTERSECTION\", \"Year\"])[\"VIOLATIONS\"].sum().reset_index()\n grouped[\"Rank\"] = grouped.groupby(\"Year\")[\"VIOLATIONS\"].rank(ascending=False, method=\"first\")\n\n top3 = grouped[grouped[\"Rank\"] <= 3]\n\n return pd.DataFrame(top3)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ad6d1689_5be3_4b18_a72a_ed54f74621ee = _curio_output\nexcept NameError:\n result_ad6d1689_5be3_4b18_a72a_ed54f74621ee = None\n", + "metadata": { + "id": "ad6d1689-5be3-4b18-a72a-ed54f74621ee", + "language": "python", + "nodeId": "ad6d1689-5be3-4b18-a72a-ed54f74621ee", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"9c70e329-602e-4309-98d7-b12f44c99319\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_ad6d1689_5be3_4b18_a72a_ed54f74621ee\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"title\": \"Top 3 Intersections with Most Violations by Year\",\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\n \"field\": \"Year\",\n \"type\": \"ordinal\",\n \"title\": \"Year\"\n },\n \"y\": {\n \"field\": \"VIOLATIONS\",\n \"type\": \"quantitative\",\n \"title\": \"Violations\"\n },\n \"color\": {\n \"field\": \"INTERSECTION\",\n \"type\": \"nominal\",\n \"title\": \"Intersection\"\n },\n \"tooltip\": [\n { \"field\": \"INTERSECTION\", \"type\": \"nominal\" },\n { \"field\": \"VIOLATIONS\", \"type\": \"quantitative\" }\n ]\n },\n \"width\": 600,\n \"height\": 400\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n 
display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_9c70e329_602e_4309_98d7_b12f44c99319 = _curio_output\nexcept NameError:\n result_9c70e329_602e_4309_98d7_b12f44c99319 = None\n", + "metadata": { + "id": "9c70e329-602e-4309-98d7-b12f44c99319", + "language": "python", + "nodeId": "9c70e329-602e-4309-98d7-b12f44c99319", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"da73932f-b91a-4542-98df-d731a888b8b4\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n import numpy as np\n\n df = arg.copy()\n df[\"VIOLATION DATE\"] = pd.to_datetime(df[\"VIOLATION DATE\"])\n df[\"Year\"] = df[\"VIOLATION DATE\"].dt.year\n\n\n grouped = df.groupby(\"INTERSECTION\").agg({\n \"CAMERA ID\": \"nunique\",\n \"VIOLATIONS\": \"sum\"\n }).reset_index().rename(columns={\"CAMERA ID\": \"CAMERA_COUNT\"})\n\n grouped[\"CAMERA_BIN\"] = grouped[\"CAMERA_COUNT\"].apply(lambda x: \"4+\" if x >= 4 else str(x))\n\n\n camera_order = {\"1\": 1, \"2\": 2, \"3\": 3, \"4+\": 4}\n grouped[\"x_base\"] = grouped[\"CAMERA_BIN\"].map(camera_order)\n np.random.seed(42)\n grouped[\"jittered_x\"] = grouped[\"x_base\"] + np.random.uniform(-0.2, 0.2, size=len(grouped))\n\n\n return pd.DataFrame(grouped)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_da73932f_b91a_4542_98df_d731a888b8b4 = _curio_output\nexcept NameError:\n result_da73932f_b91a_4542_98df_d731a888b8b4 = None\n", + "metadata": { + "id": "da73932f-b91a-4542-98df-d731a888b8b4", + "language": "python", + "nodeId": "da73932f-b91a-4542-98df-d731a888b8b4", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": 
\"2bbbd05f-f5cb-48a2-b96f-c55a9ac95a34\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = arg.copy()\n\n df[\"VIOLATION DATE\"] = pd.to_datetime(df[\"VIOLATION DATE\"])\n df[\"Year\"] = df[\"VIOLATION DATE\"].dt.year\n\n trend = df.groupby(['INTERSECTION', 'Year'])['VIOLATIONS'].sum().reset_index()\n first = trend.groupby('INTERSECTION').first().reset_index()\n last = trend.groupby('INTERSECTION').last().reset_index()\n\n change = first.merge(last, on='INTERSECTION', suffixes=('_first', '_last'))\n change = change[change['VIOLATIONS_first'] > 0]\n\n change['Percent_Reduction'] = ((change['VIOLATIONS_first'] - change['VIOLATIONS_last']) / change['VIOLATIONS_first']) * 100\n\n\n return pd.DataFrame(change)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_2bbbd05f_f5cb_48a2_b96f_c55a9ac95a34 = _curio_output\nexcept NameError:\n result_2bbbd05f_f5cb_48a2_b96f_c55a9ac95a34 = None\n", + "metadata": { + "id": "2bbbd05f-f5cb-48a2-b96f-c55a9ac95a34", + "language": "python", + "nodeId": "2bbbd05f-f5cb-48a2-b96f-c55a9ac95a34", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"04689857-6cf6-4ee9-801d-e7b075c16da5\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n inputs = [\n result_da73932f_b91a_4542_98df_d731a888b8b4,\n result_2bbbd05f_f5cb_48a2_b96f_c55a9ac95a34\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_04689857_6cf6_4ee9_801d_e7b075c16da5 = _curio_output\nexcept NameError:\n merged_04689857_6cf6_4ee9_801d_e7b075c16da5 = None\n", + "metadata": { + "id": "04689857-6cf6-4ee9-801d-e7b075c16da5", + "language": "python", + "nodeId": "04689857-6cf6-4ee9-801d-e7b075c16da5", 
+ "nodeType": "MERGE_FLOW", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"bed93b38-cdb2-4db3-be54-7e60d68013d9\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = merged_04689857_6cf6_4ee9_801d_e7b075c16da5\n arg = input_0\n\n import pandas as pd\n\n grouped = pd.DataFrame(arg[0])\n change = pd.DataFrame(arg[1])\n\n merged = grouped.merge(\n change[['INTERSECTION', 'Percent_Reduction']],\n on='INTERSECTION',\n how='left'\n )\n\n merged['VIOLATIONS'] = pd.to_numeric(merged['VIOLATIONS'], errors='coerce')\n merged['Percent_Reduction'] = pd.to_numeric(merged['Percent_Reduction'], errors='coerce')\n\n merged = merged.dropna(subset=['Percent_Reduction'])\n\n array_data = merged.to_dict(orient='records')\n\n shape = [merged.shape[0], merged.shape[1]]\n\n return pd.DataFrame(array_data)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_bed93b38_cdb2_4db3_be54_7e60d68013d9 = _curio_output\nexcept NameError:\n result_bed93b38_cdb2_4db3_be54_7e60d68013d9 = None\n", + "metadata": { + "id": "bed93b38-cdb2-4db3-be54-7e60d68013d9", + "language": "python", + "nodeId": "bed93b38-cdb2-4db3-be54-7e60d68013d9", + "nodeType": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"b5ff6442-ea3f-468f-8c85-8fb634eb88f1\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_bed93b38_cdb2_4db3_be54_7e60d68013d9\n arg = input_0\n\n import pandas as pd\n\n df = arg.copy()\n\n df['VIOLATIONS'] = pd.to_numeric(df['VIOLATIONS'], errors='coerce')\n df['CAMERA_COUNT'] = pd.to_numeric(df['CAMERA_COUNT'], errors='coerce')\n df['Percent_Reduction'] = pd.to_numeric(df['Percent_Reduction'], errors='coerce')\n df['jittered_x'] = pd.to_numeric(df['jittered_x'], errors='coerce')\n\n df = 
df.dropna(subset=['VIOLATIONS', 'CAMERA_COUNT', 'Percent_Reduction', 'jittered_x'])\n\n\n return pd.DataFrame(df)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_b5ff6442_ea3f_468f_8c85_8fb634eb88f1 = _curio_output\nexcept NameError:\n result_b5ff6442_ea3f_468f_8c85_8fb634eb88f1 = None\n", + "metadata": { + "id": "b5ff6442-ea3f-468f-8c85-8fb634eb88f1", + "language": "python", + "nodeId": "b5ff6442-ea3f-468f-8c85-8fb634eb88f1", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e1c71f22-c26c-498b-964e-7413763c812a\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_b5ff6442_ea3f_468f_8c85_8fb634eb88f1\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"params\": [\n {\n \"name\": \"cameraFilter\",\n \"bind\": {\n \"input\": \"select\",\n \"options\": [\"1\", \"2\", \"3\", \"4+\"],\n \"labels\": [\"1 Camera\", \"2 Cameras\", \"3 Cameras\", \"4+ Cameras\"]\n }\n }\n ],\n \"hconcat\": [\n {\n \"width\": 500,\n \"mark\": \"boxplot\",\n \"encoding\": {\n \"x\": {\n \"field\": \"CAMERA_BIN\",\n \"type\": \"nominal\",\n \"title\": \"Camera Count\"\n },\n \"y\": {\n \"field\": \"VIOLATIONS\",\n \"type\": \"quantitative\",\n \"title\": \"Violations\"\n },\n \"color\": {\n \"field\": \"CAMERA_BIN\",\n \"type\": \"nominal\"\n }\n }\n },\n {\n \"width\": 500,\n \"mark\": {\n \"type\": \"bar\",\n \"cursor\": \"pointer\"\n },\n \"transform\": [\n { \"filter\": \"cameraFilter == null || datum.CAMERA_BIN == cameraFilter\" }\n ],\n \"encoding\": {\n \"x\": {\n \"field\": \"Percent_Reduction\",\n \"type\": \"quantitative\",\n \"title\": \"Percent Reduction\"\n },\n \"y\": {\n \"field\": \"INTERSECTION\",\n \"type\": \"nominal\",\n \"sort\": \"-x\",\n \"title\": \"Intersection\"\n },\n \"color\": {\n \"field\": \"Percent_Reduction\",\n \"type\": \"quantitative\",\n \"scale\": { 
\"scheme\": \"blues\" }\n }\n }\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_e1c71f22_c26c_498b_964e_7413763c812a = _curio_output\nexcept NameError:\n result_e1c71f22_c26c_498b_964e_7413763c812a = None\n", + "metadata": { + "id": "e1c71f22-c26c-498b-964e-7413763c812a", + "language": "python", + "nodeId": "e1c71f22-c26c-498b-964e-7413763c812a", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"be851609-c0f7-4cae-afae-2094965ead93\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = pd.DataFrame(arg)\n\n df['VIOLATION DATE'] = pd.to_datetime(df['VIOLATION DATE'], errors='coerce')\n\n df['Year'] = df['VIOLATION DATE'].dt.year\n\n df['VIOLATION DATE'] = df['VIOLATION DATE'].astype(str)\n\n\n return pd.DataFrame(df)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_be851609_c0f7_4cae_afae_2094965ead93 = _curio_output\nexcept NameError:\n result_be851609_c0f7_4cae_afae_2094965ead93 = None\n", + "metadata": { + "id": "be851609-c0f7-4cae-afae-2094965ead93", + "language": "python", + "nodeId": "be851609-c0f7-4cae-afae-2094965ead93", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"18b287f6-0a05-4dbc-92d3-4eb10f177bc5\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg 
= input_0\n\n import pandas as pd\n\n df = arg\n\n df_map = df.groupby(['INTERSECTION', 'LATITUDE', 'LONGITUDE']).agg({\n 'VIOLATIONS': 'sum',\n 'CAMERA ID': 'nunique'\n }).reset_index().rename(columns={\n 'VIOLATIONS': 'TOTAL_VIOLATIONS',\n 'CAMERA ID': 'CAMERA_COUNT'\n })\n\n\n df_map = df_map.dropna(subset=['LATITUDE', 'LONGITUDE'])\n\n df_map['CAMERA_BIN'] = df_map['CAMERA_COUNT'].apply(lambda x: \"4+\" if x >= 4 else str(int(x)))\n\n return pd.DataFrame(df_map)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_18b287f6_0a05_4dbc_92d3_4eb10f177bc5 = _curio_output\nexcept NameError:\n result_18b287f6_0a05_4dbc_92d3_4eb10f177bc5 = None\n", + "metadata": { + "id": "18b287f6-0a05-4dbc-92d3-4eb10f177bc5", + "language": "python", + "nodeId": "18b287f6-0a05-4dbc-92d3-4eb10f177bc5", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"412c23ca-fafd-4af8-ab87-abd9df17241a\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n inputs = [\n result_be851609_c0f7_4cae_afae_2094965ead93,\n result_18b287f6_0a05_4dbc_92d3_4eb10f177bc5\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_412c23ca_fafd_4af8_ab87_abd9df17241a = _curio_output\nexcept NameError:\n merged_412c23ca_fafd_4af8_ab87_abd9df17241a = None\n", + "metadata": { + "id": "412c23ca-fafd-4af8-ab87-abd9df17241a", + "language": "python", + "nodeId": "412c23ca-fafd-4af8-ab87-abd9df17241a", + "nodeType": "MERGE_FLOW", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"0ef770f3-e789-40b0-a6e5-5f5fea079389\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = merged_412c23ca_fafd_4af8_ab87_abd9df17241a\n arg = input_0\n\n import pandas as pd\n\n 
df_base = pd.DataFrame(arg[0])\n df_additional = pd.DataFrame(arg[1])\n\n df_map = df_base.groupby(['INTERSECTION', 'LATITUDE', 'LONGITUDE']).agg({\n 'VIOLATIONS': 'sum',\n 'CAMERA ID': 'nunique'\n }).reset_index().rename(columns={\n 'VIOLATIONS': 'TOTAL_VIOLATIONS',\n 'CAMERA ID': 'CAMERA_COUNT'\n })\n\n df_map = df_map.dropna(subset=['LATITUDE', 'LONGITUDE'])\n\n df_map[\"CAMERA_BIN\"] = df_map[\"CAMERA_COUNT\"].apply(lambda x: \"4+\" if x >= 4 else str(int(x)))\n\n array_data = df_map.to_dict(orient='records')\n\n return pd.DataFrame(array_data)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_0ef770f3_e789_40b0_a6e5_5f5fea079389 = _curio_output\nexcept NameError:\n result_0ef770f3_e789_40b0_a6e5_5f5fea079389 = None\n", + "metadata": { + "id": "0ef770f3-e789-40b0-a6e5-5f5fea079389", + "language": "python", + "nodeId": "0ef770f3-e789-40b0-a6e5-5f5fea079389", + "nodeType": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"d7e27f2a-2175-4fe0-8bff-8dfcd95f36f2\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = result_0ef770f3_e789_40b0_a6e5_5f5fea079389\n arg = input_0\n\n import pandas as pd\n\n df = pd.DataFrame(arg)\n\n df['TOTAL_VIOLATIONS'] = pd.to_numeric(df['TOTAL_VIOLATIONS'], errors='coerce')\n df['CAMERA_COUNT'] = pd.to_numeric(df['CAMERA_COUNT'], errors='coerce')\n\n df['CAMERA_BIN'] = df['CAMERA_BIN'].astype(str)\n\n df['LATITUDE'] = pd.to_numeric(df['LATITUDE'], errors='coerce')\n df['LONGITUDE'] = pd.to_numeric(df['LONGITUDE'], errors='coerce')\n\n return pd.DataFrame(df)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_d7e27f2a_2175_4fe0_8bff_8dfcd95f36f2 = _curio_output\nexcept NameError:\n result_d7e27f2a_2175_4fe0_8bff_8dfcd95f36f2 = None\n", + "metadata": { + "id": "d7e27f2a-2175-4fe0-8bff-8dfcd95f36f2", + "language": "python", + "nodeId": "d7e27f2a-2175-4fe0-8bff-8dfcd95f36f2", + 
"nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"3ad91d4c-3f0f-4db6-98a0-83fcbc564598\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_d7e27f2a_2175_4fe0_8bff_8dfcd95f36f2\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"hconcat\": [\n {\n \"width\": 600,\n \"height\": 500,\n \"title\": \"Spatial Map – Select Area to Filter\",\n \"params\": [\n {\n \"name\": \"spatialBrush\",\n \"select\": {\n \"type\": \"interval\",\n \"encodings\": [\"x\", \"y\"]\n }\n }\n ],\n \"mark\": \"circle\",\n \"encoding\": {\n \"x\": {\n \"field\": \"LONGITUDE\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [-87.95, -87.5] },\n \"title\": \"Longitude\"\n },\n \"y\": {\n \"field\": \"LATITUDE\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [41.6, 42.1] },\n \"title\": \"Latitude\"\n },\n \"size\": {\n \"field\": \"TOTAL_VIOLATIONS\",\n \"type\": \"quantitative\",\n \"scale\": { \"range\": [20, 800] },\n \"title\": \"Total Violations\"\n },\n \"color\": {\n \"field\": \"CAMERA_BIN\",\n \"type\": \"nominal\",\n \"title\": \"Camera Count\",\n \"scale\": { \"scheme\": \"plasma\" }\n },\n \"tooltip\": [\n { \"field\": \"INTERSECTION\", \"type\": \"nominal\" },\n { \"field\": \"TOTAL_VIOLATIONS\", \"type\": \"quantitative\" },\n { \"field\": \"CAMERA_COUNT\", \"type\": \"quantitative\" }\n ]\n }\n },\n {\n \"width\": 400,\n \"height\": 500,\n \"title\": \"Top Intersections by Total Violations (Filtered by Spatial Selection)\",\n \"mark\": \"bar\",\n \"transform\": [\n {\n \"filter\": { \"param\": \"spatialBrush\" }\n },\n {\n \"window\": [{ \"op\": \"rank\", \"as\": \"rank\" }],\n \"sort\": [{ \"field\": \"TOTAL_VIOLATIONS\", \"order\": \"descending\" }]\n },\n {\n \"filter\": \"datum.rank <= 15\"\n }\n ],\n \"encoding\": {\n \"x\": {\n \"field\": 
\"TOTAL_VIOLATIONS\",\n \"type\": \"quantitative\",\n \"title\": \"Total Violations\"\n },\n \"y\": {\n \"field\": \"INTERSECTION\",\n \"type\": \"nominal\",\n \"sort\": \"-x\",\n \"title\": \"Intersection\"\n },\n \"color\": {\n \"field\": \"TOTAL_VIOLATIONS\",\n \"type\": \"quantitative\",\n \"scale\": { \"scheme\": \"blues\" },\n \"legend\": None\n },\n \"tooltip\": [\n { \"field\": \"INTERSECTION\", \"type\": \"nominal\" },\n { \"field\": \"TOTAL_VIOLATIONS\", \"type\": \"quantitative\" }\n ]\n }\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_3ad91d4c_3f0f_4db6_98a0_83fcbc564598 = _curio_output\nexcept NameError:\n result_3ad91d4c_3f0f_4db6_98a0_83fcbc564598 = None\n", + "metadata": { + "id": "3ad91d4c-3f0f-4db6-98a0-83fcbc564598", + "language": "python", + "nodeId": "3ad91d4c-3f0f-4db6-98a0-83fcbc564598", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example9-original-notebook-W.ipynb b/docs/examples/notebooks/example9-original-notebook-W.ipynb new file mode 100644 index 00000000..269c2210 --- /dev/null +++ b/docs/examples/notebooks/example9-original-notebook-W.ipynb @@ -0,0 +1,52 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"345e363c-7676-45e3-9fc7-3c44f52b1b6a\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n import 
pandas as pd\n\n df = pd.read_csv(\"/Users/jaideepnutalapati/Documents/GitHub/curio/docs/examples/data/energy_dataset.csv\")\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_345e363c_7676_45e3_9fc7_3c44f52b1b6a = _curio_output\nexcept NameError:\n data_345e363c_7676_45e3_9fc7_3c44f52b1b6a = None\n", + "metadata": { + "id": "345e363c-7676-45e3-9fc7-3c44f52b1b6a", + "language": "python", + "nodeId": "345e363c-7676-45e3-9fc7-3c44f52b1b6a", + "nodeType": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"b9d6eeac-790f-46aa-ac25-4d88514a47d8\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n input_0 = data_345e363c_7676_45e3_9fc7_3c44f52b1b6a\n arg = input_0\n\n edf = arg[['Data Year', 'ID', 'Property Name', 'Address', 'ZIP Code', 'Chicago Energy Rating', 'Community Area', 'Primary Property Type', 'Gross Floor Area - Buildings (sq ft)', 'Year Built', '# of Buildings', 'ENERGY STAR Score', 'Site EUI (kBtu/sq ft)', 'Source EUI (kBtu/sq ft)', 'Weather Normalized Site EUI (kBtu/sq ft)', 'Weather Normalized Source EUI (kBtu/sq ft)', 'Total GHG Emissions (Metric Tons CO2e)', 'GHG Intensity (kg CO2e/sq ft)', 'Latitude', 'Longitude', 'Location']]\n\n # Rename the data columns for consistency and easy use\n edf.columns = ['Year', 'ID', 'Property Name', 'Address', 'ZIP Code', 'Chicago Energy Rating', 'Community Area', 'Primary Property Type', 'Gross Floor Area', 'Year Built', '# of Buildings', 'ENERGY STAR Score', 'Site EUI', 'Source EUI', 'Weather Normalized Site EUI', 'Weather Normalized Source EUI', 'Total GHG Emissions', 'GHG Intensity', 'Latitude', 'Longitude', 'Location']\n\n # Filter out rows with missing data\n edf = edf.dropna()\n edf['ZIP Code'] = edf['ZIP Code'].astype(int)\n\n return edf\n\n\n_curio_output = _curio_node()\n\ntry:\n result_b9d6eeac_790f_46aa_ac25_4d88514a47d8 = _curio_output\nexcept NameError:\n 
result_b9d6eeac_790f_46aa_ac25_4d88514a47d8 = None\n", + "metadata": { + "id": "b9d6eeac-790f-46aa-ac25-4d88514a47d8", + "language": "python", + "nodeId": "b9d6eeac-790f-46aa-ac25-4d88514a47d8", + "nodeType": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"5db6d526-79a1-40cf-9605-f2f525e513c0\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\ndef _curio_node():\n\n\n input_data = result_b9d6eeac_790f_46aa_ac25_4d88514a47d8\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"description\": \"ENERGY STAR Score by Primary Property Type (mean bars with median ticks)\",\n \"title\": \"ENERGY STAR Score by Primary Property Type\",\n \"data\": { \"name\": \"edf\" },\n \"width\": 600,\n \"height\": 400,\n \"layer\": [\n {\n \"transform\": [\n {\n \"aggregate\": [\n { \"op\": \"mean\", \"field\": \"ENERGY STAR Score\", \"as\": \"mean_score\" }\n ],\n \"groupby\": [\"Primary Property Type\"]\n }\n ],\n \"mark\": \"bar\",\n \"encoding\": {\n \"y\": {\n \"field\": \"Primary Property Type\",\n \"type\": \"nominal\",\n \"title\": \"Primary Property Type\"\n },\n \"x\": {\n \"field\": \"mean_score\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [0, 100] },\n \"title\": \"Mean ENERGY STAR Score\"\n },\n \"tooltip\": [\n {\n \"field\": \"Primary Property Type\",\n \"type\": \"nominal\",\n \"title\": \"Primary Property Type\"\n },\n {\n \"field\": \"mean_score\",\n \"type\": \"quantitative\",\n \"title\": \"Mean ENERGY STAR Score\",\n \"format\": \".2f\"\n }\n ]\n }\n },\n {\n \"transform\": [\n {\n \"aggregate\": [\n { \"op\": \"median\", \"field\": \"ENERGY STAR Score\", \"as\": \"median_score\" }\n ],\n \"groupby\": [\"Primary Property Type\"]\n }\n ],\n \"mark\": {\n \"type\": \"tick\",\n \"color\": \"red\",\n \"thickness\": 2\n },\n \"encoding\": {\n \"y\": {\n \"field\": \"Primary Property Type\",\n \"type\": 
\"nominal\"\n },\n \"x\": {\n \"field\": \"median_score\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [0, 100] },\n \"title\": \"Median ENERGY STAR Score\"\n },\n \"tooltip\": [\n {\n \"field\": \"Primary Property Type\",\n \"type\": \"nominal\",\n \"title\": \"Primary Property Type\"\n },\n {\n \"field\": \"median_score\",\n \"type\": \"quantitative\",\n \"title\": \"Median ENERGY STAR Score\",\n \"format\": \".2f\"\n }\n ]\n }\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_5db6d526_79a1_40cf_9605_f2f525e513c0 = _curio_output\nexcept NameError:\n result_5db6d526_79a1_40cf_9605_f2f525e513c0 = None\n", + "metadata": { + "id": "5db6d526-79a1-40cf-9605-f2f525e513c0", + "language": "python", + "nodeId": "5db6d526-79a1-40cf-9605-f2f525e513c0", + "nodeType": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file From 9179450a65a3b2bb19db3344bf8bd35c8626823c Mon Sep 17 00:00:00 2001 From: Vamsi Dath Date: Mon, 27 Apr 2026 22:31:11 -0500 Subject: [PATCH 08/13] Modified UTK generation logic and bidirectional edge support --- .../urban-workflows/src/NotebookConvertor.ts | 408 ++++++++++++++++-- 1 file changed, 367 insertions(+), 41 deletions(-) diff --git a/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts b/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts index 78ea2459..3ca96662 100644 --- a/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts +++ 
b/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts @@ -36,6 +36,7 @@ interface TrillEdge { target: string; sourceHandle?: string; targetHandle?: string; + type?: string; } interface TrillDataflow { @@ -78,6 +79,25 @@ interface TrillMeta { out?: string; } +interface NotebookTrillConnection { + id?: string; + source: string; + target: string; + sourceHandle?: string; + targetHandle?: string; + bidirectional?: boolean; + type?: string; +} + +interface NotebookTrillMetadata { + id?: string; + type?: string; + in?: string; + out?: string; + inputs?: NotebookTrillConnection[]; + outputs?: NotebookTrillConnection[]; +} + export class TrillNotebookConverter { private executionGraph: Record = {}; @@ -128,6 +148,8 @@ export class TrillNotebookConverter { const edges: TrillEdge[] = []; const nodeInputs: Record = {}; const producedByVar: Record = {}; + const explicitEdges = new Map(); + let sawExplicitConnections = false; let previousNodeId: string | null = null; const position = { x: 100, y: 100 }; @@ -146,17 +168,18 @@ export class TrillNotebookConverter { const source = cell.source; const code = Array.isArray(source) ? source.join("") : String(source ?? ""); + const notebookMeta = this.extractNotebookTrillMetadata(cell); const trillMeta = this.extractTrillVariable(code); const nodeId = - trillMeta?.id ?? `notebook_cell_${uuid()}`; + notebookMeta?.id ?? trillMeta?.id ?? `notebook_cell_${uuid()}`; const inferredNodeType = this.inferNodeType(code); const nodeType = - trillMeta?.type ?? inferredNodeType; + notebookMeta?.type ?? trillMeta?.type ?? inferredNodeType; const nodeIn = - trillMeta?.in ?? "DEFAULT"; + notebookMeta?.in ?? trillMeta?.in ?? "DEFAULT"; const nodeOut = - trillMeta?.out ?? "DEFAULT"; + notebookMeta?.out ?? trillMeta?.out ?? 
"DEFAULT"; const codeWithoutMeta = this.removeTrillVariable(code); const inputVars = this.extractInputVariables(codeWithoutMeta); @@ -188,37 +211,81 @@ export class TrillNotebookConverter { position.y += 150; }); - const targetInputCount: Record = {}; - const edgeKeys = new Set(); + for (const rawCell of rawCells) { + const cell = rawCell as Record; + if (cell.cell_type !== "code") { + continue; + } - for (const node of nodes) { - const inputs = nodeInputs[node.id] ?? []; - for (const inputVar of inputs) { - const sourceNodeId = producedByVar[inputVar]; - if (!sourceNodeId || sourceNodeId === node.id) { - continue; - } + const notebookMeta = this.extractNotebookTrillMetadata(cell); + if (!notebookMeta) { + continue; + } - const edgeKey = `${sourceNodeId}->${node.id}::${inputVar}`; - if (edgeKeys.has(edgeKey)) { + const nodeId = notebookMeta.id; + if (!nodeId) { + continue; + } + + const serializedConnections = [ + ...(notebookMeta.outputs ?? []), + ...(notebookMeta.inputs ?? []), + ]; + + if (serializedConnections.length > 0) { + sawExplicitConnections = true; + } + + for (const connection of serializedConnections) { + const edge = this.normalizeNotebookConnection(connection); + if (!edge) { continue; } - edgeKeys.add(edgeKey); - let targetHandle = "in"; - if (node.type === NodeType.MERGE_FLOW) { - const count = targetInputCount[node.id] ?? 0; - targetHandle = `in_${count}`; - targetInputCount[node.id] = count + 1; + const key = this.edgeKey(edge); + if (!explicitEdges.has(key)) { + explicitEdges.set(key, edge); } + } + } - edges.push({ - id: `edge_${uuid()}`, - source: sourceNodeId, - sourceHandle: "out", - target: node.id, - targetHandle, - }); + if (sawExplicitConnections) { + edges.push(...explicitEdges.values()); + } + + const targetInputCount: Record = {}; + if (!sawExplicitConnections) { + const edgeKeys = new Set(); + + for (const node of nodes) { + const inputs = nodeInputs[node.id] ?? 
[]; + for (const inputVar of inputs) { + const sourceNodeId = producedByVar[inputVar]; + if (!sourceNodeId || sourceNodeId === node.id) { + continue; + } + + const edgeKey = `${sourceNodeId}->${node.id}::${inputVar}`; + if (edgeKeys.has(edgeKey)) { + continue; + } + edgeKeys.add(edgeKey); + + let targetHandle = "in"; + if (node.type === NodeType.MERGE_FLOW) { + const count = targetInputCount[node.id] ?? 0; + targetHandle = `in_${count}`; + targetInputCount[node.id] = count + 1; + } + + edges.push({ + id: `edge_${uuid()}`, + source: sourceNodeId, + sourceHandle: "out", + target: node.id, + targetHandle, + }); + } } } @@ -273,12 +340,15 @@ export class TrillNotebookConverter { const sourceInfo = this.executionGraph[source]; const targetInfo = this.executionGraph[target]; + const bidirectional = this.isBidirectionalEdge(edge); - targetInfo.dependencies.add(source); - sourceInfo.dependents.add(target); + if (!bidirectional) { + targetInfo.dependencies.add(source); + sourceInfo.dependents.add(target); + } - const targetHandle = edge.targetHandle ?? "in"; - const sourceHandle = edge.sourceHandle ?? "out"; + const targetHandle = edge.targetHandle ?? (bidirectional ? "in/out" : "in"); + const sourceHandle = edge.sourceHandle ?? (bidirectional ? "in/out" : "out"); if (!targetInfo.inputs[targetHandle]) { targetInfo.inputs[targetHandle] = []; @@ -287,7 +357,7 @@ export class TrillNotebookConverter { targetInfo.inputs[targetHandle].push({ source, sourceHandle, - bidirectional: false, + bidirectional, }); if (!sourceInfo.outputs[sourceHandle]) { @@ -297,7 +367,7 @@ export class TrillNotebookConverter { sourceInfo.outputs[sourceHandle].push({ target, targetHandle, - bidirectional: false, + bidirectional, }); } } @@ -362,6 +432,7 @@ export class TrillNotebookConverter { } const nodeMeta = `__trill_node__ = {\n "id": "${nodeId}",\n "type": "${nodeType}",\n "in": "${node.in ?? "DEFAULT"}",\n "out": "${node.out ?? 
"DEFAULT"}"\n}\n\n`; + const notebookMeta = this.buildNotebookTrillMetadata(nodeId, nodeType, node); return { cell_type: "code", @@ -373,6 +444,7 @@ export class TrillNotebookConverter { nodeType, in: node.in ?? "DEFAULT", out: node.out ?? "DEFAULT", + trill: notebookMeta, }, }; } @@ -472,21 +544,125 @@ export class TrillNotebookConverter { } private generateUtkVisualizationCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { - const code = this.ensureUtkImport(node.content ?? ""); const inputs = this.getInputVariables(nodeInfo); const outputs = this.getOutputVariables(node.id); + const containerId = `utk-container-${node.id.substring(0, 8)}`; + // Build input data handling const inputLines = inputs.map((value, index) => `input_${index} = ${value}`).join("\n"); - - let argBlock = ""; + let dataVar = "None"; if (inputs.length === 1) { - argBlock = "arg = input_0\n"; + dataVar = "input_0"; } else if (inputs.length > 1) { - argBlock = `arg = [${inputs.map((_, index) => `input_${index}`).join(", ")}]\n`; + dataVar = `[${inputs.map((_, index) => `input_${index}`).join(", ")}]`; } - const body = `${inputLines}\n${argBlock}\n${code}\n`; - return this.wrapNodeExecution(body, outputs); + // Generate the enhanced UTK notebook code + const utkCode = this.generateUtkNotebookCode(node, containerId, dataVar); + const body = `${inputLines}\n\n${utkCode}`; + return this.wrapNodeExecution(body, outputs, containerId); + } + + private generateUtkNotebookCode(node: TrillNode, containerId: string, dataVar: string): string { + // Set up UTK with serverless mode and notebook environment + const utkSetup = ` +# Configure UTK for serverless/notebook environment +import utk +import json +from IPython.display import HTML, Javascript, display + +utk.Environment.serverless = True + +# Create grammar structure +grammar = { + "components": [{ + "id": "notebook_map", + "json": { + "camera": { + "wEye": [0, 0, 1000], + "wLookAt": [0, 0, 0], + "wUp": [0, 1, 0] + }, + "grid": {"width": 12, 
"height": 4}, + "knots": [], + "map_style": [], + "widgets": [{ + "type": "TOGGLE_KNOT" + }] + }, + "position": {"x": 0, "y": 0, "width": 12, "height": 4} + }], + "grid": {"width": 12, "height": 4}, + "knots": [] +} + +# If content has grammar, parse and merge it +grammar_content = """${node.content ?? "{}"}""".strip() +if grammar_content and grammar_content != "{}": + try: + parsed_grammar = json.loads(grammar_content) + # Merge parsed grammar with our structure + if "components" in parsed_grammar: + grammar["components"][0]["json"].update(parsed_grammar.get("json", {})) + if "knots" in parsed_grammar: + grammar["knots"] = parsed_grammar["knots"] + except json.JSONDecodeError: + pass + +# Load geospatial data if available +geospatial_data = None +if ${dataVar} is not None: + data_input = ${dataVar} + # Handle multi-input case + if isinstance(data_input, list): + data_input = data_input[0] if data_input else None + + if data_input is not None: + # Check if it's a geodataframe + try: + import geopandas as gpd + if isinstance(data_input, gpd.GeoDataFrame): + # Convert to GeoJSON + geojson_data = json.loads(data_input.to_json()) + geospatial_data = utk.physical_from_geojson(geojson_data) + + # Add layers to grammar + if geospatial_data and "components" in grammar: + if "layers" not in grammar["components"][0]["json"]: + grammar["components"][0]["json"]["layers"] = [] + # Add layer for the geospatial data + grammar["components"][0]["json"]["layers"].append({ + "type": "geospatial", + "data": geospatial_data.to_dict() if hasattr(geospatial_data, 'to_dict') else geospatial_data + }) + except Exception as e: + pass + +# Create HTML container +html_container = f'
' +display(HTML(html_container)) + +# Initialize UTK in browser +js_initialization = f""" +require(['utk'], function(utk) {{ + utk.Environment.serverless = true; + const container = document.getElementById('${containerId}'); + const grammar = {json.dumps(grammar)}; + + try {{ + const interpreter = new utk.GrammarInterpreter('notebook', grammar, container); + // Store reference for potential interactions + window._utk_interpreter_${node.id.substring(0, 8)} = interpreter; + }} catch(e) {{ + console.error('UTK initialization error:', e); + container.innerHTML = '
Error initializing UTK visualization
'; + }} +}}); +""" +display(Javascript(js_initialization)) +`; + + return utkSetup; } private inferNodeType(code: string): NodeType { @@ -708,6 +884,156 @@ export class TrillNotebookConverter { } } + private extractNotebookTrillMetadata(cell: Record): NotebookTrillMetadata | null { + const metadata = cell.metadata as Record | undefined; + if (!metadata) { + return null; + } + + const trill = metadata.trill as Record | undefined; + const source = trill ?? metadata; + + const parseConnections = (value: unknown): NotebookTrillConnection[] => { + if (!Array.isArray(value)) { + return []; + } + + return value + .map((entry) => entry as Record) + .filter((entry): entry is Record => !!entry) + .map((entry) => ({ + id: typeof entry.id === "string" ? entry.id : undefined, + source: typeof entry.source === "string" ? entry.source : "", + target: typeof entry.target === "string" ? entry.target : "", + sourceHandle: typeof entry.sourceHandle === "string" ? entry.sourceHandle : undefined, + targetHandle: typeof entry.targetHandle === "string" ? entry.targetHandle : undefined, + bidirectional: typeof entry.bidirectional === "boolean" ? entry.bidirectional : undefined, + type: typeof entry.type === "string" ? entry.type : undefined, + })) + .filter((entry) => !!entry.source && !!entry.target); + }; + + const nodeId = + typeof source.nodeId === "string" + ? source.nodeId + : typeof source.id === "string" + ? source.id + : undefined; + + const nodeType = + typeof source.nodeType === "string" + ? source.nodeType + : typeof source.type === "string" + ? source.type + : undefined; + + const nodeIn = + typeof source.in === "string" + ? source.in + : undefined; + + const nodeOut = + typeof source.out === "string" + ? 
source.out + : undefined; + + const inputs = parseConnections(source.inputs); + const outputs = parseConnections(source.outputs); + + if (!nodeId && !nodeType && inputs.length === 0 && outputs.length === 0) { + return null; + } + + return { + id: nodeId, + type: nodeType, + in: nodeIn, + out: nodeOut, + inputs, + outputs, + }; + } + + private buildNotebookTrillMetadata(nodeId: string, nodeType: string, node: TrillNode): NotebookTrillMetadata { + const nodeInfo = this.executionGraph[nodeId]; + const inputs: NotebookTrillConnection[] = []; + const outputs: NotebookTrillConnection[] = []; + + for (const [targetHandle, connections] of Object.entries(nodeInfo.inputs)) { + for (const connection of connections) { + inputs.push({ + source: connection.source, + target: nodeId, + sourceHandle: connection.sourceHandle, + targetHandle, + bidirectional: connection.bidirectional, + type: connection.bidirectional ? "Interaction" : undefined, + }); + } + } + + for (const [sourceHandle, connections] of Object.entries(nodeInfo.outputs)) { + for (const connection of connections) { + outputs.push({ + source: nodeId, + target: connection.target, + sourceHandle, + targetHandle: connection.targetHandle, + bidirectional: connection.bidirectional, + type: connection.bidirectional ? "Interaction" : undefined, + }); + } + } + + return { + id: nodeId, + type: nodeType, + in: node.in ?? "DEFAULT", + out: node.out ?? "DEFAULT", + inputs, + outputs, + }; + } + + private normalizeNotebookConnection(connection: NotebookTrillConnection): TrillEdge | null { + if (!connection.source || !connection.target) { + return null; + } + + const bidirectional = + connection.bidirectional === true || + connection.type === "Interaction" || + connection.sourceHandle === "in/out" || + connection.targetHandle === "in/out"; + + return { + id: connection.id ?? `edge_${uuid()}`, + source: connection.source, + target: connection.target, + sourceHandle: connection.sourceHandle ?? (bidirectional ? 
"in/out" : "out"), + targetHandle: connection.targetHandle ?? (bidirectional ? "in/out" : "in"), + type: bidirectional ? "Interaction" : connection.type, + }; + } + + private edgeKey(edge: TrillEdge): string { + return [ + edge.source, + edge.target, + edge.sourceHandle ?? "", + edge.targetHandle ?? "", + edge.type ?? "", + ].join("::"); + } + + private isBidirectionalEdge(edge: TrillEdge): boolean { + return ( + edge.type === "Interaction" || + edge.sourceHandle === "in/out" || + edge.targetHandle === "in/out" + ); + } + private removeTrillVariable(code: string): string { const pattern = /__trill_node__\s*=\s*\{[\s\S]*?\}\n?/; return code.replace(pattern, ""); From f67dca5e45ede148d829f8fadffffc7a4bf86f53 Mon Sep 17 00:00:00 2001 From: Vamsi Dath Date: Mon, 27 Apr 2026 23:34:13 -0500 Subject: [PATCH 09/13] curio/ipynb code refactoring and warning feature --- .../urban-workflows/src/NotebookConvertor.ts | 1097 ++++------------- .../src/components/menus/top/UpMenu.tsx | 23 +- .../src/notebook-convertor/codegen.ts | 181 +++ .../grammarDetection.ts | 0 .../src/notebook-convertor/graph.ts | 271 ++++ .../src/notebook-convertor/importEdges.ts | 116 ++ .../src/notebook-convertor/metadata.ts | 117 ++ .../src/notebook-convertor/parsing.ts | 285 +++++ .../src/notebook-convertor/types.ts | 92 ++ 9 files changed, 1300 insertions(+), 882 deletions(-) create mode 100644 utk_curio/frontend/urban-workflows/src/notebook-convertor/codegen.ts rename utk_curio/frontend/urban-workflows/src/{adapters => notebook-convertor}/grammarDetection.ts (100%) create mode 100644 utk_curio/frontend/urban-workflows/src/notebook-convertor/graph.ts create mode 100644 utk_curio/frontend/urban-workflows/src/notebook-convertor/importEdges.ts create mode 100644 utk_curio/frontend/urban-workflows/src/notebook-convertor/metadata.ts create mode 100644 utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts create mode 100644 utk_curio/frontend/urban-workflows/src/notebook-convertor/types.ts diff 
--git a/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts b/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts index 3ca96662..f289c673 100644 --- a/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts +++ b/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts @@ -1,102 +1,57 @@ import { v4 as uuid } from "uuid"; -import { isUtkSpec, isVegaLiteSpec, tryParseJsonObject } from "./adapters/grammarDetection"; +import { isUtkSpec, isVegaLiteSpec, tryParseJsonObject } from "./notebook-convertor/grammarDetection"; import { NodeType } from "./constants"; - -type JsonValue = string | number | boolean | null | JsonObject | JsonValue[]; -interface JsonObject { - [key: string]: JsonValue; -} - -export interface NotebookCell { - cell_type: "code" | "markdown" | string; - source: string; - metadata?: Record; -} - -export interface Notebook { - cells: NotebookCell[]; - metadata: Record; - nbformat: number; - nbformat_minor: number; -} - -interface TrillNode { - id: string; - type: string; - x: number; - y: number; - content?: string; - in?: string; - out?: string; -} - -interface TrillEdge { - id: string; - source: string; - target: string; - sourceHandle?: string; - targetHandle?: string; - type?: string; -} - -interface TrillDataflow { - nodes: TrillNode[]; - edges: TrillEdge[]; - name: string; - task: string; - timestamp: number; - provenance_id: string; -} - -export interface TrillSpec { - dataflow: TrillDataflow; -} - -interface InputConnection { - source: string; - sourceHandle: string; - bidirectional: boolean; -} - -interface OutputConnection { - target: string; - targetHandle: string; - bidirectional: boolean; -} - -interface GraphNodeInfo { - node: TrillNode; - dependencies: Set; - dependents: Set; - inputs: Record; - outputs: Record; -} - -interface TrillMeta { - id?: string; - type?: string; - in?: string; - out?: string; -} - -interface NotebookTrillConnection { - id?: string; - source: string; - target: string; - sourceHandle?: 
string; - targetHandle?: string; - bidirectional?: boolean; - type?: string; -} - -interface NotebookTrillMetadata { - id?: string; - type?: string; - in?: string; - out?: string; - inputs?: NotebookTrillConnection[]; - outputs?: NotebookTrillConnection[]; -} +import { + GraphNodeInfo, + Notebook, + NotebookCell, + NotebookTrillConnection, + NotebookTrillConnectionsMetadata, + TrillEdge, + TrillMeta, + TrillNode, + TrillSpec, +} from "./notebook-convertor/types"; +import { extractAssignedObjectLiteral, removeAssignedObjectVariables } from "./notebook-convertor/metadata"; +import { buildLinearFallbackEdges, collectExplicitEdgesFromCells, inferEdgesFromVariables } from "./notebook-convertor/importEdges"; +import { + buildComputationBody, + buildDataPoolBody, + buildMergeFlowBody, + buildUtkNotebookCode, + buildUtkVisualizationBody, + buildVegaVisualizationBody, + buildTableVisualizationBody, + buildTextVisualizationBody, + buildImageVisualizationBody, + buildConstantsBody, + buildCommentBody, + getUtkDataVar, +} from "./notebook-convertor/codegen"; +import { + buildExecutionGraph, + buildNotebookTrillConnectionsMetadata, + edgeKey, + getInputVariables, + getOutputVariables, + normalizeNotebookConnection, + sanitizeId, + topologicalSort, +} from "./notebook-convertor/graph"; +import { + deindentText, + extractInputVariables, + extractProducedVariables, + extractVegaLiteSpecCode, + replaceKeywordsOutsideStrings, + stripGeneratedNodePrelude, +} from "./notebook-convertor/parsing"; + +export type { + Notebook, + NotebookCell, + TrillSpec, +} from "./notebook-convertor/types"; export class TrillNotebookConverter { private executionGraph: Record = {}; @@ -105,9 +60,9 @@ export class TrillNotebookConverter { const nodes = trillJson.dataflow?.nodes ?? []; const edges = trillJson.dataflow?.edges ?? 
[]; - this.buildExecutionGraph(nodes, edges); + this.executionGraph = buildExecutionGraph(nodes, edges); - const executionOrder = this.topologicalSort(); + const executionOrder = topologicalSort(this.executionGraph); const cells: NotebookCell[] = []; for (const nodeId of executionOrder) { @@ -139,7 +94,7 @@ export class TrillNotebookConverter { }; } - public notebookToTrill(notebook: Partial | Record): TrillSpec { + public notebookToTrill(notebook: Partial | Record): { trillSpec: TrillSpec; warnings?: string[] } { const rawCells = Array.isArray((notebook as { cells?: unknown[] }).cells) ? ((notebook as { cells: unknown[] }).cells ?? []) : []; @@ -148,13 +103,11 @@ export class TrillNotebookConverter { const edges: TrillEdge[] = []; const nodeInputs: Record = {}; const producedByVar: Record = {}; - const explicitEdges = new Map(); - let sawExplicitConnections = false; - - let previousNodeId: string | null = null; const position = { x: 100, y: 100 }; const importedWorkflowId = uuid(); + let foundTrillMeta = false; + rawCells.forEach((rawCell) => { const cell = rawCell as Record; if (cell.cell_type !== "code") { @@ -168,27 +121,29 @@ export class TrillNotebookConverter { const source = cell.source; const code = Array.isArray(source) ? source.join("") : String(source ?? ""); - const notebookMeta = this.extractNotebookTrillMetadata(cell); const trillMeta = this.extractTrillVariable(code); + if (trillMeta) { + foundTrillMeta = true; + } const nodeId = - notebookMeta?.id ?? trillMeta?.id ?? `notebook_cell_${uuid()}`; + trillMeta?.id ?? `notebook_cell_${uuid()}`; const inferredNodeType = this.inferNodeType(code); const nodeType = - notebookMeta?.type ?? trillMeta?.type ?? inferredNodeType; + trillMeta?.type ?? inferredNodeType; const nodeIn = - notebookMeta?.in ?? trillMeta?.in ?? "DEFAULT"; + trillMeta?.in ?? "DEFAULT"; const nodeOut = - notebookMeta?.out ?? trillMeta?.out ?? "DEFAULT"; + trillMeta?.out ?? 
"DEFAULT"; const codeWithoutMeta = this.removeTrillVariable(code); - const inputVars = this.extractInputVariables(codeWithoutMeta); - const producedVars = this.extractProducedVariables(codeWithoutMeta); + const inputVars = extractInputVariables(codeWithoutMeta); + const producedVars = extractProducedVariables(codeWithoutMeta); const cleanCodeBody = this.unwrapCurioNodeExecution(codeWithoutMeta); const cleanCode = nodeType === NodeType.VIS_VEGA - ? this.normalizeVegaSpecForCurio(this.extractVegaLiteSpecCode(cleanCodeBody)) + ? this.normalizeVegaSpecForCurio(extractVegaLiteSpecCode(cleanCodeBody)) : cleanCodeBody; const node: TrillNode = { @@ -206,106 +161,44 @@ export class TrillNotebookConverter { for (const producedVar of producedVars) { producedByVar[producedVar] = nodeId; } - - previousNodeId = nodeId; position.y += 150; }); - for (const rawCell of rawCells) { - const cell = rawCell as Record; - if (cell.cell_type !== "code") { - continue; - } - - const notebookMeta = this.extractNotebookTrillMetadata(cell); - if (!notebookMeta) { - continue; - } - - const nodeId = notebookMeta.id; - if (!nodeId) { - continue; - } - - const serializedConnections = [ - ...(notebookMeta.outputs ?? []), - ...(notebookMeta.inputs ?? 
[]), - ]; - - if (serializedConnections.length > 0) { - sawExplicitConnections = true; - } - - for (const connection of serializedConnections) { - const edge = this.normalizeNotebookConnection(connection); - if (!edge) { - continue; - } - - const key = this.edgeKey(edge); - if (!explicitEdges.has(key)) { - explicitEdges.set(key, edge); - } - } - } + const { sawExplicitConnections, explicitEdges } = collectExplicitEdgesFromCells( + rawCells, + (code) => this.extractTrillConnectionsVariable(code), + (connection) => normalizeNotebookConnection(connection), + (edge) => edgeKey(edge), + ); if (sawExplicitConnections) { - edges.push(...explicitEdges.values()); + edges.push(...explicitEdges); } - const targetInputCount: Record = {}; if (!sawExplicitConnections) { - const edgeKeys = new Set(); - - for (const node of nodes) { - const inputs = nodeInputs[node.id] ?? []; - for (const inputVar of inputs) { - const sourceNodeId = producedByVar[inputVar]; - if (!sourceNodeId || sourceNodeId === node.id) { - continue; - } - - const edgeKey = `${sourceNodeId}->${node.id}::${inputVar}`; - if (edgeKeys.has(edgeKey)) { - continue; - } - edgeKeys.add(edgeKey); - - let targetHandle = "in"; - if (node.type === NodeType.MERGE_FLOW) { - const count = targetInputCount[node.id] ?? 
0; - targetHandle = `in_${count}`; - targetInputCount[node.id] = count + 1; - } - - edges.push({ - id: `edge_${uuid()}`, - source: sourceNodeId, - sourceHandle: "out", - target: node.id, - targetHandle, - }); - } - } + edges.push( + ...inferEdgesFromVariables( + nodes, + nodeInputs, + producedByVar, + (node) => node.type === NodeType.MERGE_FLOW, + () => `edge_${uuid()}`, + ), + ); } if (edges.length === 0) { - let linearPreviousId: string | null = null; - for (const node of nodes) { - if (linearPreviousId) { - edges.push({ - id: `edge_${uuid()}`, - source: linearPreviousId, - sourceHandle: "out", - target: node.id, - targetHandle: "in", - }); - } - linearPreviousId = node.id; - } + edges.push(...buildLinearFallbackEdges(nodes, () => `edge_${uuid()}`)); } - return { + const warnings: string[] = []; + if (!foundTrillMeta && !sawExplicitConnections) { + warnings.push( + "No Trill metadata detected in notebook cells; import used inference which may be lossy. See docs/IPYNB-USAGE.md for a recommended cell template.", + ); + } + + const spec: TrillSpec = { dataflow: { nodes, edges, @@ -315,94 +208,11 @@ export class TrillNotebookConverter { provenance_id: importedWorkflowId, }, }; - } - - private buildExecutionGraph(nodes: TrillNode[], edges: TrillEdge[]): void { - this.executionGraph = {}; - - for (const node of nodes) { - this.executionGraph[node.id] = { - node, - dependencies: new Set(), - dependents: new Set(), - inputs: {}, - outputs: {}, - }; - } - - for (const edge of edges) { - const source = edge.source; - const target = edge.target; - - if (!this.executionGraph[source] || !this.executionGraph[target]) { - continue; - } - - const sourceInfo = this.executionGraph[source]; - const targetInfo = this.executionGraph[target]; - const bidirectional = this.isBidirectionalEdge(edge); - - if (!bidirectional) { - targetInfo.dependencies.add(source); - sourceInfo.dependents.add(target); - } - - const targetHandle = edge.targetHandle ?? (bidirectional ? 
"in/out" : "in"); - const sourceHandle = edge.sourceHandle ?? (bidirectional ? "in/out" : "out"); - - if (!targetInfo.inputs[targetHandle]) { - targetInfo.inputs[targetHandle] = []; - } - - targetInfo.inputs[targetHandle].push({ - source, - sourceHandle, - bidirectional, - }); - - if (!sourceInfo.outputs[sourceHandle]) { - sourceInfo.outputs[sourceHandle] = []; - } - sourceInfo.outputs[sourceHandle].push({ - target, - targetHandle, - bidirectional, - }); - } + return { trillSpec: spec, warnings: warnings.length > 0 ? warnings : undefined }; } - private topologicalSort(): string[] { - const visited = new Set(); - const visiting = new Set(); - const result: string[] = []; - - const visit = (nodeId: string): void => { - if (visiting.has(nodeId)) { - throw new Error("Circular dependency detected"); - } - - if (visited.has(nodeId)) { - return; - } - - visiting.add(nodeId); - - for (const dep of this.executionGraph[nodeId].dependencies) { - visit(dep); - } - visiting.delete(nodeId); - visited.add(nodeId); - result.push(nodeId); - }; - - for (const nodeId of Object.keys(this.executionGraph)) { - visit(nodeId); - } - - return result; - } private generateCellForNode(node: TrillNode): NotebookCell | null { const nodeType = node.type; @@ -417,34 +227,48 @@ export class TrillNotebookConverter { if (nodeType === NodeType.DATA_LOADING) { code = this.generateDataLoadingCode(node); + } else if (nodeType === NodeType.DATA_EXPORT) { + code = this.generateDataExportCode(node, nodeInfo); } else if (nodeType === NodeType.MERGE_FLOW) { code = this.generateMergeFlowCode(node, nodeInfo); } else if (nodeType === NodeType.DATA_POOL) { code = this.generateDataPoolCode(node, nodeInfo); } else if (nodeType === NodeType.DATA_SUMMARY) { code = this.generateDataSummaryCode(node, nodeInfo); + } else if (nodeType === NodeType.DATA_CLEANING) { + code = this.generateDataCleaningCode(node, nodeInfo); + } else if (nodeType === NodeType.DATA_TRANSFORMATION) { + code = 
this.generateDataTransformationCode(node, nodeInfo); + } else if (nodeType === NodeType.FLOW_SWITCH) { + code = this.generateFlowSwitchCode(node, nodeInfo); } else if (nodeType === NodeType.VIS_VEGA) { code = this.generateVegaVisualizationCode(node, nodeInfo); } else if (nodeType === NodeType.VIS_UTK) { code = this.generateUtkVisualizationCode(node, nodeInfo); + } else if (nodeType === NodeType.VIS_TABLE) { + code = this.generateVisTableCode(node, nodeInfo); + } else if (nodeType === NodeType.VIS_TEXT) { + code = this.generateVisTextCode(node, nodeInfo); + } else if (nodeType === NodeType.VIS_IMAGE) { + code = this.generateVisImageCode(node, nodeInfo); + } else if (nodeType === NodeType.CONSTANTS) { + code = this.generateConstantsCode(node, nodeInfo); + } else if (nodeType === NodeType.COMMENTS) { + code = this.generateCommentsCode(node, nodeInfo); } else { code = this.generateComputationCode(node, nodeInfo); } const nodeMeta = `__trill_node__ = {\n "id": "${nodeId}",\n "type": "${nodeType}",\n "in": "${node.in ?? "DEFAULT"}",\n "out": "${node.out ?? "DEFAULT"}"\n}\n\n`; - const notebookMeta = this.buildNotebookTrillMetadata(nodeId, nodeType, node); + const notebookConnectionsMeta = buildNotebookTrillConnectionsMetadata(nodeId, this.executionGraph); + const connectionsMeta = `__trill_connections__ = ${JSON.stringify(notebookConnectionsMeta, null, 2)}\n\n`; return { cell_type: "code", - source: nodeMeta + code, + source: nodeMeta + connectionsMeta + code, metadata: { id: nodeId, language: "python", - nodeId, - nodeType, - in: node.in ?? "DEFAULT", - out: node.out ?? "DEFAULT", - trill: notebookMeta, }, }; } @@ -471,198 +295,129 @@ export class TrillNotebookConverter { private generateDataLoadingCode(node: TrillNode): string { const code = node.content ?? 
""; - const outputs = this.getOutputVariables(node.id); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); return this.wrapNodeExecution(code, outputs); } - private generateComputationCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { - const code = node.content ?? ""; - const inputs = this.getInputVariables(nodeInfo); - const outputs = this.getOutputVariables(node.id); + private generateDataExportCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); + const body = buildComputationBody(inputs, node.content ?? ""); + return this.wrapNodeExecution(body, outputs); + } - const inputLines = inputs.map((value, index) => `input_${index} = ${value}`).join("\n"); + private generateDataCleaningCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); + const body = buildComputationBody(inputs, node.content ?? ""); + return this.wrapNodeExecution(body, outputs); + } - let argBlock = ""; - if (inputs.length === 1) { - argBlock = "arg = input_0\n"; - } else if (inputs.length > 1) { - argBlock = `arg = [${inputs.map((_, index) => `input_${index}`).join(", ")}]\n`; - } + private generateDataTransformationCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); + const body = buildComputationBody(inputs, node.content ?? 
""); + return this.wrapNodeExecution(body, outputs); + } - const body = `${inputLines}\n${argBlock}\n${code}\n`; + private generateFlowSwitchCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); + const body = buildComputationBody(inputs, node.content ?? ""); return this.wrapNodeExecution(body, outputs); } - private generateMergeFlowCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { - const inputs = this.getInputVariables(nodeInfo); - const outputs = this.getOutputVariables(node.id); + private generateComputationCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const code = node.content ?? ""; + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); - const joinedInputs = inputs.join(",\n"); - const indentedInputs = this.indent(joinedInputs, 4); + const body = buildComputationBody(inputs, code); + return this.wrapNodeExecution(body, outputs); + } - const body = `\ninputs = [\n${indentedInputs}\n]\n\nmerged_inputs = [i for i in inputs if i is not None]\n\nreturn merged_inputs\n`; + private generateMergeFlowCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); + + const body = buildMergeFlowBody(inputs); return this.wrapNodeExecution(body, outputs); } private generateDataPoolCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { - const inputs = this.getInputVariables(nodeInfo); - const outputs = this.getOutputVariables(node.id); + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); const source = inputs.length > 0 ? 
inputs[0] : "None"; - const body = `\nreturn ${source}\n`; + const body = buildDataPoolBody(source); return this.wrapNodeExecution(body, outputs); } private generateDataSummaryCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { const code = node.content ?? ""; - const inputs = this.getInputVariables(nodeInfo); - const outputs = this.getOutputVariables(node.id); - - const inputLines = inputs.map((value, index) => `input_${index} = ${value}`).join("\n"); + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); - let argBlock = ""; - if (inputs.length === 1) { - argBlock = "arg = input_0\n"; - } else if (inputs.length > 1) { - argBlock = `arg = [${inputs.map((_, index) => `input_${index}`).join(", ")}]\n`; - } - - const body = `${inputLines}\n${argBlock}\n${code}\n`; + const body = buildComputationBody(inputs, code); const primaryOutput = outputs[0] ?? "_curio_output"; return this.wrapNodeExecution(body, outputs, primaryOutput); } private generateVegaVisualizationCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { const code = this.normalizeVegaSpecForNotebook(node.content ?? ""); - const inputs = this.getInputVariables(nodeInfo); + const inputs = getInputVariables(nodeInfo, this.executionGraph); const inputVar = inputs.length > 0 ? 
inputs[0] : "None"; - const outputs = this.getOutputVariables(node.id); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); - const body = `\ninput_data = ${inputVar}\n\nspec = ${code.trim()}\n\nvalues = input_data\nif hasattr(input_data, "to_dict"):\n values = input_data.to_dict(orient="records")\n\nif isinstance(spec, dict):\n spec["data"] = {"values": values}\n\nfrom IPython.display import display\ndisplay({"application/vnd.vegalite.v5+json": spec, "text/plain": spec}, raw=True)\n\nreturn input_data\n`; + const body = buildVegaVisualizationBody(inputVar, code); return this.wrapNodeExecution(body, outputs); } private generateUtkVisualizationCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { - const inputs = this.getInputVariables(nodeInfo); - const outputs = this.getOutputVariables(node.id); + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); const containerId = `utk-container-${node.id.substring(0, 8)}`; - // Build input data handling - const inputLines = inputs.map((value, index) => `input_${index} = ${value}`).join("\n"); - let dataVar = "None"; - if (inputs.length === 1) { - dataVar = "input_0"; - } else if (inputs.length > 1) { - dataVar = `[${inputs.map((_, index) => `input_${index}`).join(", ")}]`; - } + const dataVar = getUtkDataVar(inputs); - // Generate the enhanced UTK notebook code - const utkCode = this.generateUtkNotebookCode(node, containerId, dataVar); - const body = `${inputLines}\n\n${utkCode}`; + const utkCode = buildUtkNotebookCode(node.content ?? 
"{}", containerId, dataVar, node.id); + const body = buildUtkVisualizationBody(inputs, utkCode); return this.wrapNodeExecution(body, outputs, containerId); } - private generateUtkNotebookCode(node: TrillNode, containerId: string, dataVar: string): string { - // Set up UTK with serverless mode and notebook environment - const utkSetup = ` -# Configure UTK for serverless/notebook environment -import utk -import json -from IPython.display import HTML, Javascript, display - -utk.Environment.serverless = True - -# Create grammar structure -grammar = { - "components": [{ - "id": "notebook_map", - "json": { - "camera": { - "wEye": [0, 0, 1000], - "wLookAt": [0, 0, 0], - "wUp": [0, 1, 0] - }, - "grid": {"width": 12, "height": 4}, - "knots": [], - "map_style": [], - "widgets": [{ - "type": "TOGGLE_KNOT" - }] - }, - "position": {"x": 0, "y": 0, "width": 12, "height": 4} - }], - "grid": {"width": 12, "height": 4}, - "knots": [] -} + private generateVisTableCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); + const inputVar = inputs.length > 0 ? inputs[0] : "None"; + const body = buildTableVisualizationBody(inputVar); + return this.wrapNodeExecution(body, outputs); + } + + private generateVisTextCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); + const inputVar = inputs.length > 0 ? inputs[0] : "None"; + const body = buildTextVisualizationBody(inputVar); + return this.wrapNodeExecution(body, outputs); + } + + private generateVisImageCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + const inputs = getInputVariables(nodeInfo, this.executionGraph); + const outputs = getOutputVariables(node.id, node.type, this.executionGraph); + const inputVar = inputs.length > 0 ? 
inputs[0] : "None"; + const body = buildImageVisualizationBody(inputVar); + return this.wrapNodeExecution(body, outputs); + } + + private generateConstantsCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + // Constants should be emitted as top-level definitions so they persist in the notebook namespace. + return buildConstantsBody(node.content ?? ""); + } -# If content has grammar, parse and merge it -grammar_content = """${node.content ?? "{}"}""".strip() -if grammar_content and grammar_content != "{}": - try: - parsed_grammar = json.loads(grammar_content) - # Merge parsed grammar with our structure - if "components" in parsed_grammar: - grammar["components"][0]["json"].update(parsed_grammar.get("json", {})) - if "knots" in parsed_grammar: - grammar["knots"] = parsed_grammar["knots"] - except json.JSONDecodeError: - pass - -# Load geospatial data if available -geospatial_data = None -if ${dataVar} is not None: - data_input = ${dataVar} - # Handle multi-input case - if isinstance(data_input, list): - data_input = data_input[0] if data_input else None - - if data_input is not None: - # Check if it's a geodataframe - try: - import geopandas as gpd - if isinstance(data_input, gpd.GeoDataFrame): - # Convert to GeoJSON - geojson_data = json.loads(data_input.to_json()) - geospatial_data = utk.physical_from_geojson(geojson_data) - - # Add layers to grammar - if geospatial_data and "components" in grammar: - if "layers" not in grammar["components"][0]["json"]: - grammar["components"][0]["json"]["layers"] = [] - # Add layer for the geospatial data - grammar["components"][0]["json"]["layers"].append({ - "type": "geospatial", - "data": geospatial_data.to_dict() if hasattr(geospatial_data, 'to_dict') else geospatial_data - }) - except Exception as e: - pass - -# Create HTML container -html_container = f'
' -display(HTML(html_container)) - -# Initialize UTK in browser -js_initialization = f""" -require(['utk'], function(utk) {{ - utk.Environment.serverless = true; - const container = document.getElementById('${containerId}'); - const grammar = {json.dumps(grammar)}; - - try {{ - const interpreter = new utk.GrammarInterpreter('notebook', grammar, container); - // Store reference for potential interactions - window._utk_interpreter_${node.id.substring(0, 8)} = interpreter; - }} catch(e) {{ - console.error('UTK initialization error:', e); - container.innerHTML = '
Error initializing UTK visualization
'; - }} -}}); -""" -display(Javascript(js_initialization)) -`; - - return utkSetup; + private generateCommentsCode(node: TrillNode, nodeInfo: GraphNodeInfo): string { + // Emit comments as commented code lines within the cell so they are preserved. + return buildCommentBody(node.content ?? ""); } private inferNodeType(code: string): NodeType { @@ -679,7 +434,7 @@ display(Javascript(js_initialization)) } } - const parsedVegaSpec = tryParseJsonObject(this.extractVegaLiteSpecCode(codeWithoutMeta)); + const parsedVegaSpec = tryParseJsonObject(extractVegaLiteSpecCode(codeWithoutMeta)); if (parsedVegaSpec && isVegaLiteSpec(parsedVegaSpec)) { return NodeType.VIS_VEGA; } @@ -698,22 +453,9 @@ display(Javascript(js_initialization)) return NodeType.COMPUTATION_ANALYSIS; } - private ensureUtkImport(code: string): string { - const utkPattern = /(^|\n)\s*(?:from\s+utk\s+import\s+|import\s+utk\b)|\butk\s*\./; - if (utkPattern.test(code)) { - return code; - } - - const trimmedCode = code.trim(); - if (!trimmedCode) { - return "import utk"; - } - - return `import utk\n\n${trimmedCode}`; - } private normalizeVegaSpecForNotebook(specCode: string): string { - return this.replaceKeywordsOutsideStrings(specCode, { + return replaceKeywordsOutsideStrings(specCode, { true: "True", false: "False", null: "None", @@ -721,91 +463,13 @@ display(Javascript(js_initialization)) } private normalizeVegaSpecForCurio(specCode: string): string { - return this.replaceKeywordsOutsideStrings(specCode, { + return replaceKeywordsOutsideStrings(specCode, { True: "true", False: "false", None: "null", }); } - private replaceKeywordsOutsideStrings(text: string, replacements: Record): string { - const keys = Object.keys(replacements).sort((a, b) => b.length - a.length); - let result = ""; - let index = 0; - let inString = false; - let quoteChar = ""; - - const isIdentifierChar = (char: string | undefined): boolean => { - if (!char) { - return false; - } - - const code = char.charCodeAt(0); - return ( - (code >= 
65 && code <= 90) || - (code >= 97 && code <= 122) || - (code >= 48 && code <= 57) || - char === "_" - ); - }; - - while (index < text.length) { - const char = text[index]; - - if (inString) { - result += char; - - if (char === "\\") { - index += 1; - if (index < text.length) { - result += text[index]; - } - } else if (char === quoteChar) { - inString = false; - quoteChar = ""; - } - - index += 1; - continue; - } - - if (char === '"' || char === "'") { - inString = true; - quoteChar = char; - result += char; - index += 1; - continue; - } - - let replaced = false; - - for (const key of keys) { - if (!text.startsWith(key, index)) { - continue; - } - - const prev = index > 0 ? text[index - 1] : undefined; - const next = index + key.length < text.length ? text[index + key.length] : undefined; - - if (isIdentifierChar(prev) || isIdentifierChar(next)) { - continue; - } - - result += replacements[key]; - index += key.length; - replaced = true; - break; - } - - if (!replaced) { - result += char; - index += 1; - } - } - - return result; - } - private indent(text: string, spaces: number): string { const prefix = " ".repeat(spaces); return text @@ -814,69 +478,15 @@ display(Javascript(js_initialization)) .join("\n"); } - private getOutputVariable(nodeId: string, sourceHandle: string = "out"): string { - const nodeType = this.executionGraph[nodeId].node.type; - const safeId = this.sanitizeId(nodeId); - - let baseOutput = ""; - - if (nodeType === NodeType.DATA_LOADING) { - baseOutput = `data_${safeId}`; - } else if (nodeType === NodeType.MERGE_FLOW) { - baseOutput = `merged_${safeId}`; - } else if (nodeType === NodeType.DATA_POOL) { - baseOutput = `pool_${safeId}`; - } else { - baseOutput = `result_${safeId}`; - } - - if (!sourceHandle || sourceHandle === "out") { - return baseOutput; - } - - return `${baseOutput}_${this.sanitizeId(sourceHandle)}`; - } - - private getOutputVariables(nodeId: string): string[] { - const outputHandles = new Set(["out"]); - const nodeInfo = 
this.executionGraph[nodeId]; - - for (const outputHandle of Object.keys(nodeInfo.outputs)) { - outputHandles.add(outputHandle); - } - - return Array.from(outputHandles).map((handle) => this.getOutputVariable(nodeId, handle)); - } - - private getInputVariables(nodeInfo: GraphNodeInfo): string[] { - const variables: string[] = []; - - for (const connections of Object.values(nodeInfo.inputs)) { - for (const inputInfo of connections) { - if (!inputInfo.bidirectional) { - variables.push(this.getOutputVariable(inputInfo.source, inputInfo.sourceHandle)); - } - } - } - - return variables; - } - - private sanitizeId(nodeId: string): string { - return nodeId.replace(/[^a-zA-Z0-9]/g, "_"); - } - private extractTrillVariable(code: string): TrillMeta | null { - const pattern = /__trill_node__\s*=\s*(\{[\s\S]*?\})/; - const match = code.match(pattern); - - if (!match || !match[1]) { + const assignedObject = extractAssignedObjectLiteral(code, "__trill_node__"); + if (!assignedObject) { return null; } try { // Accept Python-style single quotes in older notebook exports. - const normalized = match[1].replace(/'/g, '"'); + const normalized = assignedObject.replace(/'/g, '"'); const parsed = JSON.parse(normalized) as TrillMeta; return parsed; } catch { @@ -884,15 +494,7 @@ display(Javascript(js_initialization)) } } - private extractNotebookTrillMetadata(cell: Record): NotebookTrillMetadata | null { - const metadata = cell.metadata as Record | undefined; - if (!metadata) { - return null; - } - - const trill = metadata.trill as Record | undefined; - const source = trill ?? metadata; - + private extractTrillConnectionsVariable(code: string): NotebookTrillConnectionsMetadata | null { const parseConnections = (value: unknown): NotebookTrillConnection[] => { if (!Array.isArray(value)) { return []; @@ -913,130 +515,34 @@ display(Javascript(js_initialization)) .filter((entry) => !!entry.source && !!entry.target); }; - const nodeId = - typeof source.nodeId === "string" - ? 
source.nodeId - : typeof source.id === "string" - ? source.id - : undefined; - - const nodeType = - typeof source.nodeType === "string" - ? source.nodeType - : typeof source.type === "string" - ? source.type - : undefined; - - const nodeIn = - typeof source.in === "string" - ? source.in - : undefined; - - const nodeOut = - typeof source.out === "string" - ? source.out - : undefined; - - const inputs = parseConnections(source.inputs); - const outputs = parseConnections(source.outputs); - - if (!nodeId && !nodeType && inputs.length === 0 && outputs.length === 0) { + const assignedObject = extractAssignedObjectLiteral(code, "__trill_connections__"); + if (!assignedObject) { return null; } - return { - id: nodeId, - type: nodeType, - in: nodeIn, - out: nodeOut, - inputs, - outputs, - }; - } - - private buildNotebookTrillMetadata(nodeId: string, nodeType: string, node: TrillNode): NotebookTrillMetadata { - const nodeInfo = this.executionGraph[nodeId]; - const inputs: NotebookTrillConnection[] = []; - const outputs: NotebookTrillConnection[] = []; - - for (const [targetHandle, connections] of Object.entries(nodeInfo.inputs)) { - for (const connection of connections) { - inputs.push({ - source: connection.source, - target: nodeId, - sourceHandle: connection.sourceHandle, - targetHandle, - bidirectional: connection.bidirectional, - type: connection.bidirectional ? "Interaction" : undefined, - }); - } - } - - for (const [sourceHandle, connections] of Object.entries(nodeInfo.outputs)) { - for (const connection of connections) { - outputs.push({ - source: nodeId, - target: connection.target, - sourceHandle, - targetHandle: connection.targetHandle, - bidirectional: connection.bidirectional, - type: connection.bidirectional ? "Interaction" : undefined, - }); - } + let parsed: Record; + try { + // Accept Python-style single quotes in older notebook exports. 
+ parsed = JSON.parse(assignedObject.replace(/'/g, '"')) as Record; + } catch { + return null; } - return { - id: nodeId, - type: nodeType, - in: node.in ?? "DEFAULT", - out: node.out ?? "DEFAULT", - inputs, - outputs, - }; - } + const inputs = parseConnections(parsed.inputs); + const outputs = parseConnections(parsed.outputs); - private normalizeNotebookConnection(connection: NotebookTrillConnection): TrillEdge | null { - if (!connection.source || !connection.target) { + if (inputs.length === 0 && outputs.length === 0) { return null; } - const bidirectional = - connection.bidirectional === true || - connection.type === "Interaction" || - connection.sourceHandle === "in/out" || - connection.targetHandle === "in/out"; - return { - id: connection.id ?? `edge_${uuid()}`, - source: connection.source, - target: connection.target, - sourceHandle: connection.sourceHandle ?? (bidirectional ? "in/out" : "out"), - targetHandle: connection.targetHandle ?? (bidirectional ? "in/out" : "in"), - type: bidirectional ? "Interaction" : connection.type, + inputs, + outputs, }; } - private edgeKey(edge: TrillEdge): string { - return [ - edge.source, - edge.target, - edge.sourceHandle ?? "", - edge.targetHandle ?? "", - edge.type ?? 
"", - ].join("::"); - } - - private isBidirectionalEdge(edge: TrillEdge): boolean { - return ( - edge.type === "Interaction" || - edge.sourceHandle === "in/out" || - edge.targetHandle === "in/out" - ); - } - private removeTrillVariable(code: string): string { - const pattern = /__trill_node__\s*=\s*\{[\s\S]*?\}\n?/; - return code.replace(pattern, ""); + return removeAssignedObjectVariables(code, ["__trill_node__", "__trill_connections__"]); } private unwrapCurioNodeExecution(code: string): string { @@ -1062,177 +568,8 @@ display(Javascript(js_initialization)) } const body = code.slice(bodyStart + 2, bodyEnd); - const deindentedBody = this.deindent(body, 4).trimEnd(); - return this.stripGeneratedNodePrelude(deindentedBody).trim(); - } - - private stripGeneratedNodePrelude(code: string): string { - const lines = code.split("\n"); - let index = 0; - - while (index < lines.length) { - const line = lines[index].trim(); - - if (line.startsWith("input_") && line.includes(" = ")) { - index += 1; - continue; - } - - if (line.startsWith("arg = ")) { - index += 1; - continue; - } - - break; - } - - let end = lines.length; - - while (end > index) { - const line = lines[end - 1].trim(); - - if (line === "return input_data") { - end -= 1; - continue; - } - - if (line === "") { - end -= 1; - continue; - } - - break; - } - - return lines.slice(index, end).join("\n").trimEnd(); - } - - private extractVegaLiteSpecCode(code: string): string { - const specAssignMarker = "spec ="; - const specAssignStart = code.indexOf(specAssignMarker); - - if (specAssignStart < 0) { - return code; - } - - const objectStart = code.indexOf("{", specAssignStart); - if (objectStart < 0) { - return code; - } - - let depth = 0; - let objectEnd = -1; - - for (let index = objectStart; index < code.length; index += 1) { - const char = code[index]; - - if (char === "{") { - depth += 1; - } else if (char === "}") { - depth -= 1; - if (depth === 0) { - objectEnd = index; - break; - } - } - } - - if (objectEnd < 
0) { - return code; - } - - return code.slice(objectStart, objectEnd + 1).trim(); - } - - private extractInputVariables(code: string): string[] { - const variables: string[] = []; - const lines = code.split("\n"); - - for (const rawLine of lines) { - const line = rawLine.trim(); - - if (line.startsWith("input_") && line.includes(" = ")) { - const rhs = line.slice(line.indexOf("=") + 1).trim(); - if (this.isSimpleVariableName(rhs)) { - variables.push(rhs); - } - } - - if (line.startsWith("input_data = ")) { - const rhs = line.slice("input_data = ".length).trim(); - if (this.isSimpleVariableName(rhs)) { - variables.push(rhs); - } - } - } - - return Array.from(new Set(variables)); - } - - private extractProducedVariables(code: string): string[] { - const variables: string[] = []; - const lines = code.split("\n"); - let primaryVar = ""; - - for (const rawLine of lines) { - const line = rawLine.trim(); - - if (line.endsWith("= _curio_output")) { - const lhs = line.slice(0, line.indexOf("=")).trim(); - if (this.isSimpleVariableName(lhs)) { - primaryVar = lhs; - variables.push(lhs); - } - continue; - } - - if (primaryVar && line.endsWith(`= ${primaryVar}`)) { - const lhs = line.slice(0, line.indexOf("=")).trim(); - if (this.isSimpleVariableName(lhs)) { - variables.push(lhs); - } - } - } - - return Array.from(new Set(variables)); - } - - private isSimpleVariableName(value: string): boolean { - if (!value) { - return false; - } - - const first = value.charCodeAt(0); - const startsWithLetterOrUnderscore = - value[0] === "_" || - (first >= 65 && first <= 90) || - (first >= 97 && first <= 122); - - if (!startsWithLetterOrUnderscore) { - return false; - } - - for (let i = 1; i < value.length; i += 1) { - const char = value[i]; - const code = value.charCodeAt(i); - const isAlphaNum = - (code >= 65 && code <= 90) || - (code >= 97 && code <= 122) || - (code >= 48 && code <= 57); - if (!(isAlphaNum || char === "_")) { - return false; - } - } - - return true; - } - - private 
deindent(text: string, spaces: number): string { - const prefix = " ".repeat(spaces); - return text - .split("\n") - .map((line) => (line.startsWith(prefix) ? line.slice(spaces) : line)) - .join("\n"); + const deindentedBody = deindentText(body, 4).trimEnd(); + return stripGeneratedNodePrelude(deindentedBody).trim(); } public serializeNotebook(notebook: Notebook): string { diff --git a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx index 70a65e6e..0d2ba0ce 100644 --- a/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx +++ b/utk_curio/frontend/urban-workflows/src/components/menus/top/UpMenu.tsx @@ -56,6 +56,7 @@ export default function UpMenu({ const [activeMenu, setActiveMenu] = useState(null); const [saving, setSaving] = useState(false); const [aiModeOn, setAiModeOn] = useState(false); + const [importWarning, setImportWarning] = useState(null); const menuBarRef = useRef(null); const loadTrillInputRef = useRef(null); @@ -236,8 +237,15 @@ export default function UpMenu({ try { const jsonContent = JSON.parse(event.target.result); const converter = new TrillNotebookConverter(); - const trillSpec = converter.notebookToTrill(jsonContent); - loadTrill(trillSpec); + const result = converter.notebookToTrill(jsonContent); + if ((result as any).trillSpec) { + loadTrill((result as any).trillSpec); + const warnings = (result as any).warnings as string[] | undefined; + setImportWarning(warnings && warnings.length > 0 ? 
warnings.join("\n") : null); + } else { + loadTrill(result as any); + setImportWarning(null); + } } catch (err) { console.error("Invalid notebook file:", err); } @@ -342,6 +350,12 @@ export default function UpMenu({ setTutorialOpen(false); }, [tutorialOpen]); + useEffect(() => { + if (!importWarning) return; + const id = window.setTimeout(() => setImportWarning(null), 3000); + return () => window.clearTimeout(id); + }, [importWarning]); + return ( <> + {importWarning && ( +
+ Notebook import notice: {importWarning} +
+ )} Curio logo diff --git a/utk_curio/frontend/urban-workflows/src/notebook-convertor/codegen.ts b/utk_curio/frontend/urban-workflows/src/notebook-convertor/codegen.ts new file mode 100644 index 00000000..92bc52a8 --- /dev/null +++ b/utk_curio/frontend/urban-workflows/src/notebook-convertor/codegen.ts @@ -0,0 +1,181 @@ +function buildInputPrelude(inputs: string[]): { inputLines: string; argBlock: string } { + const inputLines = inputs.map((value, index) => `input_${index} = ${value}`).join("\n"); + + let argBlock = ""; + if (inputs.length === 1) { + argBlock = "arg = input_0\n"; + } else if (inputs.length > 1) { + argBlock = `arg = [${inputs.map((_, index) => `input_${index}`).join(", ")}]\n`; + } + + return { inputLines, argBlock }; +} + +function indentNonEmptyLines(text: string, spaces: number): string { + const prefix = " ".repeat(spaces); + return text + .split("\n") + .map((line) => (line.trim().length > 0 ? `${prefix}${line}` : line)) + .join("\n"); +} + +export function buildComputationBody(inputs: string[], code: string): string { + const { inputLines, argBlock } = buildInputPrelude(inputs); + return `${inputLines}\n${argBlock}\n${code}\n`; +} + +export function buildMergeFlowBody(inputs: string[]): string { + const joinedInputs = inputs.join(",\n"); + const indentedInputs = indentNonEmptyLines(joinedInputs, 4); + return `\ninputs = [\n${indentedInputs}\n]\n\nmerged_inputs = [i for i in inputs if i is not None]\n\nreturn merged_inputs\n`; +} + +export function buildDataPoolBody(source: string): string { + return `\nreturn ${source}\n`; +} + +export function buildVegaVisualizationBody(inputVar: string, normalizedSpecCode: string): string { + return `\ninput_data = ${inputVar}\n\nspec = ${normalizedSpecCode.trim()}\n\nvalues = input_data\nif hasattr(input_data, "to_dict"):\n values = input_data.to_dict(orient="records")\n\nif isinstance(spec, dict):\n spec["data"] = {"values": values}\n\nfrom IPython.display import 
display\ndisplay({"application/vnd.vegalite.v5+json": spec, "text/plain": spec}, raw=True)\n\nreturn input_data\n`; +} + +export function buildTableVisualizationBody(inputVar: string): string { + return `\ninput_data = ${inputVar}\nfrom IPython.display import display\ndisplay(input_data)\n\nreturn input_data\n`; +} + +export function buildTextVisualizationBody(inputVar: string): string { + return `\ninput_data = ${inputVar}\nfrom IPython.display import display\ndisplay(str(input_data))\n\nreturn input_data\n`; +} + +export function buildImageVisualizationBody(inputVar: string): string { + return `\ninput_data = ${inputVar}\nfrom IPython.display import display, Image\ntry:\n display(Image(input_data))\nexcept Exception:\n display(input_data)\n\nreturn input_data\n`; +} + +export function buildConstantsBody(code: string): string { + return `\n${code}\n`; +} + +export function buildCommentBody(content: string): string { + if (!content) return "\n"; + return content + .split("\n") + .map((line) => `# ${line}`) + .join("\n") + "\n"; +} + +export function buildUtkVisualizationBody(inputs: string[], utkCode: string): string { + const inputLines = inputs.map((value, index) => `input_${index} = ${value}`).join("\n"); + return `${inputLines}\n\n${utkCode}`; +} + +export function getUtkDataVar(inputs: string[]): string { + if (inputs.length === 1) { + return "input_0"; + } + + if (inputs.length > 1) { + return `[${inputs.map((_, index) => `input_${index}`).join(", ")}]`; + } + + return "None"; +} + +export function buildUtkNotebookCode(nodeContent: string, containerId: string, dataVar: string, nodeId: string): string { + const nodeIdShort = nodeId.substring(0, 8); + + return ` +# Configure UTK for serverless/notebook environment +import utk +import json +from IPython.display import HTML, Javascript, display + +utk.Environment.serverless = True + +# Create grammar structure +grammar = { + "components": [{ + "id": "notebook_map", + "json": { + "camera": { + "wEye": [0, 0, 
1000], + "wLookAt": [0, 0, 0], + "wUp": [0, 1, 0] + }, + "grid": {"width": 12, "height": 4}, + "knots": [], + "map_style": [], + "widgets": [{ + "type": "TOGGLE_KNOT" + }] + }, + "position": {"x": 0, "y": 0, "width": 12, "height": 4} + }], + "grid": {"width": 12, "height": 4}, + "knots": [] +} + +# If content has grammar, parse and merge it +grammar_content = """${nodeContent || "{}"}""".strip() +if grammar_content and grammar_content != "{}": + try: + parsed_grammar = json.loads(grammar_content) + # Merge parsed grammar with our structure + if "components" in parsed_grammar: + grammar["components"][0]["json"].update(parsed_grammar.get("json", {})) + if "knots" in parsed_grammar: + grammar["knots"] = parsed_grammar["knots"] + except json.JSONDecodeError: + pass + +# Load geospatial data if available +geospatial_data = None +if ${dataVar} is not None: + data_input = ${dataVar} + # Handle multi-input case + if isinstance(data_input, list): + data_input = data_input[0] if data_input else None + + if data_input is not None: + # Check if it's a geodataframe + try: + import geopandas as gpd + if isinstance(data_input, gpd.GeoDataFrame): + # Convert to GeoJSON + geojson_data = json.loads(data_input.to_json()) + geospatial_data = utk.physical_from_geojson(geojson_data) + + # Add layers to grammar + if geospatial_data and "components" in grammar: + if "layers" not in grammar["components"][0]["json"]: + grammar["components"][0]["json"]["layers"] = [] + # Add layer for the geospatial data + grammar["components"][0]["json"]["layers"].append({ + "type": "geospatial", + "data": geospatial_data.to_dict() if hasattr(geospatial_data, 'to_dict') else geospatial_data + }) + except Exception as e: + pass + +# Create HTML container +html_container = f'
' +display(HTML(html_container)) + +# Initialize UTK in browser +js_initialization = f""" +require(['utk'], function(utk) {{ + utk.Environment.serverless = true; + const container = document.getElementById('${containerId}'); + const grammar = {json.dumps(grammar)}; + + try {{ + const interpreter = new utk.GrammarInterpreter('notebook', grammar, container); + // Store reference for potential interactions + window._utk_interpreter_${nodeIdShort} = interpreter; + }} catch(e) {{ + console.error('UTK initialization error:', e); + container.innerHTML = '
Error initializing UTK visualization
'; + }} +}}); +""" +display(Javascript(js_initialization)) +`; +} diff --git a/utk_curio/frontend/urban-workflows/src/adapters/grammarDetection.ts b/utk_curio/frontend/urban-workflows/src/notebook-convertor/grammarDetection.ts similarity index 100% rename from utk_curio/frontend/urban-workflows/src/adapters/grammarDetection.ts rename to utk_curio/frontend/urban-workflows/src/notebook-convertor/grammarDetection.ts diff --git a/utk_curio/frontend/urban-workflows/src/notebook-convertor/graph.ts b/utk_curio/frontend/urban-workflows/src/notebook-convertor/graph.ts new file mode 100644 index 00000000..fa0a3115 --- /dev/null +++ b/utk_curio/frontend/urban-workflows/src/notebook-convertor/graph.ts @@ -0,0 +1,271 @@ +import { v4 as uuid } from "uuid"; +import { GraphNodeInfo, NotebookTrillConnection, NotebookTrillConnectionsMetadata, TrillEdge, TrillNode } from "./types"; + +/** + * Build an execution graph from nodes and edges. + * Execution graph tracks dependencies, dependents, and input/output connections for each node. + */ +export function buildExecutionGraph( + nodes: TrillNode[], + edges: TrillEdge[], +): Record { + const graph: Record = {}; + + for (const node of nodes) { + graph[node.id] = { + node, + dependencies: new Set(), + dependents: new Set(), + inputs: {}, + outputs: {}, + }; + } + + for (const edge of edges) { + const source = edge.source; + const target = edge.target; + + if (!graph[source] || !graph[target]) { + continue; + } + + const sourceInfo = graph[source]; + const targetInfo = graph[target]; + const bidirectional = isBidirectionalEdge(edge); + + if (!bidirectional) { + targetInfo.dependencies.add(source); + sourceInfo.dependents.add(target); + } + + const targetHandle = edge.targetHandle ?? (bidirectional ? "in/out" : "in"); + const sourceHandle = edge.sourceHandle ?? (bidirectional ? 
"in/out" : "out"); + + if (!targetInfo.inputs[targetHandle]) { + targetInfo.inputs[targetHandle] = []; + } + + targetInfo.inputs[targetHandle].push({ + source, + sourceHandle, + bidirectional, + }); + + if (!sourceInfo.outputs[sourceHandle]) { + sourceInfo.outputs[sourceHandle] = []; + } + + sourceInfo.outputs[sourceHandle].push({ + target, + targetHandle, + bidirectional, + }); + } + + return graph; +} + +/** + * Perform topological sort on execution graph to determine execution order. + */ +export function topologicalSort(executionGraph: Record): string[] { + const visited = new Set(); + const visiting = new Set(); + const result: string[] = []; + + const visit = (nodeId: string): void => { + if (visiting.has(nodeId)) { + throw new Error("Circular dependency detected"); + } + + if (visited.has(nodeId)) { + return; + } + + visiting.add(nodeId); + + for (const dep of executionGraph[nodeId].dependencies) { + visit(dep); + } + + visiting.delete(nodeId); + visited.add(nodeId); + result.push(nodeId); + }; + + for (const nodeId of Object.keys(executionGraph)) { + visit(nodeId); + } + + return result; +} + +/** + * Generate output variable name for a node based on its type and handle. + */ +export function getOutputVariable( + nodeId: string, + nodeType: string, + sourceHandle: string = "out", +): string { + const safeId = sanitizeId(nodeId); + + let baseOutput = ""; + + // Check for specific node types + if (nodeType === "DATA_LOADING") { + baseOutput = `data_${safeId}`; + } else if (nodeType === "MERGE_FLOW") { + baseOutput = `merged_${safeId}`; + } else if (nodeType === "DATA_POOL") { + baseOutput = `pool_${safeId}`; + } else { + baseOutput = `result_${safeId}`; + } + + if (!sourceHandle || sourceHandle === "out") { + return baseOutput; + } + + return `${baseOutput}_${sanitizeId(sourceHandle)}`; +} + +/** + * Get all output variables for a node. 
+ */ +export function getOutputVariables( + nodeId: string, + nodeType: string, + executionGraph: Record, +): string[] { + const outputHandles = new Set(["out"]); + const nodeInfo = executionGraph[nodeId]; + + for (const outputHandle of Object.keys(nodeInfo.outputs)) { + outputHandles.add(outputHandle); + } + + return Array.from(outputHandles).map((handle) => + getOutputVariable(nodeId, nodeType, handle), + ); +} + +/** + * Get input variables for a node from its incoming connections. + */ +export function getInputVariables( + nodeInfo: GraphNodeInfo, + executionGraph: Record, +): string[] { + const variables: string[] = []; + + for (const connections of Object.values(nodeInfo.inputs)) { + for (const inputInfo of connections) { + if (!inputInfo.bidirectional) { + const sourceNodeType = executionGraph[inputInfo.source].node.type; + variables.push( + getOutputVariable(inputInfo.source, sourceNodeType, inputInfo.sourceHandle), + ); + } + } + } + + return variables; +} + +/** + * Sanitize node IDs by replacing non-alphanumeric characters. + */ +export function sanitizeId(nodeId: string): string { + return nodeId.replace(/[^a-zA-Z0-9]/g, "_"); +} + +/** + * Check if an edge is bidirectional. + */ +export function isBidirectionalEdge(edge: TrillEdge): boolean { + return ( + edge.type === "Interaction" || + edge.sourceHandle === "in/out" || + edge.targetHandle === "in/out" + ); +} + +/** + * Generate a unique key for an edge for deduplication. + */ +export function edgeKey(edge: TrillEdge): string { + return [ + edge.source, + edge.target, + edge.sourceHandle ?? "", + edge.targetHandle ?? "", + edge.type ?? "", + ].join("::"); +} + +/** + * Normalize notebook connection to TrillEdge format. 
+ */ +export function normalizeNotebookConnection(connection: NotebookTrillConnection): TrillEdge | null { + if (!connection.source || !connection.target) { + return null; + } + + const bidirectional = + connection.bidirectional === true || + connection.type === "Interaction" || + connection.sourceHandle === "in/out" || + connection.targetHandle === "in/out"; + + return { + id: connection.id ?? `edge_${uuid()}`, + source: connection.source, + target: connection.target, + sourceHandle: connection.sourceHandle ?? (bidirectional ? "in/out" : "out"), + targetHandle: connection.targetHandle ?? (bidirectional ? "in/out" : "in"), + type: bidirectional ? "Interaction" : connection.type, + }; +} + +/** + * Build notebook-format connections metadata for a node. + * This metadata is serialized into the notebook cell for export. + */ +export function buildNotebookTrillConnectionsMetadata( + nodeId: string, + executionGraph: Record, +): NotebookTrillConnectionsMetadata { + const nodeInfo = executionGraph[nodeId]; + const inputs: NotebookTrillConnection[] = []; + const outputs: NotebookTrillConnection[] = []; + + for (const [targetHandle, connections] of Object.entries(nodeInfo.inputs)) { + for (const connection of connections) { + inputs.push({ + source: connection.source, + target: nodeId, + sourceHandle: connection.sourceHandle, + targetHandle, + type: connection.bidirectional ? "Interaction" : undefined, + }); + } + } + + for (const [sourceHandle, connections] of Object.entries(nodeInfo.outputs)) { + for (const connection of connections) { + outputs.push({ + source: nodeId, + target: connection.target, + sourceHandle, + targetHandle: connection.targetHandle, + type: connection.bidirectional ? 
"Interaction" : undefined, + }); + } + } + + return { + inputs, + outputs, + }; +} diff --git a/utk_curio/frontend/urban-workflows/src/notebook-convertor/importEdges.ts b/utk_curio/frontend/urban-workflows/src/notebook-convertor/importEdges.ts new file mode 100644 index 00000000..1b2ed7ee --- /dev/null +++ b/utk_curio/frontend/urban-workflows/src/notebook-convertor/importEdges.ts @@ -0,0 +1,116 @@ +import { NotebookTrillConnection, NotebookTrillConnectionsMetadata, TrillEdge, TrillNode } from "./types"; + +export function collectExplicitEdgesFromCells( + rawCells: unknown[], + extractConnections: (code: string) => NotebookTrillConnectionsMetadata | null, + normalizeConnection: (connection: NotebookTrillConnection) => TrillEdge | null, + edgeKey: (edge: TrillEdge) => string, +): { sawExplicitConnections: boolean; explicitEdges: TrillEdge[] } { + let sawExplicitConnections = false; + const explicitEdges = new Map(); + + for (const rawCell of rawCells) { + const cell = rawCell as Record; + if (cell.cell_type !== "code") { + continue; + } + + const source = cell.source; + const code = Array.isArray(source) ? source.join("") : String(source ?? ""); + const notebookConnectionsMeta = extractConnections(code); + if (!notebookConnectionsMeta) { + continue; + } + + const serializedConnections = [ + ...(notebookConnectionsMeta.outputs ?? []), + ...(notebookConnectionsMeta.inputs ?? 
[]), + ]; + + if (serializedConnections.length > 0) { + sawExplicitConnections = true; + } + + for (const connection of serializedConnections) { + const edge = normalizeConnection(connection); + if (!edge) { + continue; + } + + const key = edgeKey(edge); + if (!explicitEdges.has(key)) { + explicitEdges.set(key, edge); + } + } + } + + return { + sawExplicitConnections, + explicitEdges: Array.from(explicitEdges.values()), + }; +} + +export function inferEdgesFromVariables( + nodes: TrillNode[], + nodeInputs: Record, + producedByVar: Record, + isMergeFlow: (node: TrillNode) => boolean, + createEdgeId: () => string, +): TrillEdge[] { + const edges: TrillEdge[] = []; + const targetInputCount: Record = {}; + const edgeKeys = new Set(); + + for (const node of nodes) { + const inputs = nodeInputs[node.id] ?? []; + for (const inputVar of inputs) { + const sourceNodeId = producedByVar[inputVar]; + if (!sourceNodeId || sourceNodeId === node.id) { + continue; + } + + const edgeKey = `${sourceNodeId}->${node.id}::${inputVar}`; + if (edgeKeys.has(edgeKey)) { + continue; + } + edgeKeys.add(edgeKey); + + let targetHandle = "in"; + if (isMergeFlow(node)) { + const count = targetInputCount[node.id] ?? 
0; + targetHandle = `in_${count}`; + targetInputCount[node.id] = count + 1; + } + + edges.push({ + id: createEdgeId(), + source: sourceNodeId, + sourceHandle: "out", + target: node.id, + targetHandle, + }); + } + } + + return edges; +} + +export function buildLinearFallbackEdges(nodes: TrillNode[], createEdgeId: () => string): TrillEdge[] { + const edges: TrillEdge[] = []; + let linearPreviousId: string | null = null; + + for (const node of nodes) { + if (linearPreviousId) { + edges.push({ + id: createEdgeId(), + source: linearPreviousId, + sourceHandle: "out", + target: node.id, + targetHandle: "in", + }); + } + linearPreviousId = node.id; + } + + return edges; +} diff --git a/utk_curio/frontend/urban-workflows/src/notebook-convertor/metadata.ts b/utk_curio/frontend/urban-workflows/src/notebook-convertor/metadata.ts new file mode 100644 index 00000000..8b830641 --- /dev/null +++ b/utk_curio/frontend/urban-workflows/src/notebook-convertor/metadata.ts @@ -0,0 +1,117 @@ +export function extractAssignedObjectLiteral(code: string, variableName: string): string | null { + const assignmentPattern = new RegExp(`${variableName}\\s*=`); + const assignmentMatch = assignmentPattern.exec(code); + if (!assignmentMatch) { + return null; + } + + const assignmentStart = assignmentMatch.index + assignmentMatch[0].length; + const objectStart = code.indexOf("{", assignmentStart); + if (objectStart < 0) { + return null; + } + + let depth = 0; + let inString = false; + let quoteChar = ""; + + for (let index = objectStart; index < code.length; index += 1) { + const char = code[index]; + + if (inString) { + if (char === "\\") { + index += 1; + continue; + } + + if (char === quoteChar) { + inString = false; + quoteChar = ""; + } + continue; + } + + if (char === '"' || char === "'") { + inString = true; + quoteChar = char; + continue; + } + + if (char === "{") { + depth += 1; + } else if (char === "}") { + depth -= 1; + if (depth === 0) { + return code.slice(objectStart, index + 1); + } + 
} + } + + return null; +} + +function removeAssignedObjectVariable(text: string, variableName: string): string { + const assignmentPattern = new RegExp(`${variableName}\\s*=`); + const assignmentMatch = assignmentPattern.exec(text); + if (!assignmentMatch) { + return text; + } + + const objectStart = text.indexOf("{", assignmentMatch.index + assignmentMatch[0].length); + if (objectStart < 0) { + return text; + } + + let depth = 0; + let inString = false; + let quoteChar = ""; + let objectEnd = -1; + + for (let index = objectStart; index < text.length; index += 1) { + const char = text[index]; + + if (inString) { + if (char === "\\") { + index += 1; + continue; + } + + if (char === quoteChar) { + inString = false; + quoteChar = ""; + } + continue; + } + + if (char === '"' || char === "'") { + inString = true; + quoteChar = char; + continue; + } + + if (char === "{") { + depth += 1; + } else if (char === "}") { + depth -= 1; + if (depth === 0) { + objectEnd = index; + break; + } + } + } + + if (objectEnd < 0) { + return text; + } + + let endIndex = objectEnd + 1; + while (endIndex < text.length && (text[endIndex] === "\n" || text[endIndex] === "\r")) { + endIndex += 1; + } + + return text.slice(0, assignmentMatch.index) + text.slice(endIndex); +} + +export function removeAssignedObjectVariables(code: string, variableNames: string[]): string { + return variableNames.reduce((current, variableName) => removeAssignedObjectVariable(current, variableName), code); +} diff --git a/utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts b/utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts new file mode 100644 index 00000000..d8e543df --- /dev/null +++ b/utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts @@ -0,0 +1,285 @@ +export function replaceKeywordsOutsideStrings(text: string, replacements: Record): string { + const keys = Object.keys(replacements).sort((a, b) => b.length - a.length); + let result = ""; + let index = 0; + 
let inString = false; + let quoteChar = ""; + + const isIdentifierChar = (char: string | undefined): boolean => { + if (!char) { + return false; + } + + const code = char.charCodeAt(0); + return ( + (code >= 65 && code <= 90) || + (code >= 97 && code <= 122) || + (code >= 48 && code <= 57) || + char === "_" + ); + }; + + while (index < text.length) { + const char = text[index]; + + if (inString) { + result += char; + + if (char === "\\") { + index += 1; + if (index < text.length) { + result += text[index]; + } + } else if (char === quoteChar) { + inString = false; + quoteChar = ""; + } + + index += 1; + continue; + } + + if (char === '"' || char === "'") { + inString = true; + quoteChar = char; + result += char; + index += 1; + continue; + } + + let replaced = false; + + for (const key of keys) { + if (!text.startsWith(key, index)) { + continue; + } + + const prev = index > 0 ? text[index - 1] : undefined; + const next = index + key.length < text.length ? text[index + key.length] : undefined; + + if (isIdentifierChar(prev) || isIdentifierChar(next)) { + continue; + } + + result += replacements[key]; + index += key.length; + replaced = true; + break; + } + + if (!replaced) { + result += char; + index += 1; + } + } + + return result; +} + +export function deindentText(text: string, spaces: number): string { + const prefix = " ".repeat(spaces); + return text + .split("\n") + .map((line) => (line.startsWith(prefix) ? 
line.slice(spaces) : line)) + .join("\n"); +} + +export function stripGeneratedNodePrelude(code: string): string { + const lines = code.split("\n"); + let index = 0; + + while (index < lines.length) { + const line = lines[index].trim(); + + if (line.startsWith("input_") && line.includes(" = ")) { + index += 1; + continue; + } + + if (line.startsWith("arg = ")) { + index += 1; + continue; + } + + break; + } + + let end = lines.length; + + while (end > index) { + const line = lines[end - 1].trim(); + + if (line === "return input_data") { + end -= 1; + continue; + } + + if (line === "") { + end -= 1; + continue; + } + + break; + } + + return lines.slice(index, end).join("\n").trimEnd(); +} + +export function extractVegaLiteSpecCode(code: string): string { + const specAssignMarker = "spec ="; + const specAssignStart = code.indexOf(specAssignMarker); + + if (specAssignStart < 0) { + return code; + } + + const objectStart = code.indexOf("{", specAssignStart); + if (objectStart < 0) { + return code; + } + + let depth = 0; + let objectEnd = -1; + + for (let index = objectStart; index < code.length; index += 1) { + const char = code[index]; + + if (char === "{") { + depth += 1; + } else if (char === "}") { + depth -= 1; + if (depth === 0) { + objectEnd = index; + break; + } + } + } + + if (objectEnd < 0) { + return code; + } + + return code.slice(objectStart, objectEnd + 1).trim(); +} + +function isSimpleVariableName(value: string): boolean { + if (!value) { + return false; + } + + const first = value.charCodeAt(0); + const startsWithLetterOrUnderscore = + value[0] === "_" || + (first >= 65 && first <= 90) || + (first >= 97 && first <= 122); + + if (!startsWithLetterOrUnderscore) { + return false; + } + + for (let i = 1; i < value.length; i += 1) { + const char = value[i]; + const code = value.charCodeAt(i); + const isAlphaNum = + (code >= 65 && code <= 90) || + (code >= 97 && code <= 122) || + (code >= 48 && code <= 57); + if (!(isAlphaNum || char === "_")) { + return 
false; + } + } + + return true; +} + +export function extractInputVariables(code: string): string[] { + const variables: string[] = []; + const lines = code.split("\n"); + let inInputsBlock = false; + + const pushIfSimpleVariable = (value: string): void => { + const normalized = value.trim().replace(/,$/, ""); + if (isSimpleVariableName(normalized)) { + variables.push(normalized); + } + }; + + for (const rawLine of lines) { + const line = rawLine.trim(); + + if (inInputsBlock) { + const closeIndex = line.indexOf("]"); + if (closeIndex >= 0) { + const token = line.slice(0, closeIndex).trim(); + if (token.length > 0) { + pushIfSimpleVariable(token); + } + inInputsBlock = false; + continue; + } + + if (line.length > 0) { + pushIfSimpleVariable(line); + } + continue; + } + + if (line.startsWith("inputs = [")) { + const afterBracket = line.slice("inputs = [".length).trim(); + const closeIndex = afterBracket.indexOf("]"); + + if (closeIndex >= 0) { + const token = afterBracket.slice(0, closeIndex).trim(); + if (token.length > 0) { + pushIfSimpleVariable(token); + } + } else { + if (afterBracket.length > 0) { + pushIfSimpleVariable(afterBracket); + } + inInputsBlock = true; + } + continue; + } + + if (line.startsWith("input_") && line.includes(" = ")) { + const rhs = line.slice(line.indexOf("=") + 1).trim(); + pushIfSimpleVariable(rhs); + } + + if (line.startsWith("input_data = ")) { + const rhs = line.slice("input_data = ".length).trim(); + pushIfSimpleVariable(rhs); + } + } + + return Array.from(new Set(variables)); +} + +export function extractProducedVariables(code: string): string[] { + const variables: string[] = []; + const lines = code.split("\n"); + let primaryVar = ""; + + for (const rawLine of lines) { + const line = rawLine.trim(); + + if (line.endsWith("= _curio_output")) { + const lhs = line.slice(0, line.indexOf("=")).trim(); + if (isSimpleVariableName(lhs)) { + primaryVar = lhs; + variables.push(lhs); + } + continue; + } + + if (primaryVar && 
line.endsWith(`= ${primaryVar}`)) { + const lhs = line.slice(0, line.indexOf("=")).trim(); + if (isSimpleVariableName(lhs)) { + variables.push(lhs); + } + } + } + + return Array.from(new Set(variables)); +} diff --git a/utk_curio/frontend/urban-workflows/src/notebook-convertor/types.ts b/utk_curio/frontend/urban-workflows/src/notebook-convertor/types.ts new file mode 100644 index 00000000..43b401bc --- /dev/null +++ b/utk_curio/frontend/urban-workflows/src/notebook-convertor/types.ts @@ -0,0 +1,92 @@ +export type JsonValue = string | number | boolean | null | JsonObject | JsonValue[]; + +export interface JsonObject { + [key: string]: JsonValue; +} + +export interface NotebookCell { + cell_type: "code" | "markdown" | string; + source: string; + metadata?: Record; +} + +export interface Notebook { + cells: NotebookCell[]; + metadata: Record; + nbformat: number; + nbformat_minor: number; +} + +export interface TrillNode { + id: string; + type: string; + x: number; + y: number; + content?: string; + in?: string; + out?: string; +} + +export interface TrillEdge { + id: string; + source: string; + target: string; + sourceHandle?: string; + targetHandle?: string; + type?: string; +} + +export interface TrillDataflow { + nodes: TrillNode[]; + edges: TrillEdge[]; + name: string; + task: string; + timestamp: number; + provenance_id: string; +} + +export interface TrillSpec { + dataflow: TrillDataflow; +} + +export interface InputConnection { + source: string; + sourceHandle: string; + bidirectional: boolean; +} + +export interface OutputConnection { + target: string; + targetHandle: string; + bidirectional: boolean; +} + +export interface GraphNodeInfo { + node: TrillNode; + dependencies: Set; + dependents: Set; + inputs: Record; + outputs: Record; +} + +export interface TrillMeta { + id?: string; + type?: string; + in?: string; + out?: string; +} + +export interface NotebookTrillConnection { + id?: string; + source: string; + target: string; + sourceHandle?: string; + 
targetHandle?: string; + bidirectional?: boolean; + type?: string; +} + +export interface NotebookTrillConnectionsMetadata { + inputs?: NotebookTrillConnection[]; + outputs?: NotebookTrillConnection[]; +} From 721cf80e9698131fce21704589bb0b4596c5ecf3 Mon Sep 17 00:00:00 2001 From: Jaideep Nutalapati Date: Tue, 28 Apr 2026 00:18:02 -0500 Subject: [PATCH 10/13] implemented the extract utk spec code --- .../urban-workflows/src/NotebookConvertor.ts | 13 +++++- .../src/notebook-convertor/parsing.ts | 45 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts b/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts index f289c673..8a87d0a5 100644 --- a/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts +++ b/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts @@ -43,6 +43,7 @@ import { extractInputVariables, extractProducedVariables, extractVegaLiteSpecCode, + extractUtkSpecCode, replaceKeywordsOutsideStrings, stripGeneratedNodePrelude, } from "./notebook-convertor/parsing"; @@ -144,7 +145,9 @@ export class TrillNotebookConverter { const cleanCode = nodeType === NodeType.VIS_VEGA ? this.normalizeVegaSpecForCurio(extractVegaLiteSpecCode(cleanCodeBody)) - : cleanCodeBody; + : nodeType === NodeType.VIS_UTK + ? 
this.normalizeUtkSpecForCurio(extractUtkSpecCode(cleanCodeBody)) + : cleanCodeBody; const node: TrillNode = { id: nodeId, @@ -470,6 +473,14 @@ export class TrillNotebookConverter { }); } + private normalizeUtkSpecForCurio(specCode: string): string { + return replaceKeywordsOutsideStrings(specCode, { + True: "true", + False: "false", + None: "null", + }); + } + private indent(text: string, spaces: number): string { const prefix = " ".repeat(spaces); return text diff --git a/utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts b/utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts index d8e543df..f9350d79 100644 --- a/utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts +++ b/utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts @@ -162,6 +162,51 @@ export function extractVegaLiteSpecCode(code: string): string { return code.slice(objectStart, objectEnd + 1).trim(); } +export function extractUtkSpecCode(code: string): string { + const markers = ["grammar =", "utk_spec =", "utk_grammar ="]; + let specAssignStart = -1; + + for (const marker of markers) { + const index = code.indexOf(marker); + if (index >= 0) { + specAssignStart = index; + break; + } + } + + if (specAssignStart < 0) { + return code; + } + + const objectStart = code.indexOf("{", specAssignStart); + if (objectStart < 0) { + return code; + } + + let depth = 0; + let objectEnd = -1; + + for (let index = objectStart; index < code.length; index += 1) { + const char = code[index]; + + if (char === "{") { + depth += 1; + } else if (char === "}") { + depth -= 1; + if (depth === 0) { + objectEnd = index; + break; + } + } + } + + if (objectEnd < 0) { + return code; + } + + return code.slice(objectStart, objectEnd + 1).trim(); +} + function isSimpleVariableName(value: string): boolean { if (!value) { return false; From 09ef1b5160079c06c04f42afa0b4a3c310c0cc64 Mon Sep 17 00:00:00 2001 From: Jaideep Nutalapati Date: Tue, 28 Apr 2026 05:52:52 -0500 
Subject: [PATCH 11/13] fixed image, utk, mergeflow nodes for notebook conversion --- .../urban-workflows/src/NotebookConvertor.ts | 83 +++++++++++++++++-- .../src/notebook-convertor/codegen.ts | 56 ++++++++++++- .../src/notebook-convertor/graph.ts | 27 +++++- .../src/notebook-convertor/parsing.ts | 12 +++ utk_curio/sandbox/app/api.py | 12 ++- utk_curio/sandbox/python_wrapper.txt | 42 ++++++---- 6 files changed, 202 insertions(+), 30 deletions(-) diff --git a/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts b/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts index 8a87d0a5..5700a0f1 100644 --- a/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts +++ b/utk_curio/frontend/urban-workflows/src/NotebookConvertor.ts @@ -174,11 +174,32 @@ export class TrillNotebookConverter { (edge) => edgeKey(edge), ); - if (sawExplicitConnections) { - edges.push(...explicitEdges); + // MergeFlow edges are built from the ordered variable list in the cell body + // (the `inputs = [var0, var1, ...]` list), assigning in_0/in_1/... by position. + // Edge IDs embed the slot handle so useCode.ts can recover targetHandle on load. + const mergeFlowTargetIds = new Set( + nodes.filter((n) => n.type === NodeType.MERGE_FLOW).map((n) => n.id), + ); + const mergeFlowEdges: TrillEdge[] = []; + for (const node of nodes) { + if (node.type !== NodeType.MERGE_FLOW) continue; + (nodeInputs[node.id] ?? 
[]).forEach((varName, slotIndex) => { + const sourceId = producedByVar[varName]; + if (!sourceId || sourceId === node.id) return; + const handle = `in_${slotIndex}`; + mergeFlowEdges.push({ + id: `edge_${handle}_${sanitizeId(sourceId)}_${sanitizeId(node.id)}`, + source: sourceId, + sourceHandle: "out", + target: node.id, + targetHandle: handle, + }); + }); } - if (!sawExplicitConnections) { + if (sawExplicitConnections) { + edges.push(...explicitEdges.filter((e) => !mergeFlowTargetIds.has(e.target))); + } else { edges.push( ...inferEdgesFromVariables( nodes, @@ -186,10 +207,12 @@ export class TrillNotebookConverter { producedByVar, (node) => node.type === NodeType.MERGE_FLOW, () => `edge_${uuid()}`, - ), + ).filter((e) => !mergeFlowTargetIds.has(e.target)), ); } + edges.push(...mergeFlowEdges); + if (edges.length === 0) { edges.push(...buildLinearFallbackEdges(nodes, () => `edge_${uuid()}`)); } @@ -453,6 +476,24 @@ export class TrillNotebookConverter { return NodeType.VIS_VEGA; } + // Detect VIS_IMAGE patterns + const imagePattern = /from IPython\.display import.*Image|display\(Image\(|\.to_json\(\)|image_id|image_content/; + if (imagePattern.test(codeWithoutMeta)) { + return NodeType.VIS_IMAGE; + } + + // Detect VIS_TABLE patterns + const tablePattern = /from IPython\.display import.*display\s*\n.*display\s*\(.*input_data\s*\)|\.display\(\)|DataFrame.*display/; + if (tablePattern.test(codeWithoutMeta)) { + return NodeType.VIS_TABLE; + } + + // Detect VIS_TEXT patterns + const textPattern = /display\(str\(.*input_data.*\)\)/; + if (textPattern.test(codeWithoutMeta)) { + return NodeType.VIS_TEXT; + } + return NodeType.COMPUTATION_ANALYSIS; } @@ -567,18 +608,42 @@ export class TrillNotebookConverter { const tryStart = code.indexOf(tryMarker, outputStart >= 0 ? outputStart : 0); const exceptStart = code.indexOf(exceptMarker, tryStart >= 0 ? 
tryStart : 0); + // If any of the expected markers are missing, return code as-is if (functionStart < 0 || outputStart < 0 || tryStart < 0 || exceptStart < 0) { return code; } - const bodyStart = code.indexOf("\n\n", functionStart); - const bodyEnd = code.lastIndexOf("\n\n", outputStart); - - if (bodyStart < 0 || bodyEnd < 0 || bodyEnd <= bodyStart) { + // Find the start of the function body (after "def _curio_node():") + const functionDefEnd = functionStart + functionMarker.length; + const bodyStartSearch = code.indexOf("\n", functionDefEnd); + if (bodyStartSearch < 0) { return code; } - const body = code.slice(bodyStart + 2, bodyEnd); + // Look for the first non-empty line after the function definition + let bodyStart = bodyStartSearch; + let bodyStartLineNum = bodyStart; + + // Skip to start of actual code body (after function definition line) + // Allow for empty lines immediately after function def + while (bodyStart < outputStart && code[bodyStart] === '\n') { + bodyStart++; + } + + // Find end of body (just before _curio_output assignment) + let bodyEnd = outputStart - 1; + + // Trim trailing whitespace/newlines before the output marker + while (bodyEnd > bodyStart && /[\n\s]/.test(code[bodyEnd])) { + bodyEnd--; + } + + if (bodyEnd <= bodyStart) { + // Body is empty + return ""; + } + + const body = code.slice(bodyStart, bodyEnd + 1); const deindentedBody = deindentText(body, 4).trimEnd(); return stripGeneratedNodePrelude(deindentedBody).trim(); } diff --git a/utk_curio/frontend/urban-workflows/src/notebook-convertor/codegen.ts b/utk_curio/frontend/urban-workflows/src/notebook-convertor/codegen.ts index 92bc52a8..671f83f2 100644 --- a/utk_curio/frontend/urban-workflows/src/notebook-convertor/codegen.ts +++ b/utk_curio/frontend/urban-workflows/src/notebook-convertor/codegen.ts @@ -47,9 +47,63 @@ export function buildTextVisualizationBody(inputVar: string): string { } export function buildImageVisualizationBody(inputVar: string): string { - return 
`\ninput_data = ${inputVar}\nfrom IPython.display import display, Image\ntry:\n display(Image(input_data))\nexcept Exception:\n display(input_data)\n\nreturn input_data\n`; + return ` +input_data = ${inputVar} +from IPython.display import display, Image, HTML +import pandas as pd +from io import BytesIO +import base64 + +# Handle both DataFrame and direct image input +if isinstance(input_data, pd.DataFrame): + # Expecting DataFrame with 'image_id' and 'image_content' columns (base64 encoded) + cols = input_data.columns.tolist() + image_col = 'image_content' if 'image_content' in cols else (cols[1] if len(cols) > 1 else None) + id_col = 'image_id' if 'image_id' in cols else cols[0] + + if image_col is None: + display(input_data) + else: + # Create HTML grid for images + grid_html = '
' + + for idx, row in input_data.iterrows(): + image_id = str(row[id_col]) if id_col in row else f"Image {idx}" + image_content = row[image_col] + + try: + # Check if image_content is base64 string + if isinstance(image_content, str): + if not image_content.startswith('data:image'): + image_content = f'data:image/png;base64,{image_content}' + grid_html += f'

{image_id}

' + else: + grid_html += f'

Invalid image format for {image_id}

' + except Exception as e: + grid_html += f'

Error displaying {image_id}: {str(e)}

' + + grid_html += '
' + display(HTML(grid_html)) +else: + # Single image or fallback + try: + if isinstance(input_data, str): + # Base64 encoded image or file path + if input_data.startswith('data:image') or input_data.startswith('/') or input_data.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp')): + display(Image(input_data)) + else: + # Assume base64 encoded + display(Image(data=base64.b64decode(input_data))) + else: + display(Image(input_data)) + except Exception: + display(input_data) + +return input_data +`; } + export function buildConstantsBody(code: string): string { return `\n${code}\n`; } diff --git a/utk_curio/frontend/urban-workflows/src/notebook-convertor/graph.ts b/utk_curio/frontend/urban-workflows/src/notebook-convertor/graph.ts index fa0a3115..e93c90d5 100644 --- a/utk_curio/frontend/urban-workflows/src/notebook-convertor/graph.ts +++ b/utk_curio/frontend/urban-workflows/src/notebook-convertor/graph.ts @@ -38,9 +38,15 @@ export function buildExecutionGraph( sourceInfo.dependents.add(target); } - const targetHandle = edge.targetHandle ?? (bidirectional ? "in/out" : "in"); + let targetHandle = edge.targetHandle ?? (bidirectional ? "in/out" : "in"); const sourceHandle = edge.sourceHandle ?? (bidirectional ? "in/out" : "out"); + // TrillGenerator doesn't persist targetHandle, so recover MergeFlow slot from edge ID. + if (targetHandle === "in" && !bidirectional) { + const slotMatch = edge.id.match(/in_(\d+)/); + if (slotMatch) targetHandle = `in_${slotMatch[1]}`; + } + if (!targetInfo.inputs[targetHandle]) { targetInfo.inputs[targetHandle] = []; } @@ -218,12 +224,18 @@ export function normalizeNotebookConnection(connection: NotebookTrillConnection) connection.sourceHandle === "in/out" || connection.targetHandle === "in/out"; + const targetHandle = connection.targetHandle ?? (bidirectional ? "in/out" : "in"); + const isMergeSlot = /^in_\d+$/.test(targetHandle); + const fallbackId = isMergeSlot + ? 
`edge_${targetHandle}_${sanitizeId(connection.source)}_${sanitizeId(connection.target)}` + : `edge_${uuid()}`; + return { - id: connection.id ?? `edge_${uuid()}`, + id: connection.id ?? fallbackId, source: connection.source, target: connection.target, sourceHandle: connection.sourceHandle ?? (bidirectional ? "in/out" : "out"), - targetHandle: connection.targetHandle ?? (bidirectional ? "in/out" : "in"), + targetHandle, type: bidirectional ? "Interaction" : connection.type, }; } @@ -240,9 +252,15 @@ export function buildNotebookTrillConnectionsMetadata( const inputs: NotebookTrillConnection[] = []; const outputs: NotebookTrillConnection[] = []; + const isMergeFlowHandle = (handle: string | undefined): boolean => + !!handle && /^in_\d+$/.test(handle); + for (const [targetHandle, connections] of Object.entries(nodeInfo.inputs)) { for (const connection of connections) { inputs.push({ + id: isMergeFlowHandle(targetHandle) + ? `edge_${targetHandle}_${sanitizeId(connection.source)}_${sanitizeId(nodeId)}` + : undefined, source: connection.source, target: nodeId, sourceHandle: connection.sourceHandle, @@ -255,6 +273,9 @@ export function buildNotebookTrillConnectionsMetadata( for (const [sourceHandle, connections] of Object.entries(nodeInfo.outputs)) { for (const connection of connections) { outputs.push({ + id: isMergeFlowHandle(connection.targetHandle) + ? 
`edge_${connection.targetHandle}_${sanitizeId(nodeId)}_${sanitizeId(connection.target)}` + : undefined, source: nodeId, target: connection.target, sourceHandle, diff --git a/utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts b/utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts index f9350d79..6bd97d96 100644 --- a/utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts +++ b/utk_curio/frontend/urban-workflows/src/notebook-convertor/parsing.ts @@ -163,6 +163,18 @@ export function extractVegaLiteSpecCode(code: string): string { } export function extractUtkSpecCode(code: string): string { + // Prefer grammar_content = """...""" (generated notebook format) — it holds the + // full original spec including ex_knots, grammar, and other attributes. + const contentMarker = 'grammar_content = """'; + const contentStart = code.indexOf(contentMarker); + if (contentStart >= 0) { + const strStart = contentStart + contentMarker.length; + const strEnd = code.indexOf('"""', strStart); + if (strEnd > strStart) { + return code.slice(strStart, strEnd).trim(); + } + } + const markers = ["grammar =", "utk_spec =", "utk_grammar ="]; let specAssignStart = -1; diff --git a/utk_curio/sandbox/app/api.py b/utk_curio/sandbox/app/api.py index 95b4f163..664f46d5 100644 --- a/utk_curio/sandbox/app/api.py +++ b/utk_curio/sandbox/app/api.py @@ -159,7 +159,17 @@ def exec(): stdout = [item for item in stdout.split("\n") if item != ''] if(len(stdout) > 0): - output = json.loads(stdout[-1]) + try: + output = json.loads(stdout[-1]) + except (json.JSONDecodeError, ValueError) as e: + # If parsing fails, it means the expected JSON output wasn't generated + # This usually indicates an error occurred during user code execution + output = {} + output['path'] = "" + output['dataType'] = "str" + if stderr: + # Append the actual error to stderr for debugging + pass else: output = {} output['path'] = "" diff --git a/utk_curio/sandbox/python_wrapper.txt 
b/utk_curio/sandbox/python_wrapper.txt index 37b12957..2e264efe 100644 --- a/utk_curio/sandbox/python_wrapper.txt +++ b/utk_curio/sandbox/python_wrapper.txt @@ -6,6 +6,7 @@ import warnings warnings.filterwarnings('ignore') +import sys import rasterio import geopandas as gpd import pandas as pd @@ -75,8 +76,16 @@ if input is not None and not (isinstance(input, str) and input == ''): # checkIOType(parsedOutput, nodeType, False) #duckdb output dispatch: -output = userCode(incomingInput) -out_kind = detect_kind(output) +try: + output = userCode(incomingInput) +except Exception as e: + print(json.dumps({'error': str(e), 'type': type(e).__name__}), file=sys.stderr) + import traceback + traceback.print_exc(file=sys.stderr) + # Output a safe default + output = None + +out_kind = detect_kind(output) if output is not None else 'str' if out_kind == 'outputs': synthetic_out = { 'dataType': 'outputs', @@ -85,19 +94,20 @@ if out_kind == 'outputs': else: synthetic_out = {'dataType': out_kind, 'data': None} #checkIOType is not inert. It halts the pipeline when a node produces the wrong kind of thing. it's a gate, not a modifier. 
-checkIOType(synthetic_out, nodeType, False) - -# print(parsedOutput) -# print(type(parsedOutput['data'][0])) -# print(parsedOutput) - -# serialization output saving -# file_path = save_memory_mapped_file(parsedOutput) -# print(json.dumps(parsedOutput)) # stdout: 0,-2; node output: -2 -# print(json.dumps({'path': file_path, 'dataType': parsedOutput['dataType']})) # path: -1 -# print(json.dumps(parsedOutput)) - +try: + checkIOType(synthetic_out, nodeType, False) +except Exception as e: + print(json.dumps({'error': str(e), 'type': type(e).__name__}), file=sys.stderr) + import traceback + traceback.print_exc(file=sys.stderr) # duckdb output saving -file_path = save_to_duckdb(output, node_id=nodeType) -print(json.dumps({'path': file_path, 'dataType': out_kind})) # path: -1 \ No newline at end of file +try: + file_path = save_to_duckdb(output, node_id=nodeType) if output is not None else None + print(json.dumps({'path': file_path, 'dataType': out_kind})) # path: -1 +except Exception as e: + print(json.dumps({'error': str(e), 'type': type(e).__name__}), file=sys.stderr) + import traceback + traceback.print_exc(file=sys.stderr) + # Output a safe default to prevent JSON parsing error + print(json.dumps({'path': '', 'dataType': 'str', 'error': str(e)})) \ No newline at end of file From 798c2d642c3bc5bc5a2603d1b492ec4752d5b261 Mon Sep 17 00:00:00 2001 From: Jaideep Nutalapati Date: Tue, 28 Apr 2026 07:23:37 -0500 Subject: [PATCH 12/13] CURIO IPYNB feature documentation --- docs/IPYNB_USAGE.md | 895 ++++++++++++++++++ .../src/notebook-convertor/README.md | 559 +++++++++++ 2 files changed, 1454 insertions(+) create mode 100644 docs/IPYNB_USAGE.md create mode 100644 utk_curio/frontend/urban-workflows/src/notebook-convertor/README.md diff --git a/docs/IPYNB_USAGE.md b/docs/IPYNB_USAGE.md new file mode 100644 index 00000000..75ac7e6f --- /dev/null +++ b/docs/IPYNB_USAGE.md @@ -0,0 +1,895 @@ +# Jupyter Notebook and Curio Interoperability Usage Guide + +## Converting Notebooks to 
Curio Workflows + +This guide explains how to structure Jupyter notebooks for seamless conversion to Curio workflows and vice versa. It covers all supported node types, naming conventions, and provides ready-to-use templates. + +--- + +## Table of Contents + +1. [Quick Start](#quick-start) +2. [Node Types](#node-types) +3. [Metadata Structure](#metadata-structure) +4. [Naming Conventions](#naming-conventions) +5. [Node Templates](#node-templates) +6. [Best Practices](#best-practices) +7. [Troubleshooting](#troubleshooting) + +--- + +## Quick Start + +### The Minimum Viable Cell Template + +Every cell that will be converted to a Curio node needs: + +```python +__trill_node__ = { + "id": "unique-cell-id", + "type": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [], + "outputs": [] +} + +def _curio_node(): + # YOUR CODE HERE + result = process_data() + return result + +_curio_output = _curio_node() +``` + +**Key Points:** +- `__trill_node__`: Metadata identifying the node +- `__trill_connections__`: Connection information +- `_curio_node()`: Function containing your actual code +- `_curio_output`: Captures the return value + +--- + +## Node Types + +Curio supports 16 node types, each with specific purposes: + +| Node Type | Purpose | Input | Output | +|-----------|---------|-------|--------| +| **DATA_LOADING** | Load/generate data from files or APIs | None | DataFrame, GeoDataFrame, or Raster | +| **DATA_CLEANING** | Clean and preprocess data | DataFrame/GeoDataFrame | DataFrame/GeoDataFrame | +| **DATA_TRANSFORMATION** | Transform and reshape data | DataFrame/GeoDataFrame | DataFrame/GeoDataFrame | +| **COMPUTATION_ANALYSIS** | Perform calculations and analysis | Any | Any | +| **DATA_SUMMARY** | Aggregate and summarize data | DataFrame/GeoDataFrame | DataFrame/Series | +| **DATA_EXPORT** | Save/export data to files | DataFrame/GeoDataFrame | File path | +| **DATA_POOL** | Combine multiple data sources | 
Multiple | DataFrame/GeoDataFrame | +| **MERGE_FLOW** | Merge data from multiple branches | Multiple | DataFrame/GeoDataFrame | +| **FLOW_SWITCH** | Conditional branching based on logic | Any | Any | +| **CONSTANTS** | Define constant values | None | Value/JSON | +| **VIS_VEGA** | Create Vega-Lite visualizations | DataFrame/GeoDataFrame | Vega-Lite spec | +| **VIS_UTK** | Create UTK geospatial visualizations | GeoDataFrame | UTK spec | +| **VIS_TABLE** | Display data as interactive table | DataFrame/GeoDataFrame | Table HTML | +| **VIS_TEXT** | Display formatted text output | Any | Text | +| **VIS_IMAGE** | Display images | Image data/path | Image | +| **COMMENTS** | Add documentation cells | None | Comment text | + +--- + +## Metadata Structure + +### `__trill_node__` Dictionary + +Required metadata that identifies each node: + +```python +__trill_node__ = { + "id": "82537c44-8195-4cd3-a5fa-8a049d53d96e", # Unique identifier + "type": "COMPUTATION_ANALYSIS", # Node type + "in": "DEFAULT", # Input port label + "out": "DEFAULT" # Output port label +} +``` + +**Fields:** +- `id` (String): Unique UUID for the node. 
**Use UUID format** (e.g., from `uuid.uuid4()`) +- `type` (String): One of the 16 node types listed above +- `in` (String): Input connection label (usually "DEFAULT") +- `out` (String): Output connection label (usually "DEFAULT") + +### `__trill_connections__` Dictionary + +Metadata defining connections to other nodes: + +```python +__trill_connections__ = { + "inputs": [ + { + "source": "source-node-id", + "target": "current-node-id", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + } + ], + "outputs": [ + { + "source": "current-node-id", + "target": "target-node-id", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + } + ] +} +``` + +**Fields:** +- `inputs`: List of incoming connections +- `outputs`: List of outgoing connections +- `source`/`target`: Node IDs +- `sourceHandle`/`targetHandle`: Connection port labels +- `bidirectional`: Boolean for bidirectional connections + +--- + +## Naming Conventions + +Curio uses specific naming patterns to automatically detect and wire data dependencies: + +### Variable Naming Patterns + +| Pattern | Usage | Example | Meaning | +|---------|-------|---------|---------| +| `data_*` | Data loading output | `data_weather_df` | Output from DATA_LOADING node | +| `result_*` | Computation output | `result_aggregated` | Output from COMPUTATION_ANALYSIS | +| `cleaned_*` | Cleaned data | `cleaned_weather` | Output from DATA_CLEANING node | +| `summary_*` | Summarized data | `summary_stats` | Output from DATA_SUMMARY node | +| `pool_*` | Pooled data | `pool_combined` | Output from DATA_POOL node | +| `vis_*` | Visualization spec | `vis_chart` | Output from VIS_VEGA/VIS_UTK | +| `const_*` | Constants | `const_threshold` | Output from CONSTANTS node | + +### Node ID Naming + +Use UUID format for consistency: + +```python +import uuid + +node_id = str(uuid.uuid4()) # e.g., "82537c44-8195-4cd3-a5fa-8a049d53d96e" +``` + +### Variable Reference Comments + +Document dependencies with 
comments: + +```python +def _curio_node(): + # input: data_weather_df (from DATA_LOADING) + # input: result_aggregated (from COMPUTATION_ANALYSIS) + + filtered = data_weather_df[data_weather_df['temp'] > result_aggregated['avg_temp']] + return filtered +``` + +--- + +## Node Templates + +### Template 1: DATA_LOADING + +For loading data from files or APIs. + +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440000", + "type": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [], + "outputs": [] +} + +def _curio_node(): + """Load data from file or API""" + import pandas as pd + + # Load your data here + data_df = pd.read_csv('path/to/your/file.csv') + + # Optional: Basic validation + print(f"Loaded {len(data_df)} rows, {len(data_df.columns)} columns") + + return data_df + +_curio_output = _curio_node() +``` + +### Template 2: DATA_CLEANING + +For cleaning and preprocessing data. + +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440001", + "type": "DATA_CLEANING", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [ + { + "source": "550e8400-e29b-41d4-a716-446655440000", + "target": "550e8400-e29b-41d4-a716-446655440001", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + } + ], + "outputs": [] +} + +def _curio_node(): + """Clean and preprocess data""" + # input: data_df (from DATA_LOADING) + + cleaned_df = data_df.copy() + + # Remove duplicates + cleaned_df = cleaned_df.drop_duplicates() + + # Handle missing values + cleaned_df = cleaned_df.fillna(method='ffill') + + # Remove rows with critical missing values + cleaned_df = cleaned_df.dropna(subset=['critical_column']) + + print(f"After cleaning: {len(cleaned_df)} rows") + + return cleaned_df + +_curio_output = _curio_node() +``` + +### Template 3: DATA_TRANSFORMATION + +For reshaping and transforming data. 
+ +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440002", + "type": "DATA_TRANSFORMATION", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [ + { + "source": "550e8400-e29b-41d4-a716-446655440001", + "target": "550e8400-e29b-41d4-a716-446655440002", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + } + ], + "outputs": [] +} + +def _curio_node(): + """Transform data structure""" + # input: cleaned_df (from DATA_CLEANING) + + # Example transformations + transformed_df = cleaned_df.copy() + + # Reshape: pivot table + transformed_df = transformed_df.pivot_table( + index='category', + columns='date', + values='value', + aggfunc='mean' + ) + + # Or melt long format + # transformed_df = pd.melt(transformed_df, id_vars=['id'], value_name='value') + + return transformed_df + +_curio_output = _curio_node() +``` + +### Template 4: COMPUTATION_ANALYSIS + +For calculations and analysis. + +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440003", + "type": "COMPUTATION_ANALYSIS", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [ + { + "source": "550e8400-e29b-41d4-a716-446655440002", + "target": "550e8400-e29b-41d4-a716-446655440003", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + } + ], + "outputs": [] +} + +def _curio_node(): + """Perform analysis and computations""" + # input: transformed_df (from DATA_TRANSFORMATION) + import numpy as np + + # Compute derived metrics + result_analysis = { + 'mean': transformed_df.mean(), + 'std': transformed_df.std(), + 'correlation': transformed_df.corr(), + 'percentile_95': transformed_df.quantile(0.95) + } + + # Or return DataFrame + result_df = transformed_df.copy() + result_df['z_score'] = (result_df - result_df.mean()) / result_df.std() + + return result_df + +_curio_output = _curio_node() +``` + +### Template 5: DATA_SUMMARY + +For aggregation and summarization. 
+ +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440004", + "type": "DATA_SUMMARY", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [ + { + "source": "550e8400-e29b-41d4-a716-446655440003", + "target": "550e8400-e29b-41d4-a716-446655440004", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + } + ], + "outputs": [] +} + +def _curio_node(): + """Summarize and aggregate data""" + # input: result_df (from COMPUTATION_ANALYSIS) + + # Group and aggregate + summary_data = result_df.groupby('category').agg({ + 'value': ['mean', 'sum', 'count'], + 'z_score': ['min', 'max'] + }).round(2) + + # Flatten column names + summary_data.columns = ['_'.join(col).strip() for col in summary_data.columns.values] + + return summary_data + +_curio_output = _curio_node() +``` + +### Template 6: DATA_POOL + +For combining multiple data sources. + +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440005", + "type": "DATA_POOL", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [ + { + "source": "550e8400-e29b-41d4-a716-446655440001", + "target": "550e8400-e29b-41d4-a716-446655440005", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + }, + { + "source": "550e8400-e29b-41d4-a716-446655440002", + "target": "550e8400-e29b-41d4-a716-446655440005", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + } + ], + "outputs": [] +} + +def _curio_node(): + """Combine multiple data sources""" + # input: cleaned_df (from DATA_CLEANING) + # input: transformed_df (from DATA_TRANSFORMATION) + import pandas as pd + + # Concatenate + pool_data = pd.concat([cleaned_df, transformed_df], axis=0, ignore_index=True) + + # Or merge/join + # pool_data = pd.merge(cleaned_df, transformed_df, on='key_column', how='inner') + + return pool_data + +_curio_output = _curio_node() +``` + +### Template 7: VIS_VEGA + +For Vega-Lite visualizations. 
+ +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440006", + "type": "VIS_VEGA", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [ + { + "source": "550e8400-e29b-41d4-a716-446655440004", + "target": "550e8400-e29b-41d4-a716-446655440006", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + } + ], + "outputs": [] +} + +def _curio_node(): + """Create Vega-Lite visualization""" + # input: summary_data (from DATA_SUMMARY) + + vis_spec = { + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "A visualization of aggregated data", + "data": {"values": summary_data.to_dict(orient='records')}, + "mark": "bar", + "encoding": { + "x": {"field": "category", "type": "nominal", "title": "Category"}, + "y": {"field": "value_mean", "type": "quantitative", "title": "Mean Value"}, + "color": {"field": "category", "type": "nominal"} + } + } + + return vis_spec + +_curio_output = _curio_node() +``` + +### Template 8: VIS_TABLE + +For table visualizations. + +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440007", + "type": "VIS_TABLE", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [ + { + "source": "550e8400-e29b-41d4-a716-446655440004", + "target": "550e8400-e29b-41d4-a716-446655440007", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + } + ], + "outputs": [] +} + +def _curio_node(): + """Display data as interactive table""" + # input: summary_data (from DATA_SUMMARY) + + # Table visualization automatically handles DataFrame display + # Curio will render it as an interactive table in the notebook + + return summary_data + +_curio_output = _curio_node() +``` + +### Template 9: CONSTANTS + +For defining constant values. 
+ +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440008", + "type": "CONSTANTS", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [], + "outputs": [] +} + +def _curio_node(): + """Define constants""" + + const_values = { + "threshold": 25.5, + "categories": ["A", "B", "C"], + "config": { + "date_format": "%Y-%m-%d", + "timezone": "UTC" + } + } + + return const_values + +_curio_output = _curio_node() +``` + +### Template 10: FLOW_SWITCH + +For conditional branching. + +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440009", + "type": "FLOW_SWITCH", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [ + { + "source": "550e8400-e29b-41d4-a716-446655440003", + "target": "550e8400-e29b-41d4-a716-446655440009", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + } + ], + "outputs": [] +} + +def _curio_node(): + """Route data based on conditions""" + # input: result_df (from COMPUTATION_ANALYSIS) + + if result_df['value'].mean() > 50: + output = result_df[result_df['value'] > 50] + condition_met = "high_values" + else: + output = result_df[result_df['value'] <= 50] + condition_met = "low_values" + + return { + "data": output, + "condition": condition_met + } + +_curio_output = _curio_node() +``` + +### Template 11: DATA_EXPORT + +For exporting/saving data. 
+ +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440010", + "type": "DATA_EXPORT", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [ + { + "source": "550e8400-e29b-41d4-a716-446655440004", + "target": "550e8400-e29b-41d4-a716-446655440010", + "sourceHandle": "out", + "targetHandle": "in", + "bidirectional": False + } + ], + "outputs": [] +} + +def _curio_node(): + """Export data to file""" + # input: summary_data (from DATA_SUMMARY) + import os + + # Create output directory + output_dir = "output" + os.makedirs(output_dir, exist_ok=True) + + # Export to CSV + export_path = os.path.join(output_dir, "summary_results.csv") + summary_data.to_csv(export_path) + + print(f"Data exported to: {export_path}") + + return export_path + +_curio_output = _curio_node() +``` + +### Template 12: MERGE_FLOW + +For merging multiple data branches. + +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440011", + "type": "MERGE_FLOW", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [ + { + "source": "550e8400-e29b-41d4-a716-446655440003", + "target": "550e8400-e29b-41d4-a716-446655440011", + "sourceHandle": "out", + "targetHandle": "in_0", + "bidirectional": False + }, + { + "source": "550e8400-e29b-41d4-a716-446655440004", + "target": "550e8400-e29b-41d4-a716-446655440011", + "sourceHandle": "out", + "targetHandle": "in_1", + "bidirectional": False + } + ], + "outputs": [] +} + +def _curio_node(): + """Merge multiple data streams""" + # input: result_df (from COMPUTATION_ANALYSIS) + # input: summary_data (from DATA_SUMMARY) + import pandas as pd + + # Merge by common index or column + merged = pd.merge( + result_df.reset_index(), + summary_data.reset_index(), + on='category', + how='outer' + ) + + return merged + +_curio_output = _curio_node() +``` + +### Template 13: COMMENTS + +For adding documentation. 
+ +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440012", + "type": "COMMENTS", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = { + "inputs": [], + "outputs": [] +} + +def _curio_node(): + """Add documentation""" + + comment_text = """ + # Analysis Pipeline Documentation + + ## Overview + This pipeline performs weather data analysis across multiple stages: + + 1. **Data Loading**: Import raw weather data from CSV + 2. **Data Cleaning**: Remove duplicates and handle missing values + 3. **Transformation**: Pivot and reshape data for analysis + 4. **Computation**: Calculate statistical metrics + 5. **Summary**: Aggregate by category + 6. **Visualization**: Create interactive charts + + ## Key Metrics + - Mean temperature: {mean_temp}°C + - Std deviation: {std_temp}°C + - Date range: {start_date} to {end_date} + """ + + return comment_text + +_curio_output = _curio_node() +``` + +--- + +## Best Practices + +### 1. Always Use UUID for Node IDs + +```python +import uuid + +node_id = str(uuid.uuid4()) # Correct +# node_id = "custom_name" # Avoid - not a UUID +``` + +### 2. Structure Return Values Consistently + +```python +# Good: Always return a single clear output +def _curio_node(): + result = process(data) + return result # Single return + +# Avoid: Multiple returns or complex unpacking +def _curio_node(): + result1, result2 = process(data) # Can be ambiguous + return result1 +``` + +### 3. Use Explicit Variable Names + +```python +# Good +data_weather = pd.read_csv('weather.csv') +cleaned_weather = data_weather.dropna() +result_stats = cleaned_weather.describe() + +# Avoid +df = pd.read_csv('weather.csv') +df = df.dropna() +r = df.describe() +``` + +### 4. Document Dependencies with Comments + +```python +def _curio_node(): + # input: data_df (from DATA_LOADING node) + # input: const_threshold (from CONSTANTS node) + + filtered = data_df[data_df['value'] > const_threshold] + return filtered +``` + +### 5. 
Keep Cells Focused + +```python +# Good: Single responsibility +def _curio_node(): + # Just clean the data + df_clean = df.drop_duplicates() + df_clean = df_clean.fillna(method='ffill') + return df_clean + +# Avoid: Multiple concerns +def _curio_node(): + # Multiple transformations mixed together + df_clean = df.drop_duplicates() + df_clean = df_clean.fillna(method='ffill') + summary = df_clean.describe() + visualization = create_chart(summary) + save_file(visualization) + return df_clean +``` + +### 6. Test in Jupyter Before Converting + +1. Write and test your code in a regular Jupyter notebook +2. Once working, add the metadata and wrapping +3. Export and import to verify conversion works + +--- + +## Troubleshooting + +### Issue: "Node not recognized in import" + +**Problem**: Metadata is missing or malformed + +**Solution**: Ensure both `__trill_node__` and `__trill_connections__` are present: + +```python +# Must have both dictionaries +__trill_node__ = { ... } +__trill_connections__ = { ... 
} +``` + +### Issue: "Dependencies not automatically detected" + +**Problem**: Variable dependencies not wired + +**Solution**: +- Use clear naming conventions: `data_*`, `result_*` +- Add input comments: `# input: variable_name` +- Explicitly define connections in `__trill_connections__` + +### Issue: "Visualization not rendered" + +**Problem**: VIS_VEGA spec not converted correctly + +**Solution**: +- Ensure the function returns a valid Vega-Lite spec dictionary +- Must include `$schema` key +- Check Vega-Lite v5 documentation for valid encoding + +### Issue: "Error wrapping code in `_curio_node()`" + +**Problem**: Code structure incompatible with function wrapping + +**Solution**: +- Avoid module-level statements (imports should be inside the function) +- Don't reference global state +- Ensure all dependencies are passed as parameters or defined locally + +--- + +## Complete Example Notebook Structure + +See [docs/examples/notebooks](examples/notebooks/) for complete working examples: + +- `example10-original-notebook-W.ipynb` - DATA_LOADING example +- `example3-original-notebook-NW.ipynb` - Multi-node workflow +- `example5-original-notebook-W.ipynb` - Complete pipeline + +Screenshots available in [docs/examples/notebooks/screenshots](examples/notebooks/screenshots/) + +--- + +## Additional Resources + +- [NotebookConvertor API](../utk_curio/frontend/urban-workflows/src/notebook-convertor/README.md) +- [Vega-Lite Specification](https://vega.github.io/vega-lite/) +- [UTK Documentation](https://urbantk.org) +- [Curio Main Documentation](../documentation.md) + +--- + +**Last Updated**: April 28, 2026 +**Version**: 1.0 +**Status**: Complete diff --git a/utk_curio/frontend/urban-workflows/src/notebook-convertor/README.md b/utk_curio/frontend/urban-workflows/src/notebook-convertor/README.md new file mode 100644 index 00000000..544a843b --- /dev/null +++ b/utk_curio/frontend/urban-workflows/src/notebook-convertor/README.md @@ -0,0 +1,559 @@ +# Notebook
Convertor: Bidirectional Jupyter-Curio Conversion + +## Problem Statement + +### The Challenge + +Data scientists and urban analysts face a common workflow friction: + +1. **Jupyter Notebooks** are great for: + - Exploratory data analysis + - Interactive development + - Quick prototyping + - Sharing analysis with colleagues + +2. **Curio Workflows** are great for: + - Visual workflow organization + - Provenance tracking + - Collaborative design + - Interactive parameter adjustment + - Non-linear branching (what-if analysis) + +3. **The Problem**: These two environments are siloed + - Work in Jupyter, then manually recreate in Curio + - Work in Curio, but can't easily share/run in Jupyter + - No automatic dependency detection + - Loss of context in conversion + +### The Solution + +**Notebook Convertor** provides **bidirectional conversion** between Jupyter notebooks and Curio workflows: + +``` +Jupyter Notebook ←→ Curio Workflow + .ipynb ←→ JSON Spec +``` + +This enables: +- ✅ Rapid prototyping in Jupyter +- ✅ Import into Curio for collaborative refinement +- ✅ Export from Curio for sharing/archival +- ✅ Automatic dependency detection (AST-based) +- ✅ Full provenance tracking +- ✅ Reusable workflow templates + +--- + +## Architecture Overview + +### How It Works + +``` +Input Notebook + ↓ +┌─────────────────────────────────────┐ +│ Cell Analysis Phase │ +│ - Parse notebook cells │ +│ - Extract __trill_node__ metadata │ +│ - Analyze variable dependencies │ +│ - Detect visualization specs │ +└─────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────┐ +│ Graph Construction Phase │ +│ - Build dependency graph │ +│ - Infer edges from variables │ +│ - Create node connections │ +│ - Topological sorting │ +└─────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────┐ +│ Conversion Phase │ +│ - Create TrillSpec nodes │ +│ - Generate TrillSpec edges │ +│ - Add metadata │ +│ - Validate structure │ 
+└─────────────────────────────────────┘ + ↓ +Output: Curio Workflow (JSON) +``` + +### Key Components + +**NotebookConvertor.ts** (Main converter class) +- `notebookToTrill()`: Jupyter → Curio +- `trillToNotebook()`: Curio → Jupyter +- Node type inference +- Metadata extraction + +**Supporting Modules**: +- `grammarDetection.ts`: Detect Vega-Lite/UTK specs +- `importEdges.ts`: Build dependency graph +- `codegen.ts`: Generate notebook code from nodes +- `parsing.ts`: Parse and extract code elements +- `types.ts`: TypeScript interfaces +- `graph.ts`: Graph operations (topological sort, etc.) + +--- + +## Data Instructions + +### Supported Data Sources + +The notebook convertor works with data from various sources: + +#### 1. Example Jupyter Notebooks + +Available in `docs/examples/notebooks/`: + +``` +docs/examples/notebooks/ +├── example10-original-notebook-W.ipynb # Green Roofs example +├── example3-original-notebook-NW.ipynb # Multi-node workflow +├── example5-original-notebook-W.ipynb # Weather analysis +├── example7-original-notebook-W.ipynb # Speed camera analysis +├── example8-original-notebook-W.ipynb # Traffic violations +├── example9-original-notebook-W.ipynb # Energy efficiency +└── screenshots/ # Visual examples + ├── conversion-process.png + ├── metadata-structure.png + ├── workflow-output.png + └── ... +``` + +#### 2. Sample Datasets + +Reference datasets available in `data/datasets/`: + +``` +data/datasets/ +├── Milan_22.07.2022_Weather_File_UMEP_CSV.csv # Weather data +├── R03_21-11_WGS84_P_SocioDemographics_MILANO_Selected.shp* # Demographics +├── Census2020_BlockGroups.shp* # Census blocks +└── CitySurfaces_weights/ # ML model weights +``` + +#### 3. 
Expected Data Format + +**CSV Files**: +``` +filename,temperature,humidity,date +sensor1,25.5,45,2024-01-01 +sensor2,26.0,42,2024-01-01 +``` + +**Shapefiles** (requires .shp, .shx, .dbf, .prj): +``` +import geopandas as gpd +gdf = gpd.read_file('data.shp') +``` + +**GeoJSON**: +``` +import json +with open('data.geojson') as f: + geojson_data = json.load(f) +``` + +#### 4. Loading Data in Templates + +```python +def _curio_node(): + """Load data from docs/examples/data/""" + import pandas as pd + import os + + # Reference relative to Curio root directory + data_path = 'docs/examples/data/Green_Roofs.csv' + + if os.path.exists(data_path): + data_df = pd.read_csv(data_path) + else: + # Fallback + data_df = pd.DataFrame() + + return data_df +``` + +--- + +## Usage Steps + +### Step 1: Choose Your Approach + +#### Option A: Start with Jupyter Notebook + +**When to use**: If you have existing analysis in Jupyter + +1. **Structure your notebook** using templates from [IPYNB_USAGE.md](../IPYNB_USAGE.md) + - Use the provided templates for each node type + - Add `__trill_node__` and `__trill_connections__` metadata + - Wrap code in `_curio_node()` function + +2. **Example**: Basic data analysis notebook + +```python +# Cell 1: Load data +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440000", + "type": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = {"inputs": [], "outputs": []} + +def _curio_node(): + import pandas as pd + data_df = pd.read_csv('docs/examples/data/Green_Roofs.csv') + return data_df + +_curio_output = _curio_node() +``` + +3. **Import to Curio**: + - Go to Curio Projects page + - Click "Import Notebook" + - Select your .ipynb file + - Workflow automatically created! + +#### Option B: Start with Curio Workflow + +**When to use**: If building visual workflows in Curio interface + +1. **Design your workflow** in Curio UI +2. **Execute and validate** +3. 
**Export as notebook**: + - Menu → "Export as notebook" + - Browser downloads .ipynb file + - Can now share or modify in Jupyter + +### Step 2: Follow Node Type Templates + +See [IPYNB_USAGE.md - Node Templates](../IPYNB_USAGE.md#node-templates) for: + +| Node Type | Template | Example | +|-----------|----------|---------| +| DATA_LOADING | Load CSV/API | `pd.read_csv()` | +| DATA_CLEANING | Remove duplicates, handle NaN | `.dropna()` | +| DATA_TRANSFORMATION | Reshape data | `.pivot_table()` | +| COMPUTATION_ANALYSIS | Calculate metrics | `.groupby().agg()` | +| DATA_SUMMARY | Aggregate results | `.describe()` | +| VIS_VEGA | Create charts | Vega-Lite JSON | +| VIS_TABLE | Display table | Return DataFrame | +| VIS_TEXT | Display text | Return string | +| And 8 more... | See templates | See IPYNB_USAGE.md | + +### Step 3: Use Naming Conventions + +From [IPYNB_USAGE.md - Naming Conventions](../IPYNB_USAGE.md#naming-conventions): + +```python +# Good variable naming for automatic detection +data_weather = pd.read_csv('file.csv') # data_* +cleaned_weather = data_weather.dropna() # cleaned_* +result_stats = cleaned_weather.describe() # result_* +summary_avg = result_stats.mean() # summary_* +``` + +### Step 4: Verify Conversion + +#### For Notebook → Curio: + +The converter is a TypeScript class, so run this check from the frontend code (not from Python): + +```typescript +import { TrillNotebookConverter } from './NotebookConvertor'; + +// Load and convert (notebookText is the raw contents of your .ipynb file) +const notebook = JSON.parse(notebookText); + +const converter = new TrillNotebookConverter(); +const result = converter.notebookToTrill(notebook); + +// Check for warnings +if (result.warnings) { + console.warn("Warnings:", result.warnings); +} + +// Examine converted workflow +const spec = result.trillSpec; +console.log(`Nodes: ${spec.dataflow.nodes.length}`); +console.log(`Edges: ${spec.dataflow.edges.length}`); +``` + +#### For Curio → Notebook: + +```bash +# Export from Curio UI +# Menu → "Export as notebook" → Downloads .ipynb + +# Verify in Jupyter +jupyter notebook 
exported_workflow.ipynb +``` + +### Step 5: Test and Iterate + +1. **Import into Curio** and execute nodes +2. **Check for execution errors** in node outputs +3. **Adjust dependencies** if needed +4. **Re-export** to update notebook + +--- + +## Complete Working Examples + +### Example 1: Simple Data Analysis + +**File**: `docs/examples/notebooks/example10-original-notebook-W.ipynb` + +**Structure**: +``` +Load CSV → Clean Data → Summarize → Visualize +``` + +**Data**: Green Roofs dataset +**Output**: Summary statistics and charts + +**Screenshots**: [docs/examples/notebooks/screenshots/](../examples/notebooks/screenshots/) + +### Example 2: Multi-Stage Processing + +**File**: `docs/examples/notebooks/example3-original-notebook-NW.ipynb` + +**Structure**: +``` +Load Weather → Transform → Compute Metrics → Merge with Demographics → Visualize +``` + +**Data**: Milan weather + socio-demographic data +**Output**: Interactive map and time series + +### Example 3: What-If Analysis + +**File**: `docs/examples/notebooks/example5-original-notebook-W.ipynb` + +**Structure**: +``` +Load Data → Branch → Scenario A Analysis ⟶ Comparison + ⟶ Scenario B Analysis ⟶ +``` + +**Data**: Energy efficiency scenarios +**Output**: Comparison visualizations + +--- + +## Visual References + +See screenshots for visual examples of: + +1. **Metadata Structure** - How `__trill_node__` appears in notebooks +2. **Conversion Process** - Step-by-step transformation +3. **Workflow Output** - How converted workflows look in Curio +4. 
**Node Templates** - Example template structure + +**Location**: [docs/examples/notebooks/screenshots/](../examples/notebooks/screenshots/) + +To view: +```bash +# Open in image viewer +open docs/examples/notebooks/screenshots/ +``` + +--- + +## Conversion Details + +### Node Type Inference + +When `type` is not explicitly specified, Curio infers the node type from code patterns: + +```python +# Automatically detected as DATA_LOADING +import pandas as pd +df = pd.read_csv('file.csv') + +# Automatically detected as VIS_UTK +markers = ["grammar =", "utk_spec =", "utk_grammar ="] +``` + +### Metadata Preservation + +**Metadata that's preserved**: +- ✅ Node content (code) +- ✅ Node types +- ✅ Variable dependencies +- ✅ Connection information +- ✅ Node positions (laid out automatically) + +**Metadata that's recreated**: +- 🔄 Execution state (resets on import) +- 🔄 Cell outputs (not stored in Curio) +- 🔄 Notebook styling (Curio has its own styling) + +### Dependency Detection + +Uses AST (Abstract Syntax Tree) analysis: + +```python +# Cell 1: Define variable +df = pd.read_csv('data.csv') + +# Cell 2: Uses variable - automatically wired! 
+filtered = df[df['value'] > 10] +``` + +Result: Automatic edge from Cell 1 → Cell 2 + +--- + +## API Reference + +### TrillNotebookConverter Class + +```typescript +class TrillNotebookConverter { + // Convert Jupyter → Curio + notebookToTrill(notebook: Notebook): { + trillSpec: TrillSpec; + warnings?: string[]; + } + + // Convert Curio → Jupyter + trillToNotebook(trillJson: TrillSpec): Notebook +} +``` + +### Usage Example + +```typescript +import { TrillNotebookConverter } from './NotebookConvertor'; + +const converter = new TrillNotebookConverter(); + +// Import notebook to workflow +const result = converter.notebookToTrill(notebookJson); +if (result.warnings) { + console.warn(result.warnings); +} +const workflow = result.trillSpec; + +// Export workflow to notebook +const notebook = converter.trillToNotebook(workflow); +``` + +--- + +## Troubleshooting + +### Issue: "No Trill metadata detected" + +**Cause**: Notebook cells don't have `__trill_node__` metadata + +**Solution**: +1. Add metadata to each cell (see templates in [IPYNB_USAGE.md](../IPYNB_USAGE.md)) +2. Ensure UUID format for node IDs +3. Use explicit `__trill_connections__` dictionary + +**Example**: +```python +__trill_node__ = { + "id": "550e8400-e29b-41d4-a716-446655440000", + "type": "DATA_LOADING", + "in": "DEFAULT", + "out": "DEFAULT" +} + +__trill_connections__ = {"inputs": [], "outputs": []} +``` + + +## Best Practices + +### 1. Start Simple +- Begin with single-node workflows +- Verify import/export works +- Then add complexity + +### 2. Use Templates +- Copy templates from [IPYNB_USAGE.md](../IPYNB_USAGE.md) +- Follow naming conventions strictly +- Include all metadata fields + +### 3. Test Both Directions +``` +notebook.ipynb → Curio import → export → notebook2.ipynb → compare +``` + +### 4. Version Control +```bash +# Track both formats +git add notebook.ipynb +git add workflow.json +git add IPYNB_USAGE.md +``` + +### 5. 
Document Dependencies +```python +def _curio_node(): + # Inputs from upstream nodes + # input: data_weather (from DATA_LOADING) + # input: const_threshold (from CONSTANTS) + + # Your code here + + # Output for downstream nodes + return result +``` + +--- + +## References + +### Templates & Guides +- **Templates**: [IPYNB_USAGE.md](../IPYNB_USAGE.md) - 13 ready-to-use node templates +- **Naming Conventions**: [IPYNB_USAGE.md#naming-conventions](../IPYNB_USAGE.md#naming-conventions) +- **Best Practices**: [IPYNB_USAGE.md#best-practices](../IPYNB_USAGE.md#best-practices) + +### Examples +- **Notebook Examples**: [docs/examples/notebooks/](../examples/notebooks/) + - `example10-original-notebook-W.ipynb` + - `example3-original-notebook-NW.ipynb` + - `example5-original-notebook-W.ipynb` + +- **Screenshots**: [docs/examples/notebooks/screenshots/](../examples/notebooks/screenshots/) + - Conversion workflow visualization + - Metadata structure examples + - Output examples + +### External Resources +- **Vega-Lite**: https://vega.github.io/vega-lite/ +- **UTK (Urban Toolkit)**: https://urbantk.org +- **Jupyter Notebook Format**: https://nbformat.readthedocs.io/ +- **Python AST**: https://docs.python.org/3/library/ast.html + +### Related Documentation +- [Curio Main Documentation](../documentation.md) +- [Curio Architecture](../ARCHITECTURE.md) +- [Contributing Guide](../CONTRIBUTIONS.md) + +--- + +## Summary + +| Operation | Input | Output | Time | Link | +|-----------|-------|--------|------|------| +| Import to Curio | .ipynb | Workflow JSON | ~1s | See Step 1 | +| Export from Curio | Workflow JSON | .ipynb | ~1s | See Step 1 | +| Auto-detect nodes | Notebook code | Node types | ~100ms | Auto inference | +| Extract dependencies | Cell code | Edges/wiring | ~100ms | AST analysis | +| Full round-trip | .ipynb | .ipynb | ~2s | See Step 4 | + +--- + +## Contributors + +- Jaideep Nutalapati +- Vamsi Dath Meka + +--- \ No newline at end of file From 
6010310cfdf980b3297b06d42a15a6f0026473c2 Mon Sep 17 00:00:00 2001 From: Vamsi Dath Date: Sun, 3 May 2026 15:02:03 -0500 Subject: [PATCH 13/13] updated notebook examples for existing curio example dataflows --- .../notebooks/example1-notebook.ipynb | 120 +++++++++ .../notebooks/example10-notebook.ipynb | 80 ++++++ .../notebooks/example11-notebook.ipynb | 232 ++++++++++++++++++ .../notebooks/example2-notebook.ipynb | 120 +++++++++ .../notebooks/example3-notebook.ipynb | 120 +++++++++ .../notebooks/example4-notebook.ipynb | 64 +++++ .../notebooks/example5-notebook.ipynb | 48 ++++ .../notebooks/example7-notebook.ipynb | 40 +++ .../notebooks/example8-notebook.ipynb | 208 ++++++++++++++++ .../notebooks/example9-notebook.ipynb | 40 +++ ...=> v1-example10-original-notebook-W.ipynb} | 0 ...=> v1-example11-original-notebook-W.ipynb} | 0 ...=> v1-example3-original-notebook-NW.ipynb} | 0 ... => v1-example5-original-notebook-W.ipynb} | 0 ... => v1-example7-original-notebook-W.ipynb} | 0 ... => v1-example8-original-notebook-W.ipynb} | 0 ... 
=> v1-example9-original-notebook-W.ipynb} | 0 17 files changed, 1072 insertions(+) create mode 100644 docs/examples/notebooks/example1-notebook.ipynb create mode 100644 docs/examples/notebooks/example10-notebook.ipynb create mode 100644 docs/examples/notebooks/example11-notebook.ipynb create mode 100644 docs/examples/notebooks/example2-notebook.ipynb create mode 100644 docs/examples/notebooks/example3-notebook.ipynb create mode 100644 docs/examples/notebooks/example4-notebook.ipynb create mode 100644 docs/examples/notebooks/example5-notebook.ipynb create mode 100644 docs/examples/notebooks/example7-notebook.ipynb create mode 100644 docs/examples/notebooks/example8-notebook.ipynb create mode 100644 docs/examples/notebooks/example9-notebook.ipynb rename docs/examples/notebooks/{example10-original-notebook-W.ipynb => v1-example10-original-notebook-W.ipynb} (100%) rename docs/examples/notebooks/{example11-original-notebook-W.ipynb => v1-example11-original-notebook-W.ipynb} (100%) rename docs/examples/notebooks/{example3-original-notebook-NW.ipynb => v1-example3-original-notebook-NW.ipynb} (100%) rename docs/examples/notebooks/{example5-original-notebook-W.ipynb => v1-example5-original-notebook-W.ipynb} (100%) rename docs/examples/notebooks/{example7-original-notebook-W.ipynb => v1-example7-original-notebook-W.ipynb} (100%) rename docs/examples/notebooks/{example8-original-notebook-W.ipynb => v1-example8-original-notebook-W.ipynb} (100%) rename docs/examples/notebooks/{example9-original-notebook-W.ipynb => v1-example9-original-notebook-W.ipynb} (100%) diff --git a/docs/examples/notebooks/example1-notebook.ipynb b/docs/examples/notebooks/example1-notebook.ipynb new file mode 100644 index 00000000..e1f531fa --- /dev/null +++ b/docs/examples/notebooks/example1-notebook.ipynb @@ -0,0 +1,120 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"c80722e7-8960-4308-8f1d-90b121acf126\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n 
\"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"id\": \"edge_in_0_c80722e7_8960_4308_8f1d_90b121acf126_289b82c8_734e_4c66_a01d_9b0097f2a7bf\",\n \"source\": \"c80722e7-8960-4308-8f1d-90b121acf126\",\n \"target\": \"289b82c8-734e-4c66-a01d-9b0097f2a7bf\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n },\n {\n \"id\": \"edge_in_0_c80722e7_8960_4308_8f1d_90b121acf126_964b380b_559b_4fa3_a9f7_4dcb12878504\",\n \"source\": \"c80722e7-8960-4308-8f1d-90b121acf126\",\n \"target\": \"964b380b-559b-4fa3-a9f7-4dcb12878504\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n }\n ]\n}\n\ndef _curio_node():\n\n import rasterio\n timestamp = 12\n src = rasterio.open(f'milan/Milan_Tmrt_2022_203_{timestamp:02d}00D.tif')\n\n return src\n\n_curio_output = _curio_node()\n\ntry:\n data_c80722e7_8960_4308_8f1d_90b121acf126 = _curio_output\nexcept NameError:\n data_c80722e7_8960_4308_8f1d_90b121acf126 = None\n", + "metadata": { + "id": "c80722e7-8960-4308-8f1d-90b121acf126", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"48a522cd-b1d7-4947-98c2-fb50e07f1d82\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"id\": \"edge_in_1_48a522cd_b1d7_4947_98c2_fb50e07f1d82_289b82c8_734e_4c66_a01d_9b0097f2a7bf\",\n \"source\": \"48a522cd-b1d7-4947-98c2-fb50e07f1d82\",\n \"target\": \"289b82c8-734e-4c66-a01d-9b0097f2a7bf\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\n sensor = pd.read_csv('milan/Milan_22.07.2022_Weather_File_UMEP_CSV.csv', delimiter=';')\n return sensor\n\n_curio_output = _curio_node()\n\ntry:\n data_48a522cd_b1d7_4947_98c2_fb50e07f1d82 = _curio_output\nexcept NameError:\n data_48a522cd_b1d7_4947_98c2_fb50e07f1d82 = None\n", + "metadata": { + "id": "48a522cd-b1d7-4947-98c2-fb50e07f1d82", 
+ "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"289b82c8-734e-4c66-a01d-9b0097f2a7bf\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"id\": \"edge_in_0_c80722e7_8960_4308_8f1d_90b121acf126_289b82c8_734e_4c66_a01d_9b0097f2a7bf\",\n \"source\": \"c80722e7-8960-4308-8f1d-90b121acf126\",\n \"target\": \"289b82c8-734e-4c66-a01d-9b0097f2a7bf\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n },\n {\n \"id\": \"edge_in_1_48a522cd_b1d7_4947_98c2_fb50e07f1d82_289b82c8_734e_4c66_a01d_9b0097f2a7bf\",\n \"source\": \"48a522cd-b1d7-4947-98c2-fb50e07f1d82\",\n \"target\": \"289b82c8-734e-4c66-a01d-9b0097f2a7bf\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"289b82c8-734e-4c66-a01d-9b0097f2a7bf\",\n \"target\": \"ea821796-827b-47ac-8104-4bfb4dc5eb63\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n inputs = [\n data_c80722e7_8960_4308_8f1d_90b121acf126,\n data_48a522cd_b1d7_4947_98c2_fb50e07f1d82\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_289b82c8_734e_4c66_a01d_9b0097f2a7bf = _curio_output\nexcept NameError:\n merged_289b82c8_734e_4c66_a01d_9b0097f2a7bf = None\n", + "metadata": { + "id": "289b82c8-734e-4c66-a01d-9b0097f2a7bf", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ea821796-827b-47ac-8104-4bfb4dc5eb63\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"289b82c8-734e-4c66-a01d-9b0097f2a7bf\",\n \"target\": \"ea821796-827b-47ac-8104-4bfb4dc5eb63\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"id\": 
\"edge_in_1_ea821796_827b_47ac_8104_4bfb4dc5eb63_964b380b_559b_4fa3_a9f7_4dcb12878504\",\n \"source\": \"ea821796-827b-47ac-8104-4bfb4dc5eb63\",\n \"target\": \"964b380b-559b-4fa3-a9f7-4dcb12878504\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = merged_289b82c8_734e_4c66_a01d_9b0097f2a7bf\n arg = input_0\n\n # Step 4 — Compute UTCI\n\n import numpy as np\n from pythermalcomfort import models\n from rasterio.warp import Resampling\n\n\n src = arg[0]\n sensor = arg[1]\n\n timestamp = 12\n\n upscale_factor = 0.25\n dataset = src\n data = dataset.read(\n out_shape=(\n dataset.count,\n int(dataset.height * upscale_factor),\n int(dataset.width * upscale_factor),\n ),\n resampling=Resampling.nearest,\n masked=True,\n )\n data = data.astype(float)\n data.data[data.data == src.nodatavals[0]] = np.nan\n\n sensor_filtered = sensor[sensor[\"it\"] == timestamp]\n tdb = float(sensor_filtered[\"Td\"].values[0])\n v = float(sensor_filtered[\"Wind\"].values[0])\n rh = float(sensor_filtered[\"RH\"].values[0])\n\n # pythermalcomfort returns a UTCI result object; extract numeric grid explicitly\n utci_result = models.utci(tdb=tdb, tr=data[0], v=v, rh=rh, units=\"SI\")\n utci_grid = np.asarray(getattr(utci_result, \"utci\", utci_result), dtype=float)\n\n # Ensure 2D array for rasterstats\n if utci_grid.ndim == 3 and utci_grid.shape[0] == 1:\n utci_grid = utci_grid[0]\n if utci_grid.ndim != 2:\n raise ValueError(f\"UTCI must be 2D, got shape={utci_grid.shape}, ndim={utci_grid.ndim}\")\n\n utci_list = utci_grid.tolist()\n utci_shape = [utci_grid.shape[1], utci_grid.shape[0]] # [width, height]\n\n # print(f\"UTCI grid shape: {utci_shape}, dtype={utci_grid.dtype}\")\n # utci_list\n\n return (utci_list, utci_shape)\n\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ea821796_827b_47ac_8104_4bfb4dc5eb63 = _curio_output\nexcept NameError:\n result_ea821796_827b_47ac_8104_4bfb4dc5eb63 = None\n", + "metadata": { + "id": 
"ea821796-827b-47ac-8104-4bfb4dc5eb63", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"4b7c2234-d107-4531-b890-5c582efbe444\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"id\": \"edge_in_2_4b7c2234_d107_4531_b890_5c582efbe444_964b380b_559b_4fa3_a9f7_4dcb12878504\",\n \"source\": \"4b7c2234-d107-4531-b890-5c582efbe444\",\n \"target\": \"964b380b-559b-4fa3-a9f7-4dcb12878504\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_2\"\n }\n ]\n}\n\ndef _curio_node():\n\n import geopandas as gpd\n gdf = gpd.read_file('milan/R03_21-11_WGS84_P_SocioDemographics_MILANO_Selected.shp')\n return gdf\n\n_curio_output = _curio_node()\n\ntry:\n data_4b7c2234_d107_4531_b890_5c582efbe444 = _curio_output\nexcept NameError:\n data_4b7c2234_d107_4531_b890_5c582efbe444 = None\n", + "metadata": { + "id": "4b7c2234-d107-4531-b890-5c582efbe444", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"964b380b-559b-4fa3-a9f7-4dcb12878504\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"id\": \"edge_in_1_ea821796_827b_47ac_8104_4bfb4dc5eb63_964b380b_559b_4fa3_a9f7_4dcb12878504\",\n \"source\": \"ea821796-827b-47ac-8104-4bfb4dc5eb63\",\n \"target\": \"964b380b-559b-4fa3-a9f7-4dcb12878504\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n },\n {\n \"id\": \"edge_in_0_c80722e7_8960_4308_8f1d_90b121acf126_964b380b_559b_4fa3_a9f7_4dcb12878504\",\n \"source\": \"c80722e7-8960-4308-8f1d-90b121acf126\",\n \"target\": \"964b380b-559b-4fa3-a9f7-4dcb12878504\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n },\n {\n \"id\": \"edge_in_2_4b7c2234_d107_4531_b890_5c582efbe444_964b380b_559b_4fa3_a9f7_4dcb12878504\",\n \"source\": \"4b7c2234-d107-4531-b890-5c582efbe444\",\n \"target\": 
\"964b380b-559b-4fa3-a9f7-4dcb12878504\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_2\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"964b380b-559b-4fa3-a9f7-4dcb12878504\",\n \"target\": \"cdb373b6-1b35-47db-bfd7-fe4ae1e9efdc\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n inputs = [\n result_ea821796_827b_47ac_8104_4bfb4dc5eb63,\n data_c80722e7_8960_4308_8f1d_90b121acf126,\n data_4b7c2234_d107_4531_b890_5c582efbe444\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_964b380b_559b_4fa3_a9f7_4dcb12878504 = _curio_output\nexcept NameError:\n merged_964b380b_559b_4fa3_a9f7_4dcb12878504 = None\n", + "metadata": { + "id": "964b380b-559b-4fa3-a9f7-4dcb12878504", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"cdb373b6-1b35-47db-bfd7-fe4ae1e9efdc\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"964b380b-559b-4fa3-a9f7-4dcb12878504\",\n \"target\": \"cdb373b6-1b35-47db-bfd7-fe4ae1e9efdc\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"cdb373b6-1b35-47db-bfd7-fe4ae1e9efdc\",\n \"target\": \"3cc268ee-ec4b-4921-ac69-6d0bb3691cb4\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = merged_964b380b_559b_4fa3_a9f7_4dcb12878504\n arg = input_0\n\n # Step 5 — Zonal Statistics\n\n from rasterstats import zonal_stats\n import numpy as np\n\n dataset = arg[0]\n utci_list = arg[1][0]\n utci_shape = arg[1][1]\n gdf = arg[2]\n\n utci = np.asarray(utci_list, dtype=float)\n shape = utci_shape\n\n if utci.ndim != 2:\n raise ValueError(f\"Expected 2D UTCI array, got shape={utci.shape}, ndim={utci.ndim}\")\n\n transform = dataset.transform * dataset.transform.scale(\n 
(dataset.width / shape[0]),\n (dataset.height / shape[1]),\n )\n\n # Avoid nodata warning and make nodata explicit\n nodata_value = -999.0\n utci_for_stats = np.where(np.isnan(utci), nodata_value, utci)\n\n joined = zonal_stats(\n gdf,\n utci_for_stats,\n stats=[\"min\", \"max\", \"mean\", \"median\"],\n affine=transform,\n nodata=nodata_value,\n )\n\n gdf[\"mean\"] = [d[\"mean\"] for d in joined]\n result = gdf.loc[:, [gdf.geometry.name, \"mean\", \"gt_65\"]]\n\n\n return result\n\n\n_curio_output = _curio_node()\n\ntry:\n result_cdb373b6_1b35_47db_bfd7_fe4ae1e9efdc = _curio_output\nexcept NameError:\n result_cdb373b6_1b35_47db_bfd7_fe4ae1e9efdc = None\n", + "metadata": { + "id": "cdb373b6-1b35-47db-bfd7-fe4ae1e9efdc", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"3cc268ee-ec4b-4921-ac69-6d0bb3691cb4\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"cdb373b6-1b35-47db-bfd7-fe4ae1e9efdc\",\n \"target\": \"3cc268ee-ec4b-4921-ac69-6d0bb3691cb4\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"3cc268ee-ec4b-4921-ac69-6d0bb3691cb4\",\n \"target\": \"e1cf73df-4713-46e8-95d3-ec677378e125\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_cdb373b6_1b35_47db_bfd7_fe4ae1e9efdc\n arg = input_0\n\n import geopandas as gpd\n\n gdf = arg\n\n filtered_gdf = gdf.set_crs(32632)\n filtered_gdf = filtered_gdf.to_crs(3395)\n\n filtered_gdf = filtered_gdf[filtered_gdf['mean']>0]\n\n filtered_gdf.metadata = {\n 'name': 'census'\n }\n\n return filtered_gdf\n\n\n_curio_output = _curio_node()\n\ntry:\n result_3cc268ee_ec4b_4921_ac69_6d0bb3691cb4 = _curio_output\nexcept NameError:\n result_3cc268ee_ec4b_4921_ac69_6d0bb3691cb4 = None\n", + "metadata": { + "id": "3cc268ee-ec4b-4921-ac69-6d0bb3691cb4", + "language": "python" 
+ } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e1cf73df-4713-46e8-95d3-ec677378e125\",\n \"type\": \"DATA_POOL\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"3cc268ee-ec4b-4921-ac69-6d0bb3691cb4\",\n \"target\": \"e1cf73df-4713-46e8-95d3-ec677378e125\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"e1cf73df-4713-46e8-95d3-ec677378e125\",\n \"target\": \"6c4aa6a8-45eb-480e-bb3d-3fd54d13325b\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"e1cf73df-4713-46e8-95d3-ec677378e125\",\n \"target\": \"3334485c-50ad-4adf-9574-45f8a9704860\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"e1cf73df-4713-46e8-95d3-ec677378e125\",\n \"target\": \"35bd75ce-005e-40e1-9172-7caef9ccf046\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"e1cf73df-4713-46e8-95d3-ec677378e125\",\n \"target\": \"6c4aa6a8-45eb-480e-bb3d-3fd54d13325b\",\n \"sourceHandle\": \"in/out\",\n \"targetHandle\": \"in/out\",\n \"type\": \"Interaction\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n return result_3cc268ee_ec4b_4921_ac69_6d0bb3691cb4\n\n\n_curio_output = _curio_node()\n\ntry:\n pool_e1cf73df_4713_46e8_95d3_ec677378e125 = _curio_output\n pool_e1cf73df_4713_46e8_95d3_ec677378e125_in_out = pool_e1cf73df_4713_46e8_95d3_ec677378e125\nexcept NameError:\n pool_e1cf73df_4713_46e8_95d3_ec677378e125 = None\n pool_e1cf73df_4713_46e8_95d3_ec677378e125_in_out = None\n", + "metadata": { + "id": "e1cf73df-4713-46e8-95d3-ec677378e125", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"35bd75ce-005e-40e1-9172-7caef9ccf046\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"e1cf73df-4713-46e8-95d3-ec677378e125\",\n 
\"target\": \"35bd75ce-005e-40e1-9172-7caef9ccf046\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"35bd75ce-005e-40e1-9172-7caef9ccf046\",\n \"target\": \"bcded943-0536-4d5b-adeb-e51dfc28afba\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = pool_e1cf73df_4713_46e8_95d3_ec677378e125\n arg = input_0\n\n gdf = arg\n return gdf.loc[:, [\"gt_65\"]]\n\n\n_curio_output = _curio_node()\n\ntry:\n result_35bd75ce_005e_40e1_9172_7caef9ccf046 = _curio_output\nexcept NameError:\n result_35bd75ce_005e_40e1_9172_7caef9ccf046 = None\n", + "metadata": { + "id": "35bd75ce-005e-40e1-9172-7caef9ccf046", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"6c4aa6a8-45eb-480e-bb3d-3fd54d13325b\",\n \"type\": \"VIS_UTK\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"e1cf73df-4713-46e8-95d3-ec677378e125\",\n \"target\": \"6c4aa6a8-45eb-480e-bb3d-3fd54d13325b\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"e1cf73df-4713-46e8-95d3-ec677378e125\",\n \"target\": \"6c4aa6a8-45eb-480e-bb3d-3fd54d13325b\",\n \"sourceHandle\": \"in/out\",\n \"targetHandle\": \"in/out\",\n \"type\": \"Interaction\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n input_0 = pool_e1cf73df_4713_46e8_95d3_ec677378e125\n\n\n # Configure UTK for serverless/notebook environment\n import utk\n import json\n from IPython.display import HTML, Javascript, display\n\n utk.Environment.serverless = True\n\n # Create grammar structure\n grammar = {\n \"components\": [{\n \"id\": \"notebook_map\",\n \"json\": {\n \"camera\": {\n \"wEye\": [0, 0, 1000],\n \"wLookAt\": [0, 0, 0],\n \"wUp\": [0, 1, 0]\n },\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": [],\n \"map_style\": [],\n \"widgets\": [{\n \"type\": \"TOGGLE_KNOT\"\n }]\n },\n \"position\": 
{\"x\": 0, \"y\": 0, \"width\": 12, \"height\": 4}\n }],\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": []\n }\n\n # If content has grammar, parse and merge it\n grammar_content = \"\"\"{\n \"components\": [\n {\n \"id\": \"grammar_map\",\n \"position\": {\n \"width\": [\n 1,\n 12\n ],\n \"height\": [\n 1,\n 4\n ]\n }\n }\n ],\n \"knots\": [],\n \"ex_knots\": [\n {\n \"id\": \"census0\",\n \"out_name\": \"census\",\n \"in_name\": \"gt_65\"\n },\n {\n \"id\": \"census1\",\n \"out_name\": \"census\",\n \"in_name\": \"mean\"\n }\n ],\n \"grid\": {\n \"width\": 12,\n \"height\": 4\n },\n \"grammar\": false\n }\"\"\".strip()\n if grammar_content and grammar_content != \"{}\":\n try:\n parsed_grammar = json.loads(grammar_content)\n # Merge parsed grammar with our structure\n if \"components\" in parsed_grammar:\n grammar[\"components\"][0][\"json\"].update(parsed_grammar.get(\"json\", {}))\n if \"knots\" in parsed_grammar:\n grammar[\"knots\"] = parsed_grammar[\"knots\"]\n except json.JSONDecodeError:\n pass\n\n # Load geospatial data if available\n geospatial_data = None\n if input_0 is not None:\n data_input = input_0\n # Handle multi-input case\n if isinstance(data_input, list):\n data_input = data_input[0] if data_input else None\n \n if data_input is not None:\n # Check if it's a geodataframe\n try:\n import geopandas as gpd\n if isinstance(data_input, gpd.GeoDataFrame):\n # Convert to GeoJSON\n geojson_data = json.loads(data_input.to_json())\n geospatial_data = utk.physical_from_geojson(geojson_data)\n \n # Add layers to grammar\n if geospatial_data and \"components\" in grammar:\n if \"layers\" not in grammar[\"components\"][0][\"json\"]:\n grammar[\"components\"][0][\"json\"][\"layers\"] = []\n # Add layer for the geospatial data\n grammar[\"components\"][0][\"json\"][\"layers\"].append({\n \"type\": \"geospatial\",\n \"data\": geospatial_data.to_dict() if hasattr(geospatial_data, 'to_dict') else geospatial_data\n })\n except Exception as e:\n pass\n\n 
# Create HTML container\n html_container = f'
'\n display(HTML(html_container))\n\n # Initialize UTK in browser\n js_initialization = f\"\"\"\n require(['utk'], function(utk) {{\n utk.Environment.serverless = true;\n const container = document.getElementById('utk-container-6c4aa6a8');\n const grammar = {json.dumps(grammar)};\n \n try {{\n const interpreter = new utk.GrammarInterpreter('notebook', grammar, container);\n // Store reference for potential interactions\n window._utk_interpreter_6c4aa6a8 = interpreter;\n }} catch(e) {{\n console.error('UTK initialization error:', e);\n container.innerHTML = '
Error initializing UTK visualization
';\n }}\n }});\n \"\"\"\n display(Javascript(js_initialization))\n\n\n_curio_output = _curio_node()\n\ntry:\n result_6c4aa6a8_45eb_480e_bb3d_3fd54d13325b = _curio_output\nexcept NameError:\n result_6c4aa6a8_45eb_480e_bb3d_3fd54d13325b = None\n\nfrom IPython.display import display\ndisplay(utk-container-6c4aa6a8)\n", + "metadata": { + "id": "6c4aa6a8-45eb-480e-bb3d-3fd54d13325b", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"3334485c-50ad-4adf-9574-45f8a9704860\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"e1cf73df-4713-46e8-95d3-ec677378e125\",\n \"target\": \"3334485c-50ad-4adf-9574-45f8a9704860\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = pool_e1cf73df_4713_46e8_95d3_ec677378e125\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"params\": [\n {\"name\": \"clickSelect\", \"select\": \"interval\"}\n ],\n \"mark\": {\n \"type\": \"point\",\n \"cursor\": \"pointer\"\n },\n \"encoding\": {\n \"x\": {\"field\": \"gt_65\", \"type\": \"quantitative\"},\n \"y\": {\"field\": \"mean\", \"type\": \"quantitative\", \"scale\": {\"domain\": [37, 42]}},\n \"fillOpacity\": {\n \"condition\": {\"param\": \"clickSelect\", \"value\": 1},\n \"value\": 0.3\n },\n \"color\": {\n \"field\": \"interacted\",\n \"type\": \"nominal\",\n \"condition\": {\"test\": \"datum.interacted === '1'\", \"value\": \"red\", \"else\": \"blue\"}\n }\n },\n \"config\": {\n \"scale\": {\n \"bandPaddingInner\": 0.2\n }\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n 
return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_3334485c_50ad_4adf_9574_45f8a9704860 = _curio_output\nexcept NameError:\n result_3334485c_50ad_4adf_9574_45f8a9704860 = None\n", + "metadata": { + "id": "3334485c-50ad-4adf-9574-45f8a9704860", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"bcded943-0536-4d5b-adeb-e51dfc28afba\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"35bd75ce-005e-40e1-9172-7caef9ccf046\",\n \"target\": \"bcded943-0536-4d5b-adeb-e51dfc28afba\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_35bd75ce_005e_40e1_9172_7caef9ccf046\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"transform\": [\n {\n \"fold\": [\"gt_65\"],\n \"as\": [\"Variable\", \"Value\"]\n }\n ],\n \"mark\": {\n \"type\": \"boxplot\",\n \"size\": 60\n },\n \"encoding\": {\n \"x\": {\"field\": \"Variable\", \"type\": \"nominal\", \"title\": \"Variable\"},\n \"y\": {\"field\": \"Value\", \"type\": \"quantitative\", \"title\": \"Value\"}\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_bcded943_0536_4d5b_adeb_e51dfc28afba = _curio_output\nexcept NameError:\n result_bcded943_0536_4d5b_adeb_e51dfc28afba = None\n", + "metadata": { + "id": "bcded943-0536-4d5b-adeb-e51dfc28afba", + "language": "python" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": 
"python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example10-notebook.ipynb b/docs/examples/notebooks/example10-notebook.ipynb new file mode 100644 index 00000000..afc2a4a1 --- /dev/null +++ b/docs/examples/notebooks/example10-notebook.ipynb @@ -0,0 +1,80 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"82537c44-8195-4cd3-a5fa-8a049d53d96e\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"82537c44-8195-4cd3-a5fa-8a049d53d96e\",\n \"target\": \"e5e7e21f-609d-496b-b231-659ee91ff9af\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"data/Green_Roofs.csv\")\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_82537c44_8195_4cd3_a5fa_8a049d53d96e = _curio_output\nexcept NameError:\n data_82537c44_8195_4cd3_a5fa_8a049d53d96e = None\n", + "metadata": { + "id": "82537c44-8195-4cd3-a5fa-8a049d53d96e", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e5e7e21f-609d-496b-b231-659ee91ff9af\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"82537c44-8195-4cd3-a5fa-8a049d53d96e\",\n \"target\": \"e5e7e21f-609d-496b-b231-659ee91ff9af\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"e5e7e21f-609d-496b-b231-659ee91ff9af\",\n \"target\": \"4226b7ed-c8e7-4acf-873b-2d835d9c4a07\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_82537c44_8195_4cd3_a5fa_8a049d53d96e\n arg = input_0\n\n import pandas as pd\n\n df = arg\n df.fillna(0, inplace=True)\n\n return df\n\n\n_curio_output = _curio_node()\n\ntry:\n 
result_e5e7e21f_609d_496b_b231_659ee91ff9af = _curio_output\nexcept NameError:\n result_e5e7e21f_609d_496b_b231_659ee91ff9af = None\n", + "metadata": { + "id": "e5e7e21f-609d-496b-b231-659ee91ff9af", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"4226b7ed-c8e7-4acf-873b-2d835d9c4a07\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"e5e7e21f-609d-496b-b231-659ee91ff9af\",\n \"target\": \"4226b7ed-c8e7-4acf-873b-2d835d9c4a07\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_e5e7e21f_609d_496b_b231_659ee91ff9af\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"description\": \"Histogram of Total Roof Size of Buildings in Chicago (log-scaled)\",\n \"data\": {\n \"name\": \"data\"\n },\n \"transform\": [\n { \"filter\": \"datum.TOTAL_ROOF_SQFT > 0\" },\n {\n \"calculate\": \"log(datum.TOTAL_ROOF_SQFT) / log(10)\",\n \"as\": \"log_roof_size\"\n }\n ],\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\n \"field\": \"log_roof_size\",\n \"bin\": { \"maxbins\": 30 },\n \"axis\": {\n \"title\": \"Total Roof Size (sqft)\",\n \"values\": [3, 4, 5, 6],\n \"labelExpr\": \"'10^' + datum.value\"\n }\n },\n \"y\": {\n \"aggregate\": \"count\",\n \"type\": \"quantitative\",\n \"axis\": {\n \"title\": \"Number of Buildings\"\n }\n }\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_4226b7ed_c8e7_4acf_873b_2d835d9c4a07 = _curio_output\nexcept NameError:\n 
result_4226b7ed_c8e7_4acf_873b_2d835d9c4a07 = None\n", + "metadata": { + "id": "4226b7ed-c8e7-4acf-873b-2d835d9c4a07", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"92717c5a-2e65-4ca3-9818-d5f73c89f0a9\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"92717c5a-2e65-4ca3-9818-d5f73c89f0a9\",\n \"target\": \"c528f5bc-5d28-4c72-9158-dfb4282c504f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import geopandas as gpd\n import pandas as pd\n from shapely.geometry import Point\n # Read the green roofs dataset\n green_roofs_df = pd.read_csv('data/Green_Roofs.csv')\n\n # Create the dataset into geo dataframe using latitude and longitude columns\n geometry = [Point(xy) for xy in zip(green_roofs_df['LONGITUDE'], green_roofs_df['LATITUDE'])]\n green_roofs_df = gpd.GeoDataFrame(green_roofs_df, geometry=geometry, crs=4326)\n chicago = gpd.read_file(\"data/chicago.geojson\")\n\n # Joining the green roofs dataset with the chicago neighborhood geojson file\n joined = gpd.sjoin(green_roofs_df, chicago, predicate='within')\n return joined\n\n_curio_output = _curio_node()\n\ntry:\n data_92717c5a_2e65_4ca3_9818_d5f73c89f0a9 = _curio_output\nexcept NameError:\n data_92717c5a_2e65_4ca3_9818_d5f73c89f0a9 = None\n", + "metadata": { + "id": "92717c5a-2e65-4ca3-9818-d5f73c89f0a9", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"c528f5bc-5d28-4c72-9158-dfb4282c504f\",\n \"type\": \"DATA_POOL\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"92717c5a-2e65-4ca3-9818-d5f73c89f0a9\",\n \"target\": \"c528f5bc-5d28-4c72-9158-dfb4282c504f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": 
\"c528f5bc-5d28-4c72-9158-dfb4282c504f\",\n \"target\": \"5c11ed5f-c993-4940-89d0-08d186e903f9\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"c528f5bc-5d28-4c72-9158-dfb4282c504f\",\n \"target\": \"a9995883-f2ea-4b42-b74d-59ebc727afc6\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n return data_92717c5a_2e65_4ca3_9818_d5f73c89f0a9\n\n\n_curio_output = _curio_node()\n\ntry:\n pool_c528f5bc_5d28_4c72_9158_dfb4282c504f = _curio_output\nexcept NameError:\n pool_c528f5bc_5d28_4c72_9158_dfb4282c504f = None\n", + "metadata": { + "id": "c528f5bc-5d28-4c72-9158-dfb4282c504f", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"5c11ed5f-c993-4940-89d0-08d186e903f9\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"c528f5bc-5d28-4c72-9158-dfb4282c504f\",\n \"target\": \"5c11ed5f-c993-4940-89d0-08d186e903f9\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = pool_c528f5bc_5d28_4c72_9158_dfb4282c504f\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"description\": \"Dot Density Map of Green Roof Locations in Chicago with Zoom & Pan\",\n \"width\": 500,\n \"height\": 600,\n \"title\": \"Green Roof Locations in Chicago\",\n \"mark\": \"circle\",\n \"selection\": {\n \"grid\": {\n \"type\": \"interval\",\n \"bind\": \"scales\"\n }\n },\n \"encoding\": {\n \"x\": {\n \"field\": \"LONGITUDE\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [-88.0, -87.5] },\n \"axis\": { \"title\": \"Longitude\" }\n },\n \"y\": {\n \"field\": \"LATITUDE\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [41.6, 42.1] },\n \"axis\": { \"title\": \"Latitude\" }\n },\n \"size\": {\n \"field\": \"VEGETATED_SQFT\",\n \"type\": \"quantitative\",\n 
\"legend\": { \"title\": \"Vegetated Sqft\" }\n },\n \"tooltip\": [\n { \"field\": \"VEGETATED_SQFT\", \"type\": \"quantitative\" },\n { \"field\": \"TOTAL_ROOF_SQFT\", \"type\": \"quantitative\" },\n { \"field\": \"zip\", \"type\": \"nominal\" }\n ]\n },\n \"config\": {\n \"view\": { \"stroke\": \"transparent\" }\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_5c11ed5f_c993_4940_89d0_08d186e903f9 = _curio_output\nexcept NameError:\n result_5c11ed5f_c993_4940_89d0_08d186e903f9 = None\n", + "metadata": { + "id": "5c11ed5f-c993-4940-89d0-08d186e903f9", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"a9995883-f2ea-4b42-b74d-59ebc727afc6\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"c528f5bc-5d28-4c72-9158-dfb4282c504f\",\n \"target\": \"a9995883-f2ea-4b42-b74d-59ebc727afc6\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"a9995883-f2ea-4b42-b74d-59ebc727afc6\",\n \"target\": \"d23e2587-57bf-4db4-84fe-cdb7c2de638d\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = pool_c528f5bc_5d28_4c72_9158_dfb4282c504f\n arg = input_0\n\n import geopandas as gpd\n\n joined = arg\n\n # filter out the top 10 zip codes from the joined dataframe on 'zip' by square feet\n top_10_largest = joined.groupby('zip')['VEGETATED_SQFT'].sum().reset_index().sort_values(by='VEGETATED_SQFT', ascending=False).head(10)\n\n return top_10_largest\n\n\n_curio_output = _curio_node()\n\ntry:\n 
result_a9995883_f2ea_4b42_b74d_59ebc727afc6 = _curio_output\nexcept NameError:\n result_a9995883_f2ea_4b42_b74d_59ebc727afc6 = None\n", + "metadata": { + "id": "a9995883-f2ea-4b42-b74d-59ebc727afc6", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"d23e2587-57bf-4db4-84fe-cdb7c2de638d\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"a9995883-f2ea-4b42-b74d-59ebc727afc6\",\n \"target\": \"d23e2587-57bf-4db4-84fe-cdb7c2de638d\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_a9995883_f2ea_4b42_b74d_59ebc727afc6\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"description\": \"Top 10 largest zip codes by green roof area\",\n \"width\": 400,\n \"height\": 200,\n \"selection\": {\n \"zip_select\": {\n \"type\": \"multi\",\n \"fields\": [\"zip\"],\n \"toggle\": \"event.shiftKey\"\n }\n },\n \"mark\": { \"type\": \"bar\", \"stroke\": \"black\", \"color\": \"green\" },\n \"encoding\": {\n \"x\": {\n \"field\": \"VEGETATED_SQFT\",\n \"type\": \"quantitative\",\n \"axis\": {\n \"title\": \"Total Vegetated Roof Size (sqft)\",\n \"values\": [100000, 250000, 500000, 1000000],\n \"format\": \",d\"\n }\n },\n \"y\": {\n \"field\": \"zip\",\n \"type\": \"nominal\",\n \"sort\": \"-x\",\n \"axis\": { \"title\": \"Zip Code\" }\n },\n \"color\": {\n \"field\": \"zip\",\n \"type\": \"nominal\",\n \"scale\": { \"scheme\": \"category20\" }\n },\n \"opacity\": {\n \"condition\": { \"selection\": \"zip_select\", \"value\": 1 },\n \"value\": 0.3\n },\n \"tooltip\": [\n { \"field\": \"zip\", \"type\": \"nominal\" },\n { \"field\": \"VEGETATED_SQFT\", \"type\": \"quantitative\" }\n ]\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, 
dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_d23e2587_57bf_4db4_84fe_cdb7c2de638d = _curio_output\nexcept NameError:\n result_d23e2587_57bf_4db4_84fe_cdb7c2de638d = None\n", + "metadata": { + "id": "d23e2587-57bf-4db4-84fe-cdb7c2de638d", + "language": "python" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example11-notebook.ipynb b/docs/examples/notebooks/example11-notebook.ipynb new file mode 100644 index 00000000..48846f38 --- /dev/null +++ b/docs/examples/notebooks/example11-notebook.ipynb @@ -0,0 +1,232 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"7d41f188-ec81-4642-ba73-eeb46ae8ebe2\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"7d41f188-ec81-4642-ba73-eeb46ae8ebe2\",\n \"target\": \"9eafa9de-726a-404b-8c97-d3d5ec94e51d\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n # Load the CSV directly\n df = pd.read_csv(\"data/Energy_Usage_5000.csv\")\n\n # Select relevant columns and clean missing values\n grouped_data = df[[\"BUILDING TYPE\", \"TOTAL KWH\", \"TOTAL THERMS\"]].dropna()\n\n # Return cleaned DataFrame\n return grouped_data\n\n_curio_output = _curio_node()\n\ntry:\n data_7d41f188_ec81_4642_ba73_eeb46ae8ebe2 = _curio_output\nexcept NameError:\n data_7d41f188_ec81_4642_ba73_eeb46ae8ebe2 = None\n", + "metadata": { + "id": "7d41f188-ec81-4642-ba73-eeb46ae8ebe2", + "language": "python" + } + }, + { 
+ "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"9eafa9de-726a-404b-8c97-d3d5ec94e51d\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"7d41f188-ec81-4642-ba73-eeb46ae8ebe2\",\n \"target\": \"9eafa9de-726a-404b-8c97-d3d5ec94e51d\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"9eafa9de-726a-404b-8c97-d3d5ec94e51d\",\n \"target\": \"fc8f5cba-2ecd-4702-b19f-d471b49104c8\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"9eafa9de-726a-404b-8c97-d3d5ec94e51d\",\n \"target\": \"7a1c0e5b-3c39-4727-a616-f4ca97fdbb44\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_7d41f188_ec81_4642_ba73_eeb46ae8ebe2\n arg = input_0\n\n def remove_outliers(df, column):\n Q1 = df[column].quantile(0.25)\n Q3 = df[column].quantile(0.75)\n IQR = Q3 - Q1\n return df[(df[column] >= Q1 - 1.5 * IQR) & (df[column] <= Q3 + 1.5 * IQR)]\n\n def clean(df):\n # Drop only if columns exist\n required_cols = ['CENSUS BLOCK', 'BUILDING TYPE', 'BUILDING_SUBTYPE']\n drop_cols = [col for col in required_cols if col in df.columns]\n\n df_cleaned = df.dropna(subset=drop_cols).copy()\n\n # Standard KWH/THERM fill\n kwh_columns = [col for col in df.columns if 'KWH' in col and '2010' in col and 'SQFT' not in col]\n therm_columns = [col for col in df.columns if 'THERM' in col and '2010' in col and 'SQFT' not in col]\n df_cleaned[kwh_columns] = df_cleaned[kwh_columns].fillna(df_cleaned[kwh_columns].median())\n df_cleaned[therm_columns] = df_cleaned[therm_columns].fillna(df_cleaned[therm_columns].median())\n\n for col in [\n 'TOTAL KWH', 'TOTAL THERMS',\n 'OCCUPIED UNITS PERCENTAGE', 'OCCUPIED UNITS',\n 'RENTER-OCCUPIED HOUSING UNITS'\n ]:\n if col in df_cleaned.columns:\n df_cleaned[col] = df_cleaned[col].fillna(df_cleaned[col].median())\n\n 
df_cleaned['ELECTRICITY ACCOUNTS'] = pd.to_numeric(df_cleaned.get('ELECTRICITY ACCOUNTS'), errors='coerce')\n df_cleaned['GAS ACCOUNTS'] = pd.to_numeric(df_cleaned.get('GAS ACCOUNTS'), errors='coerce')\n df_cleaned['ELECTRICITY'] = df_cleaned['TOTAL KWH']\n df_cleaned['GAS'] = df_cleaned['TOTAL THERMS']\n\n df_cleaned = df_cleaned.loc[:, df_cleaned.isnull().mean() < 0.2]\n\n if 'TERM APRIL 2010' in df.columns:\n df.rename(columns={'TERM APRIL 2010': 'THERM APRIL 2010'}, inplace=True)\n\n # Standardize community names\n if 'COMMUNITY AREA NAME' in df_cleaned.columns:\n df_cleaned['COMMUNITY AREA NAME'] = df_cleaned['COMMUNITY AREA NAME'].str.strip().str.upper()\n df_cleaned['COMMUNITY AREA NAME'] = df_cleaned['COMMUNITY AREA NAME'].replace({\n \"LAKEVIEW\": \"LAKE VIEW\",\n \"O'HARE\": \"OHARE\"\n })\n\n # Ensure total columns are present\n if 'TOTAL KWH' in df_cleaned.columns:\n df_cleaned['TOTAL KWH'] = df_cleaned['TOTAL KWH'].fillna(df_cleaned['TOTAL KWH'].median())\n if 'TOTAL THERMS' in df_cleaned.columns:\n df_cleaned['TOTAL THERMS'] = df_cleaned['TOTAL THERMS'].fillna(df_cleaned['TOTAL THERMS'].median())\n\n if 'AVERAGE BUILDING AGE' in df_cleaned.columns:\n df_cleaned['DECADE BUILT'] = (2010 - df_cleaned['AVERAGE BUILDING AGE']) // 10 * 10\n\n df_cleaned = remove_outliers(df_cleaned, 'TOTAL KWH')\n df_cleaned = remove_outliers(df_cleaned, 'TOTAL THERMS')\n\n\n\n return df_cleaned\n\n\n # Run cleaning and return\n return clean(arg)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_9eafa9de_726a_404b_8c97_d3d5ec94e51d = _curio_output\nexcept NameError:\n result_9eafa9de_726a_404b_8c97_d3d5ec94e51d = None\n", + "metadata": { + "id": "9eafa9de-726a-404b-8c97-d3d5ec94e51d", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"fc8f5cba-2ecd-4702-b19f-d471b49104c8\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": 
\"9eafa9de-726a-404b-8c97-d3d5ec94e51d\",\n \"target\": \"fc8f5cba-2ecd-4702-b19f-d471b49104c8\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"fc8f5cba-2ecd-4702-b19f-d471b49104c8\",\n \"target\": \"53e8c833-202d-40d2-8ccf-d0b304566593\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"fc8f5cba-2ecd-4702-b19f-d471b49104c8\",\n \"target\": \"22d68832-0b7b-4a7c-8f11-989d4780f56f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_9eafa9de_726a_404b_8c97_d3d5ec94e51d\n arg = input_0\n\n # Assume `arg` is the cleaned DataFrame from the previous card\n import pandas as pd\n\n energy_long = pd.melt(\n arg,\n id_vars='BUILDING TYPE',\n value_vars=['TOTAL KWH', 'TOTAL THERMS'],\n var_name='ENERGY TYPE',\n value_name='VALUE'\n )\n\n total_by_type = energy_long.groupby('BUILDING TYPE')['VALUE'].transform('sum')\n energy_long['PERCENTAGE'] = (energy_long['VALUE'] / total_by_type) * 100\n\n return energy_long\n\n\n_curio_output = _curio_node()\n\ntry:\n result_fc8f5cba_2ecd_4702_b19f_d471b49104c8 = _curio_output\nexcept NameError:\n result_fc8f5cba_2ecd_4702_b19f_d471b49104c8 = None\n", + "metadata": { + "id": "fc8f5cba-2ecd-4702-b19f-d471b49104c8", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"53e8c833-202d-40d2-8ccf-d0b304566593\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"fc8f5cba-2ecd-4702-b19f-d471b49104c8\",\n \"target\": \"53e8c833-202d-40d2-8ccf-d0b304566593\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_fc8f5cba_2ecd_4702_b19f_d471b49104c8\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"energy_transformed_1\" },\n 
\"mark\": \"rect\",\n \"encoding\": {\n \"x\": { \"field\": \"BUILDING TYPE\", \"type\": \"nominal\" },\n \"y\": { \"field\": \"ENERGY TYPE\", \"type\": \"nominal\" },\n \"color\": {\n \"field\": \"VALUE\",\n \"type\": \"quantitative\",\n \"scale\": { \"scheme\": \"viridis\" }\n },\n \"tooltip\": [\n { \"field\": \"BUILDING TYPE\" },\n { \"field\": \"ENERGY TYPE\" },\n { \"field\": \"VALUE\", \"format\": \".2f\" },\n { \"field\": \"PERCENTAGE\", \"format\": \".1f\" }\n ]\n },\n \"title\": \"Energy Consumption Heatmap (KWH + THERMS)\"\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_53e8c833_202d_40d2_8ccf_d0b304566593 = _curio_output\nexcept NameError:\n result_53e8c833_202d_40d2_8ccf_d0b304566593 = None\n", + "metadata": { + "id": "53e8c833-202d-40d2-8ccf-d0b304566593", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"22d68832-0b7b-4a7c-8f11-989d4780f56f\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"fc8f5cba-2ecd-4702-b19f-d471b49104c8\",\n \"target\": \"22d68832-0b7b-4a7c-8f11-989d4780f56f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_fc8f5cba_2ecd_4702_b19f_d471b49104c8\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"energy_transformed_1\" },\n \"mark\": \"circle\",\n \"encoding\": {\n \"x\": {\n \"field\": \"BUILDING TYPE\",\n \"type\": \"nominal\",\n \"axis\": { \"labelAngle\": -45 }\n },\n \"y\": { \"field\": \"VALUE\", \"type\": 
\"quantitative\" },\n \"color\": { \"field\": \"ENERGY TYPE\", \"type\": \"nominal\" },\n \"size\": { \"field\": \"VALUE\", \"type\": \"quantitative\" },\n \"tooltip\": [\n { \"field\": \"BUILDING TYPE\" },\n { \"field\": \"ENERGY TYPE\" },\n { \"field\": \"VALUE\", \"format\": \".2f\" }\n ]\n },\n \"title\": \"Dot Plot of Energy Usage by Building Type\"\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_22d68832_0b7b_4a7c_8f11_989d4780f56f = _curio_output\nexcept NameError:\n result_22d68832_0b7b_4a7c_8f11_989d4780f56f = None\n", + "metadata": { + "id": "22d68832-0b7b-4a7c-8f11-989d4780f56f", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"7a1c0e5b-3c39-4727-a616-f4ca97fdbb44\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"9eafa9de-726a-404b-8c97-d3d5ec94e51d\",\n \"target\": \"7a1c0e5b-3c39-4727-a616-f4ca97fdbb44\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"7a1c0e5b-3c39-4727-a616-f4ca97fdbb44\",\n \"target\": \"ce74522c-d689-4cf9-bb41-e4f6a24882d4\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_9eafa9de_726a_404b_8c97_d3d5ec94e51d\n arg = input_0\n\n # Group by building type and compute average gas usage\n df_avg_gas = arg.groupby(\"BUILDING TYPE\")[\"TOTAL THERMS\"].mean().reset_index()\n df_avg_gas.rename(columns={\"TOTAL THERMS\": \"AVG TOTAL THERMS\"}, inplace=True)\n\n return df_avg_gas\n\n\n_curio_output = _curio_node()\n\ntry:\n 
result_7a1c0e5b_3c39_4727_a616_f4ca97fdbb44 = _curio_output\nexcept NameError:\n result_7a1c0e5b_3c39_4727_a616_f4ca97fdbb44 = None\n", + "metadata": { + "id": "7a1c0e5b-3c39-4727-a616-f4ca97fdbb44", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ce74522c-d689-4cf9-bb41-e4f6a24882d4\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"7a1c0e5b-3c39-4727-a616-f4ca97fdbb44\",\n \"target\": \"ce74522c-d689-4cf9-bb41-e4f6a24882d4\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_7a1c0e5b_3c39_4727_a616_f4ca97fdbb44\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"avg_gas_by_building\" },\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\n \"field\": \"BUILDING TYPE\",\n \"type\": \"nominal\",\n \"axis\": { \"labelAngle\": -45 }\n },\n \"y\": { \"field\": \"AVG TOTAL THERMS\", \"type\": \"quantitative\" },\n \"tooltip\": [\n { \"field\": \"BUILDING TYPE\" },\n { \"field\": \"AVG TOTAL THERMS\", \"format\": \".2f\" }\n ],\n \"color\": { \"field\": \"BUILDING TYPE\", \"type\": \"nominal\" }\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ce74522c_d689_4cf9_bb41_e4f6a24882d4 = _curio_output\nexcept NameError:\n result_ce74522c_d689_4cf9_bb41_e4f6a24882d4 = None\n", + "metadata": { + "id": "ce74522c-d689-4cf9-bb41-e4f6a24882d4", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": 
\"15aa8924-3c5a-42a3-a89e-456f675c469a\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"15aa8924-3c5a-42a3-a89e-456f675c469a\",\n \"target\": \"23a15771-4d9a-479f-911f-b6f758850574\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"data/Energy_Usage_5000.csv\")\n\n # Standardize column names right away for consistency\n df.columns = [col.upper().strip() for col in df.columns]\n\n # Just return full dataset for now no filtering yet\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_15aa8924_3c5a_42a3_a89e_456f675c469a = _curio_output\nexcept NameError:\n data_15aa8924_3c5a_42a3_a89e_456f675c469a = None\n", + "metadata": { + "id": "15aa8924-3c5a-42a3-a89e-456f675c469a", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"23a15771-4d9a-479f-911f-b6f758850574\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"15aa8924-3c5a-42a3-a89e-456f675c469a\",\n \"target\": \"23a15771-4d9a-479f-911f-b6f758850574\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"23a15771-4d9a-479f-911f-b6f758850574\",\n \"target\": \"2cd9e67d-4e07-4852-8a55-f6b50cd3658d\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"23a15771-4d9a-479f-911f-b6f758850574\",\n \"target\": \"0957871c-b1f3-4c89-84de-3dc8f88e49c7\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"23a15771-4d9a-479f-911f-b6f758850574\",\n \"target\": \"e54363aa-e9c8-45d2-a5d0-3938185d4445\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_15aa8924_3c5a_42a3_a89e_456f675c469a\n arg = input_0\n\n def 
remove_outliers(df, column):\n Q1 = df[column].quantile(0.25)\n Q3 = df[column].quantile(0.75)\n IQR = Q3 - Q1\n return df[(df[column] >= Q1 - 1.5 * IQR) & (df[column] <= Q3 + 1.5 * IQR)]\n\n def clean(df):\n # We assume column names are already uppercased by the data loading card\n required_cols = ['COMMUNITY AREA NAME', 'TOTAL KWH', 'TOTAL THERMS', 'BUILDING TYPE']\n df = df.dropna(subset=required_cols).copy()\n\n df['COMMUNITY AREA NAME'] = df['COMMUNITY AREA NAME'].str.strip().str.upper()\n df['TOTAL KWH'] = df['TOTAL KWH'].fillna(df['TOTAL KWH'].median())\n df['TOTAL THERMS'] = df['TOTAL THERMS'].fillna(df['TOTAL THERMS'].median())\n\n df = remove_outliers(df, 'TOTAL KWH')\n df = remove_outliers(df, 'TOTAL THERMS')\n\n return df\n\n return clean(arg)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_23a15771_4d9a_479f_911f_b6f758850574 = _curio_output\nexcept NameError:\n result_23a15771_4d9a_479f_911f_b6f758850574 = None\n", + "metadata": { + "id": "23a15771-4d9a-479f-911f-b6f758850574", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"2cd9e67d-4e07-4852-8a55-f6b50cd3658d\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"23a15771-4d9a-479f-911f-b6f758850574\",\n \"target\": \"2cd9e67d-4e07-4852-8a55-f6b50cd3658d\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"2cd9e67d-4e07-4852-8a55-f6b50cd3658d\",\n \"target\": \"4e7a7f96-4615-4099-bbb9-b5811b560361\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_23a15771_4d9a_479f_911f_b6f758850574\n arg = input_0\n\n df_avg = arg[[\"COMMUNITY AREA NAME\", \"TOTAL KWH\", \"TOTAL THERMS\"]].dropna()\n\n agg_df = df_avg.groupby(\"COMMUNITY AREA NAME\").agg({\n \"TOTAL KWH\": \"mean\",\n \"TOTAL THERMS\": \"mean\"\n }).reset_index()\n\n agg_df[\"AVG 
ENERGY USE\"] = agg_df[\"TOTAL KWH\"] + agg_df[\"TOTAL THERMS\"]\n\n top10 = agg_df.sort_values(\"AVG ENERGY USE\", ascending=False).head(10)\n\n return top10\n\n\n_curio_output = _curio_node()\n\ntry:\n result_2cd9e67d_4e07_4852_8a55_f6b50cd3658d = _curio_output\nexcept NameError:\n result_2cd9e67d_4e07_4852_8a55_f6b50cd3658d = None\n", + "metadata": { + "id": "2cd9e67d-4e07-4852-8a55-f6b50cd3658d", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"4e7a7f96-4615-4099-bbb9-b5811b560361\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"2cd9e67d-4e07-4852-8a55-f6b50cd3658d\",\n \"target\": \"4e7a7f96-4615-4099-bbb9-b5811b560361\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_2cd9e67d_4e07_4852_8a55_f6b50cd3658d\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"top10_avg_energy_by_community\" },\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\",\n \"sort\": \"-y\",\n \"axis\": { \"labelAngle\": -45 }\n },\n \"y\": {\n \"field\": \"AVG ENERGY USE\",\n \"type\": \"quantitative\",\n \"title\": \"Avg Energy Use (KWH + THERMS)\"\n },\n \"tooltip\": [\n { \"field\": \"COMMUNITY AREA NAME\" },\n { \"field\": \"AVG ENERGY USE\", \"format\": \".2f\" }\n ],\n \"color\": {\n \"field\": \"AVG ENERGY USE\",\n \"type\": \"quantitative\",\n \"scale\": { \"scheme\": \"blues\" }\n }\n },\n \"title\": \"Top 10 Communities by Avg Energy Consumption\"\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, 
\"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_4e7a7f96_4615_4099_bbb9_b5811b560361 = _curio_output\nexcept NameError:\n result_4e7a7f96_4615_4099_bbb9_b5811b560361 = None\n", + "metadata": { + "id": "4e7a7f96-4615-4099-bbb9-b5811b560361", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"0957871c-b1f3-4c89-84de-3dc8f88e49c7\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"23a15771-4d9a-479f-911f-b6f758850574\",\n \"target\": \"0957871c-b1f3-4c89-84de-3dc8f88e49c7\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"0957871c-b1f3-4c89-84de-3dc8f88e49c7\",\n \"target\": \"2ba732cf-7fa3-4a59-b41f-faa3d707994f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_23a15771_4d9a_479f_911f_b6f758850574\n arg = input_0\n\n df_scatter = arg[[\"TOTAL KWH\", \"TOTAL THERMS\", \"BUILDING TYPE\"]].dropna()\n df_scatter = df_scatter[(df_scatter[\"TOTAL KWH\"] > 0) & (df_scatter[\"TOTAL THERMS\"] > 0)]\n\n return df_scatter\n\n\n_curio_output = _curio_node()\n\ntry:\n result_0957871c_b1f3_4c89_84de_3dc8f88e49c7 = _curio_output\nexcept NameError:\n result_0957871c_b1f3_4c89_84de_3dc8f88e49c7 = None\n", + "metadata": { + "id": "0957871c-b1f3-4c89-84de-3dc8f88e49c7", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"2ba732cf-7fa3-4a59-b41f-faa3d707994f\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"0957871c-b1f3-4c89-84de-3dc8f88e49c7\",\n \"target\": \"2ba732cf-7fa3-4a59-b41f-faa3d707994f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n 
input_data = result_0957871c_b1f3_4c89_84de_3dc8f88e49c7\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"scatter_energy_usage\" },\n \"mark\": \"point\",\n \"encoding\": {\n \"x\": {\n \"field\": \"TOTAL KWH\",\n \"type\": \"quantitative\",\n \"scale\": { \"type\": \"log\" }\n },\n \"y\": {\n \"field\": \"TOTAL THERMS\",\n \"type\": \"quantitative\",\n \"scale\": { \"type\": \"log\" }\n },\n \"color\": { \"field\": \"BUILDING TYPE\", \"type\": \"nominal\" },\n \"tooltip\": [\n { \"field\": \"BUILDING TYPE\" },\n { \"field\": \"TOTAL KWH\" },\n { \"field\": \"TOTAL THERMS\" }\n ]\n },\n \"title\": \"Electricity vs Gas Usage by Building Type (Log Scale)\"\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_2ba732cf_7fa3_4a59_b41f_faa3d707994f = _curio_output\nexcept NameError:\n result_2ba732cf_7fa3_4a59_b41f_faa3d707994f = None\n", + "metadata": { + "id": "2ba732cf-7fa3-4a59-b41f-faa3d707994f", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e54363aa-e9c8-45d2-a5d0-3938185d4445\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"23a15771-4d9a-479f-911f-b6f758850574\",\n \"target\": \"e54363aa-e9c8-45d2-a5d0-3938185d4445\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"e54363aa-e9c8-45d2-a5d0-3938185d4445\",\n \"target\": \"0503ce30-db2c-4a6f-833f-459969113302\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = 
result_23a15771_4d9a_479f_911f_b6f758850574\n arg = input_0\n\n df_strip = arg[[\"BUILDING TYPE\", \"TOTAL THERMS\"]].dropna()\n\n # Remove large outliers for visualization clarity\n df_strip = df_strip[df_strip[\"TOTAL THERMS\"] < 500_000]\n\n return df_strip\n\n\n_curio_output = _curio_node()\n\ntry:\n result_e54363aa_e9c8_45d2_a5d0_3938185d4445 = _curio_output\nexcept NameError:\n result_e54363aa_e9c8_45d2_a5d0_3938185d4445 = None\n", + "metadata": { + "id": "e54363aa-e9c8-45d2-a5d0-3938185d4445", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"0503ce30-db2c-4a6f-833f-459969113302\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"e54363aa-e9c8-45d2-a5d0-3938185d4445\",\n \"target\": \"0503ce30-db2c-4a6f-833f-459969113302\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_e54363aa_e9c8_45d2_a5d0_3938185d4445\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"df_strip\" },\n \"mark\": \"tick\",\n \"encoding\": {\n \"x\": { \"field\": \"TOTAL THERMS\", \"type\": \"quantitative\" },\n \"y\": { \"field\": \"BUILDING TYPE\", \"type\": \"nominal\" },\n \"color\": { \"field\": \"BUILDING TYPE\", \"type\": \"nominal\" },\n \"tooltip\": [{ \"field\": \"BUILDING TYPE\" }, { \"field\": \"TOTAL THERMS\" }]\n },\n \"title\": \"Gas Usage Spread by Building Type (Strip Plot)\"\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n 
result_0503ce30_db2c_4a6f_833f_459969113302 = _curio_output\nexcept NameError:\n result_0503ce30_db2c_4a6f_833f_459969113302 = None\n", + "metadata": { + "id": "0503ce30-db2c-4a6f-833f-459969113302", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e62ad390-9e0a-44aa-87f8-644c33974d04\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"e62ad390-9e0a-44aa-87f8-644c33974d04\",\n \"target\": \"42ba84c5-edcf-49b5-ab3b-b5658e018f60\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\n df = pd.read_csv(\"data/Energy_Usage_5000.csv\")\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_e62ad390_9e0a_44aa_87f8_644c33974d04 = _curio_output\nexcept NameError:\n data_e62ad390_9e0a_44aa_87f8_644c33974d04 = None\n", + "metadata": { + "id": "e62ad390-9e0a-44aa-87f8-644c33974d04", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"42ba84c5-edcf-49b5-ab3b-b5658e018f60\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"e62ad390-9e0a-44aa-87f8-644c33974d04\",\n \"target\": \"42ba84c5-edcf-49b5-ab3b-b5658e018f60\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"42ba84c5-edcf-49b5-ab3b-b5658e018f60\",\n \"target\": \"e7753795-7c01-4773-a9a0-ffeb648cf9bf\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_e62ad390_9e0a_44aa_87f8_644c33974d04\n arg = input_0\n\n # Filter for KWH month columns\n month_cols = [col for col in arg.columns if col.startswith(\"KWH \") and \"2010\" in col]\n required_cols = [\"COMMUNITY AREA NAME\"] + month_cols\n\n df = arg[required_cols].dropna()\n\n # Melt to long 
format\n df_long = pd.melt(\n df,\n id_vars=[\"COMMUNITY AREA NAME\"],\n value_vars=month_cols,\n var_name=\"Month\",\n value_name=\"KWH\"\n )\n\n # Extract month name (e.g., \"JANUARY\")\n df_long[\"Month\"] = df_long[\"Month\"].str.extract(r\"KWH (.+?) 2010\")[0].str.upper()\n df_long = df_long.dropna(subset=[\"Month\", \"KWH\", \"COMMUNITY AREA NAME\"])\n\n return df_long\n\n\n_curio_output = _curio_node()\n\ntry:\n result_42ba84c5_edcf_49b5_ab3b_b5658e018f60 = _curio_output\nexcept NameError:\n result_42ba84c5_edcf_49b5_ab3b_b5658e018f60 = None\n", + "metadata": { + "id": "42ba84c5-edcf-49b5-ab3b-b5658e018f60", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e7753795-7c01-4773-a9a0-ffeb648cf9bf\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"42ba84c5-edcf-49b5-ab3b-b5658e018f60\",\n \"target\": \"e7753795-7c01-4773-a9a0-ffeb648cf9bf\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"e7753795-7c01-4773-a9a0-ffeb648cf9bf\",\n \"target\": \"d537cc76-27b7-41b4-95dc-7ec65ec1ec42\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_42ba84c5_edcf_49b5_ab3b_b5658e018f60\n arg = input_0\n\n # Get top 20 communities by average KWH\n top_20 = arg.groupby(\"COMMUNITY AREA NAME\")[\"KWH\"].mean().sort_values(ascending=False).head(20).index\n\n # Filter the long-form data\n df_top20 = arg[arg[\"COMMUNITY AREA NAME\"].isin(top_20)].copy()\n return df_top20\n\n\n_curio_output = _curio_node()\n\ntry:\n result_e7753795_7c01_4773_a9a0_ffeb648cf9bf = _curio_output\nexcept NameError:\n result_e7753795_7c01_4773_a9a0_ffeb648cf9bf = None\n", + "metadata": { + "id": "e7753795-7c01-4773-a9a0-ffeb648cf9bf", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": 
\"d537cc76-27b7-41b4-95dc-7ec65ec1ec42\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"e7753795-7c01-4773-a9a0-ffeb648cf9bf\",\n \"target\": \"d537cc76-27b7-41b4-95dc-7ec65ec1ec42\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_e7753795_7c01_4773_a9a0_ffeb648cf9bf\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"params\": [\n {\n \"name\": \"commPick\",\n \"select\": {\n \"type\": \"point\",\n \"fields\": [\"COMMUNITY AREA NAME\"]\n }\n }\n ],\n \"vconcat\": [\n {\n \"title\": \"Click on a Line to Highlight a Community\",\n \"width\": 650,\n \"height\": 400,\n \"mark\": {\n \"type\": \"line\",\n \"interpolate\": \"monotone\"\n },\n \"encoding\": {\n \"x\": {\n \"field\": \"Month\",\n \"type\": \"nominal\",\n \"sort\": [\n \"JANUARY\",\n \"FEBRUARY\",\n \"MARCH\",\n \"APRIL\",\n \"MAY\",\n \"JUNE\",\n \"JULY\",\n \"AUGUST\",\n \"SEPTEMBER\",\n \"OCTOBER\",\n \"NOVEMBER\",\n \"DECEMBER\"\n ],\n \"axis\": { \"labelAngle\": 45 }\n },\n \"y\": {\n \"field\": \"KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Total KWH\",\n \"scale\": { \"zero\": False }\n },\n \"color\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\",\n \"scale\": { \"scheme\": \"category20\" },\n \"legend\": { \"columns\": 2 }\n },\n \"opacity\": {\n \"condition\": { \"param\": \"commPick\", \"value\": 1 },\n \"value\": 0.2\n },\n \"tooltip\": [\n { \"field\": \"COMMUNITY AREA NAME\", \"title\": \"Community\" },\n { \"field\": \"Month\" },\n { \"field\": \"KWH\", \"format\": \",.0f\" }\n ]\n }\n },\n {\n \"title\": \"Average KWH of Selected Community\",\n \"width\": 650,\n \"height\": 300,\n \"mark\": \"bar\",\n \"encoding\": {\n \"y\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\",\n \"sort\": \"-x\"\n },\n \"x\": {\n \"aggregate\": \"mean\",\n 
\"field\": \"KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Avg KWH\"\n },\n \"color\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\"\n }\n },\n \"transform\": [{ \"filter\": { \"param\": \"commPick\" } }]\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_d537cc76_27b7_41b4_95dc_7ec65ec1ec42 = _curio_output\nexcept NameError:\n result_d537cc76_27b7_41b4_95dc_7ec65ec1ec42 = None\n", + "metadata": { + "id": "d537cc76-27b7-41b4-95dc-7ec65ec1ec42", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e3b53a07-079f-402c-82c5-69d30fe06b24\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"e3b53a07-079f-402c-82c5-69d30fe06b24\",\n \"target\": \"03b3d67d-9bfd-466d-89c3-a616f6951f7d\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"data/Energy_Usage_5000.csv\")\n\n month_cols = [col for col in df.columns if col.startswith(\"KWH \") and \"2010\" in col]\n required_cols = [\"COMMUNITY AREA NAME\"] + month_cols\n\n df = df[required_cols].dropna()\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_e3b53a07_079f_402c_82c5_69d30fe06b24 = _curio_output\nexcept NameError:\n data_e3b53a07_079f_402c_82c5_69d30fe06b24 = None\n", + "metadata": { + "id": "e3b53a07-079f-402c-82c5-69d30fe06b24", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"03b3d67d-9bfd-466d-89c3-a616f6951f7d\",\n \"type\": 
\"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"e3b53a07-079f-402c-82c5-69d30fe06b24\",\n \"target\": \"03b3d67d-9bfd-466d-89c3-a616f6951f7d\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"03b3d67d-9bfd-466d-89c3-a616f6951f7d\",\n \"target\": \"2deacd81-afd9-4521-bd35-636b30e7c755\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_e3b53a07_079f_402c_82c5_69d30fe06b24\n arg = input_0\n\n df_long = pd.melt(\n arg,\n id_vars=[\"COMMUNITY AREA NAME\"],\n value_vars=[col for col in arg.columns if \"KWH\" in col],\n var_name=\"Month\",\n value_name=\"KWH\"\n )\n\n df_long[\"Month\"] = df_long[\"Month\"].str.extract(r\"KWH (.+?) 2010\")[0].str.upper()\n df_long = df_long.dropna(subset=[\"Month\", \"KWH\", \"COMMUNITY AREA NAME\"])\n\n return df_long\n\n\n_curio_output = _curio_node()\n\ntry:\n result_03b3d67d_9bfd_466d_89c3_a616f6951f7d = _curio_output\nexcept NameError:\n result_03b3d67d_9bfd_466d_89c3_a616f6951f7d = None\n", + "metadata": { + "id": "03b3d67d-9bfd-466d-89c3-a616f6951f7d", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"2deacd81-afd9-4521-bd35-636b30e7c755\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"03b3d67d-9bfd-466d-89c3-a616f6951f7d\",\n \"target\": \"2deacd81-afd9-4521-bd35-636b30e7c755\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"2deacd81-afd9-4521-bd35-636b30e7c755\",\n \"target\": \"d7ba337c-dd7b-49f8-970b-e3114585c58b\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_03b3d67d_9bfd_466d_89c3_a616f6951f7d\n arg = input_0\n\n top20_names = arg.groupby(\"COMMUNITY AREA 
NAME\")[\"KWH\"].mean().sort_values(ascending=False).head(20).index\n df_top20 = arg[arg[\"COMMUNITY AREA NAME\"].isin(top20_names)].copy()\n\n return df_top20\n\n\n_curio_output = _curio_node()\n\ntry:\n result_2deacd81_afd9_4521_bd35_636b30e7c755 = _curio_output\nexcept NameError:\n result_2deacd81_afd9_4521_bd35_636b30e7c755 = None\n", + "metadata": { + "id": "2deacd81-afd9-4521-bd35-636b30e7c755", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"d7ba337c-dd7b-49f8-970b-e3114585c58b\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"2deacd81-afd9-4521-bd35-636b30e7c755\",\n \"target\": \"d7ba337c-dd7b-49f8-970b-e3114585c58b\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_2deacd81_afd9_4521_bd35_636b30e7c755\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"vconcat\": [\n {\n \"title\": \"Monthly Average Energy Usage (Brush to Select Months)\",\n \"params\": [\n {\n \"name\": \"monthBrush\",\n \"select\": {\n \"type\": \"interval\",\n \"encodings\": [\"x\"]\n }\n }\n ],\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\n \"field\": \"Month\",\n \"type\": \"ordinal\",\n \"scale\": {\n \"domain\": [\n \"JANUARY\",\n \"FEBRUARY\",\n \"MARCH\",\n \"APRIL\",\n \"MAY\",\n \"JUNE\",\n \"JULY\",\n \"AUGUST\",\n \"SEPTEMBER\",\n \"OCTOBER\",\n \"NOVEMBER\",\n \"DECEMBER\"\n ]\n },\n \"axis\": {\n \"labelAngle\": -40,\n \"labelFontSize\": 11\n }\n },\n \"y\": {\n \"aggregate\": \"mean\",\n \"field\": \"KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Avg KWH\"\n },\n \"tooltip\": [\n { \"field\": \"Month\" },\n { \"aggregate\": \"mean\", \"field\": \"KWH\", \"title\": \"Avg KWH\" }\n ],\n \"color\": {\n \"value\": \"#4C78A8\"\n }\n }\n },\n {\n \"title\": \"Avg KWH by Community (Filtered by Selected 
Months)\",\n \"transform\": [\n {\n \"filter\": { \"param\": \"monthBrush\" }\n }\n ],\n \"mark\": \"bar\",\n \"encoding\": {\n \"y\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\",\n \"sort\": \"-x\"\n },\n \"x\": {\n \"aggregate\": \"mean\",\n \"field\": \"KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Avg KWH\"\n },\n \"color\": {\n \"field\": \"COMMUNITY AREA NAME\",\n \"type\": \"nominal\"\n },\n \"tooltip\": [\n { \"field\": \"COMMUNITY AREA NAME\" },\n { \"aggregate\": \"mean\", \"field\": \"KWH\", \"title\": \"Avg KWH\" }\n ]\n }\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_d7ba337c_dd7b_49f8_970b_e3114585c58b = _curio_output\nexcept NameError:\n result_d7ba337c_dd7b_49f8_970b_e3114585c58b = None\n", + "metadata": { + "id": "d7ba337c-dd7b-49f8-970b-e3114585c58b", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"f5b36cc1-63de-4c10-aca9-c28dd2f3ba3a\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"f5b36cc1-63de-4c10-aca9-c28dd2f3ba3a\",\n \"target\": \"5c3fa75a-a919-432b-9e95-83e6f1691c8d\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"data/Energy_Usage_5000.csv\")\n\n columns_needed = [\"AVERAGE STORIES\", \"AVERAGE BUILDING AGE\", \"TOTAL KWH\"] + [col for col in df.columns if col.startswith(\"KWH \") and \"2010\" in col]\n\n df = df[columns_needed].dropna()\n return df\n\n_curio_output = _curio_node()\n\ntry:\n 
data_f5b36cc1_63de_4c10_aca9_c28dd2f3ba3a = _curio_output\nexcept NameError:\n data_f5b36cc1_63de_4c10_aca9_c28dd2f3ba3a = None\n", + "metadata": { + "id": "f5b36cc1-63de-4c10-aca9-c28dd2f3ba3a", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"5c3fa75a-a919-432b-9e95-83e6f1691c8d\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"f5b36cc1-63de-4c10-aca9-c28dd2f3ba3a\",\n \"target\": \"5c3fa75a-a919-432b-9e95-83e6f1691c8d\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"5c3fa75a-a919-432b-9e95-83e6f1691c8d\",\n \"target\": \"ffa68346-1c37-48f0-9284-7d34a397692f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_f5b36cc1_63de_4c10_aca9_c28dd2f3ba3a\n arg = input_0\n\n def story_bracket(stories):\n if stories <= 1:\n return \"1 story\"\n elif stories == 2:\n return \"2 stories\"\n elif 3 <= stories <= 5:\n return \"3-5 stories\"\n elif 6 <= stories <= 10:\n return \"6-10 stories\"\n else:\n return \"11+ stories\"\n\n def age_bracket(age):\n if age <= 20:\n return \"0-20 yrs\"\n elif age <= 40:\n return \"21-40 yrs\"\n elif age <= 60:\n return \"41-60 yrs\"\n elif age <= 80:\n return \"61-80 yrs\"\n else:\n return \"81+ yrs\"\n\n df = arg.copy()\n\n df[\"STORY BRACKET\"] = df[\"AVERAGE STORIES\"].apply(story_bracket)\n df[\"AGE BRACKET\"] = df[\"AVERAGE BUILDING AGE\"].apply(age_bracket)\n return df\n\n\n_curio_output = _curio_node()\n\ntry:\n result_5c3fa75a_a919_432b_9e95_83e6f1691c8d = _curio_output\nexcept NameError:\n result_5c3fa75a_a919_432b_9e95_83e6f1691c8d = None\n", + "metadata": { + "id": "5c3fa75a-a919-432b-9e95-83e6f1691c8d", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ffa68346-1c37-48f0-9284-7d34a397692f\",\n \"type\": 
\"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"5c3fa75a-a919-432b-9e95-83e6f1691c8d\",\n \"target\": \"ffa68346-1c37-48f0-9284-7d34a397692f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"ffa68346-1c37-48f0-9284-7d34a397692f\",\n \"target\": \"82ef1d71-15c8-481c-a61b-eb172822f7a6\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_5c3fa75a_a919_432b_9e95_83e6f1691c8d\n arg = input_0\n\n import pandas as pd\n\n df_long = pd.melt(\n arg,\n id_vars=[\"STORY BRACKET\", \"AGE BRACKET\", \"TOTAL KWH\"],\n value_vars=[col for col in arg.columns if col.startswith(\"KWH \")],\n var_name=\"Month\",\n value_name=\"KWH\"\n )\n\n df_long[\"Month\"] = df_long[\"Month\"].str.extract(r\"KWH (.+?) 2010\")[0].str.upper()\n df_long = df_long.dropna(subset=[\"Month\", \"KWH\", \"STORY BRACKET\", \"AGE BRACKET\"])\n\n return df_long\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ffa68346_1c37_48f0_9284_7d34a397692f = _curio_output\nexcept NameError:\n result_ffa68346_1c37_48f0_9284_7d34a397692f = None\n", + "metadata": { + "id": "ffa68346-1c37-48f0-9284-7d34a397692f", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"82ef1d71-15c8-481c-a61b-eb172822f7a6\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"ffa68346-1c37-48f0-9284-7d34a397692f\",\n \"target\": \"82ef1d71-15c8-481c-a61b-eb172822f7a6\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_ffa68346_1c37_48f0_9284_7d34a397692f\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"params\": [\n {\n \"name\": \"storySelect\",\n \"bind\": {\n \"input\": \"select\",\n 
\"options\": [\n \"1 story\",\n \"2 stories\",\n \"3-5 stories\",\n \"6-10 stories\",\n \"11+ stories\"\n ]\n },\n \"value\": \"1 story\"\n }\n ],\n \"vconcat\": [\n {\n \"width\": 600,\n \"title\": {\n \"text\": \"Distribution of Total KWH by Age (Box Plot)\",\n \"align\": \"center\"\n },\n \"transform\": [{ \"filter\": \"datum['STORY BRACKET'] == storySelect\" }],\n \"mark\": \"boxplot\",\n \"encoding\": {\n \"x\": {\n \"field\": \"AGE BRACKET\",\n \"type\": \"nominal\",\n \"sort\": [\"0-20 yrs\", \"21-40 yrs\", \"41-60 yrs\", \"61-80 yrs\", \"81+ yrs\"]\n },\n \"y\": {\n \"field\": \"TOTAL KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Total KWH\"\n },\n \"color\": {\n \"field\": \"AGE BRACKET\",\n \"type\": \"nominal\",\n \"legend\": {\n \"orient\": \"right\",\n \"anchor\": \"middle\",\n \"direction\": \"vertical\"\n }\n },\n \"tooltip\": [{ \"field\": \"AGE BRACKET\" }, { \"field\": \"TOTAL KWH\" }]\n }\n },\n {\n \"width\": 600,\n \"title\": {\n \"text\": \"Monthly Avg KWH Trend by Age (for Selected Stories)\",\n \"align\": \"center\"\n },\n \"transform\": [{ \"filter\": \"datum['STORY BRACKET'] == storySelect\" }],\n \"mark\": { \"type\": \"line\", \"point\": True },\n \"encoding\": {\n \"x\": {\n \"field\": \"Month\",\n \"type\": \"ordinal\",\n \"sort\": [\n \"JANUARY\",\n \"FEBRUARY\",\n \"MARCH\",\n \"APRIL\",\n \"MAY\",\n \"JUNE\",\n \"JULY\",\n \"AUGUST\",\n \"SEPTEMBER\",\n \"OCTOBER\",\n \"NOVEMBER\",\n \"DECEMBER\"\n ]\n },\n \"y\": {\n \"aggregate\": \"mean\",\n \"field\": \"KWH\",\n \"type\": \"quantitative\",\n \"title\": \"Avg Monthly KWH\"\n },\n \"color\": {\n \"field\": \"AGE BRACKET\",\n \"type\": \"nominal\",\n \"legend\": {\n \"orient\": \"right\",\n \"anchor\": \"middle\",\n \"direction\": \"vertical\"\n }\n },\n \"tooltip\": [\n { \"field\": \"Month\" },\n { \"aggregate\": \"mean\", \"field\": \"KWH\" },\n { \"field\": \"AGE BRACKET\" }\n ]\n }\n }\n ],\n \"config\": {\n \"concat\": { \"align\": \"center\" }\n }\n }\n\n values = 
input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_82ef1d71_15c8_481c_a61b_eb172822f7a6 = _curio_output\nexcept NameError:\n result_82ef1d71_15c8_481c_a61b_eb172822f7a6 = None\n", + "metadata": { + "id": "82ef1d71-15c8-481c-a61b-eb172822f7a6", + "language": "python" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example2-notebook.ipynb b/docs/examples/notebooks/example2-notebook.ipynb new file mode 100644 index 00000000..768fd662 --- /dev/null +++ b/docs/examples/notebooks/example2-notebook.ipynb @@ -0,0 +1,120 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"58ded8d4-4db6-40ff-bfef-a195997b9865\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"58ded8d4-4db6-40ff-bfef-a195997b9865\",\n \"target\": \"c152be2d-20c5-4a0b-a05e-275e0d12d317\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"id\": \"edge_in_0_58ded8d4_4db6_40ff_bfef_a195997b9865_c93d6a9c_14ac_4c69_83d2_19e87e99dae0\",\n \"source\": \"58ded8d4-4db6-40ff-bfef-a195997b9865\",\n \"target\": \"c93d6a9c-14ac-4c69-83d2-19e87e99dae0\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n },\n {\n \"source\": \"58ded8d4-4db6-40ff-bfef-a195997b9865\",\n \"target\": \"6f726f1a-700f-49bf-b6eb-8176c7f559e9\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef 
_curio_node():\n\n # load box\n import utk\n import pandas as pd\n\n uc = utk.OSM.load([42.336844, -71.113459, 42.345559, -71.099216], layers=[{'name':'buildings', 'args': {'sizeCells': 5}}, {'name':'surface', 'args': {'sizeCells': 5}}, 'parks'])\n\n # buildings\n gdf_buildings = uc.layers['gdf']['sections'][0]\n gdf_buildings['thematic'] = 0.5\n gdf_buildings.metadata = {\n 'name': 'buildings'\n }\n\n #surface\n json_surface = uc.layers['json'][1]\n gdf_surface = uc.layers['gdf']['objects'][1]\n gdf_surface.metadata = {\n 'name': 'surface'\n }\n\n #parks\n json_parks = uc.layers['json'][2]\n gdf_parks = uc.layers['gdf']['objects'][2]\n gdf_parks.metadata = {\n 'name': 'parks'\n }\n\n # Wrap JSON layers in DataFrames (DATA_LOADING supports DataFrame/GeoDataFrame output)\n df_json_surface = pd.DataFrame({'json_data': [json_surface]})\n df_json_parks = pd.DataFrame({'json_data': [json_parks]})\n\n return (df_json_surface, df_json_parks, gdf_buildings, gdf_surface, gdf_parks)\n\n_curio_output = _curio_node()\n\ntry:\n data_58ded8d4_4db6_40ff_bfef_a195997b9865 = _curio_output\nexcept NameError:\n data_58ded8d4_4db6_40ff_bfef_a195997b9865 = None\n", + "metadata": { + "id": "58ded8d4-4db6-40ff-bfef-a195997b9865", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"c152be2d-20c5-4a0b-a05e-275e0d12d317\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"58ded8d4-4db6-40ff-bfef-a195997b9865\",\n \"target\": \"c152be2d-20c5-4a0b-a05e-275e0d12d317\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"c152be2d-20c5-4a0b-a05e-275e0d12d317\",\n \"target\": \"4afbbd11-ef72-406d-a99a-5bc29c6a17c1\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_58ded8d4_4db6_40ff_bfef_a195997b9865\n arg = input_0\n\n return 
arg[2]\n\n\n_curio_output = _curio_node()\n\ntry:\n result_c152be2d_20c5_4a0b_a05e_275e0d12d317 = _curio_output\nexcept NameError:\n result_c152be2d_20c5_4a0b_a05e_275e0d12d317 = None\n", + "metadata": { + "id": "c152be2d-20c5-4a0b-a05e-275e0d12d317", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"4afbbd11-ef72-406d-a99a-5bc29c6a17c1\",\n \"type\": \"DATA_POOL\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"c152be2d-20c5-4a0b-a05e-275e0d12d317\",\n \"target\": \"4afbbd11-ef72-406d-a99a-5bc29c6a17c1\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"75eb6da0-7ae4-4af5-9faa-fbf986d1a0f4\",\n \"target\": \"4afbbd11-ef72-406d-a99a-5bc29c6a17c1\",\n \"sourceHandle\": \"in/out\",\n \"targetHandle\": \"in/out\",\n \"type\": \"Interaction\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"4afbbd11-ef72-406d-a99a-5bc29c6a17c1\",\n \"target\": \"7f3cc834-2879-4bc3-bc58-839602f4b3e7\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"4afbbd11-ef72-406d-a99a-5bc29c6a17c1\",\n \"target\": \"75eb6da0-7ae4-4af5-9faa-fbf986d1a0f4\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n return result_c152be2d_20c5_4a0b_a05e_275e0d12d317\n\n\n_curio_output = _curio_node()\n\ntry:\n pool_4afbbd11_ef72_406d_a99a_5bc29c6a17c1 = _curio_output\nexcept NameError:\n pool_4afbbd11_ef72_406d_a99a_5bc29c6a17c1 = None\n", + "metadata": { + "id": "4afbbd11-ef72-406d-a99a-5bc29c6a17c1", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"75eb6da0-7ae4-4af5-9faa-fbf986d1a0f4\",\n \"type\": \"VIS_UTK\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"4afbbd11-ef72-406d-a99a-5bc29c6a17c1\",\n \"target\": \"75eb6da0-7ae4-4af5-9faa-fbf986d1a0f4\",\n 
\"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"75eb6da0-7ae4-4af5-9faa-fbf986d1a0f4\",\n \"target\": \"4afbbd11-ef72-406d-a99a-5bc29c6a17c1\",\n \"sourceHandle\": \"in/out\",\n \"targetHandle\": \"in/out\",\n \"type\": \"Interaction\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = pool_4afbbd11_ef72_406d_a99a_5bc29c6a17c1\n\n\n # Configure UTK for serverless/notebook environment\n import utk\n import json\n from IPython.display import HTML, Javascript, display\n\n utk.Environment.serverless = True\n\n # Create grammar structure\n grammar = {\n \"components\": [{\n \"id\": \"notebook_map\",\n \"json\": {\n \"camera\": {\n \"wEye\": [0, 0, 1000],\n \"wLookAt\": [0, 0, 0],\n \"wUp\": [0, 1, 0]\n },\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": [],\n \"map_style\": [],\n \"widgets\": [{\n \"type\": \"TOGGLE_KNOT\"\n }]\n },\n \"position\": {\"x\": 0, \"y\": 0, \"width\": 12, \"height\": 4}\n }],\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": []\n }\n\n # If content has grammar, parse and merge it\n grammar_content = \"\"\"{}\"\"\".strip()\n if grammar_content and grammar_content != \"{}\":\n try:\n parsed_grammar = json.loads(grammar_content)\n # Merge parsed grammar with our structure\n if \"components\" in parsed_grammar:\n grammar[\"components\"][0][\"json\"].update(parsed_grammar.get(\"json\", {}))\n if \"knots\" in parsed_grammar:\n grammar[\"knots\"] = parsed_grammar[\"knots\"]\n except json.JSONDecodeError:\n pass\n\n # Load geospatial data if available\n geospatial_data = None\n if input_0 is not None:\n data_input = input_0\n # Handle multi-input case\n if isinstance(data_input, list):\n data_input = data_input[0] if data_input else None\n \n if data_input is not None:\n # Check if it's a geodataframe\n try:\n import geopandas as gpd\n if isinstance(data_input, gpd.GeoDataFrame):\n # Convert to GeoJSON\n geojson_data = json.loads(data_input.to_json())\n geospatial_data = 
utk.physical_from_geojson(geojson_data)\n \n # Add layers to grammar\n if geospatial_data and \"components\" in grammar:\n if \"layers\" not in grammar[\"components\"][0][\"json\"]:\n grammar[\"components\"][0][\"json\"][\"layers\"] = []\n # Add layer for the geospatial data\n grammar[\"components\"][0][\"json\"][\"layers\"].append({\n \"type\": \"geospatial\",\n \"data\": geospatial_data.to_dict() if hasattr(geospatial_data, 'to_dict') else geospatial_data\n })\n except Exception as e:\n pass\n\n # Create HTML container\n html_container = f'
'\n display(HTML(html_container))\n\n # Initialize UTK in browser\n js_initialization = f\"\"\"\n require(['utk'], function(utk) {{\n utk.Environment.serverless = true;\n const container = document.getElementById('utk-container-75eb6da0');\n const grammar = {json.dumps(grammar)};\n \n try {{\n const interpreter = new utk.GrammarInterpreter('notebook', grammar, container);\n // Store reference for potential interactions\n window._utk_interpreter_75eb6da0 = interpreter;\n }} catch(e) {{\n console.error('UTK initialization error:', e);\n container.innerHTML = '
Error initializing UTK visualization
';\n }}\n }});\n \"\"\"\n display(Javascript(js_initialization))\n\n\n_curio_output = _curio_node()\n\ntry:\n result_75eb6da0_7ae4_4af5_9faa_fbf986d1a0f4 = _curio_output\n result_75eb6da0_7ae4_4af5_9faa_fbf986d1a0f4_in_out = result_75eb6da0_7ae4_4af5_9faa_fbf986d1a0f4\nexcept NameError:\n result_75eb6da0_7ae4_4af5_9faa_fbf986d1a0f4 = None\n result_75eb6da0_7ae4_4af5_9faa_fbf986d1a0f4_in_out = None\n\nfrom IPython.display import display\ndisplay(utk-container-75eb6da0)\n", + "metadata": { + "id": "75eb6da0-7ae4-4af5-9faa-fbf986d1a0f4", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"7f3cc834-2879-4bc3-bc58-839602f4b3e7\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"4afbbd11-ef72-406d-a99a-5bc29c6a17c1\",\n \"target\": \"7f3cc834-2879-4bc3-bc58-839602f4b3e7\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"id\": \"edge_in_1_7f3cc834_2879_4bc3_bc58_839602f4b3e7_c93d6a9c_14ac_4c69_83d2_19e87e99dae0\",\n \"source\": \"7f3cc834-2879-4bc3-bc58-839602f4b3e7\",\n \"target\": \"c93d6a9c-14ac-4c69-83d2-19e87e99dae0\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = pool_4afbbd11_ef72_406d_a99a_5bc29c6a17c1\n arg = input_0\n\n gdf = arg\n\n gdf.loc[gdf['interacted'] == '1', 'height'] *= [!! 
Height Multiplier$INPUT_VALUE$14 !!]\n\n gdf.metadata = {\n 'name': 'buildings'\n }\n\n return gdf\n\n\n_curio_output = _curio_node()\n\ntry:\n result_7f3cc834_2879_4bc3_bc58_839602f4b3e7 = _curio_output\nexcept NameError:\n result_7f3cc834_2879_4bc3_bc58_839602f4b3e7 = None\n", + "metadata": { + "id": "7f3cc834-2879-4bc3-bc58-839602f4b3e7", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"c93d6a9c-14ac-4c69-83d2-19e87e99dae0\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"id\": \"edge_in_0_58ded8d4_4db6_40ff_bfef_a195997b9865_c93d6a9c_14ac_4c69_83d2_19e87e99dae0\",\n \"source\": \"58ded8d4-4db6-40ff-bfef-a195997b9865\",\n \"target\": \"c93d6a9c-14ac-4c69-83d2-19e87e99dae0\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n },\n {\n \"id\": \"edge_in_1_7f3cc834_2879_4bc3_bc58_839602f4b3e7_c93d6a9c_14ac_4c69_83d2_19e87e99dae0\",\n \"source\": \"7f3cc834-2879-4bc3-bc58-839602f4b3e7\",\n \"target\": \"c93d6a9c-14ac-4c69-83d2-19e87e99dae0\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"c93d6a9c-14ac-4c69-83d2-19e87e99dae0\",\n \"target\": \"ffca6c56-c04c-4d65-a8ca-ec40eb1fae99\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n inputs = [\n data_58ded8d4_4db6_40ff_bfef_a195997b9865,\n result_7f3cc834_2879_4bc3_bc58_839602f4b3e7\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_c93d6a9c_14ac_4c69_83d2_19e87e99dae0 = _curio_output\nexcept NameError:\n merged_c93d6a9c_14ac_4c69_83d2_19e87e99dae0 = None\n", + "metadata": { + "id": "c93d6a9c-14ac-4c69-83d2-19e87e99dae0", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ffca6c56-c04c-4d65-a8ca-ec40eb1fae99\",\n \"type\": 
\"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"c93d6a9c-14ac-4c69-83d2-19e87e99dae0\",\n \"target\": \"ffca6c56-c04c-4d65-a8ca-ec40eb1fae99\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"ffca6c56-c04c-4d65-a8ca-ec40eb1fae99\",\n \"target\": \"9075a191-c09b-440b-ae42-5e618261506e\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = merged_c93d6a9c_14ac_4c69_83d2_19e87e99dae0\n arg = input_0\n\n import utk\r\n\r\n json_surface = arg[0][0].iloc[0]['json_data']\r\n gdf_surface = arg[0][3]\r\n json_parks = arg[0][1].iloc[0]['json_data']\r\n gdf_parks = arg[0][4]\r\n\r\n gdf_buildings = arg[1]\r\n\r\n json_layers = [json_surface]\r\n # buildings json layer\r\n gdf_buildings = gdf_buildings.set_crs('4326')\r\n mesh = utk.OSM.mesh_from_buildings_gdf(gdf_buildings, 5)['data']\r\n\r\n json_buildings = {\r\n 'id': 'buildings',\r\n 'type': 'BUILDINGS_LAYER',\r\n 'renderStyle': ['SMOOTH_COLOR_MAP_TEX'],\r\n 'styleKey': 'building',\r\n 'data': mesh\r\n }\r\n\r\n json_layers.append(json_buildings)\r\n\r\n shadow = utk.data.shadow(json_layers, [[[!! Start date$INPUT_TEXT$12/26/2015 10:00 !!], [!! 
End date$INPUT_TEXT$12/26/2015 16:01 !!]]])\r\n\r\n thematic_layers = shadow.get_shadow_by_layer()\r\n\r\n building_index = -1\r\n current_building_id = -1\r\n\r\n values_per_row = []\r\n\r\n for index, row in gdf_buildings.iterrows():\r\n if(row['building_id'] != current_building_id):\r\n current_building_id = row['building_id']\r\n building_index += 1\r\n\r\n values_per_row.append(thematic_layers['shadow0_buildings']['values'][building_index])\r\n\r\n gdf_buildings[\"shadow0_buildings\"] = values_per_row\r\n\r\n gdf_buildings.metadata = {\r\n 'name': 'buildings'\r\n }\r\n\r\n values_per_row = []\r\n\r\n for index, row in gdf_surface.iterrows():\r\n values_per_row.append(thematic_layers['shadow0_surface']['values'][index])\r\n\r\n gdf_surface[\"shadow0_surface\"] = values_per_row\r\n gdf_surface[\"surface_id\"] = 0 # surface is a single big bounding box\r\n\r\n gdf_surface.metadata = {\r\n 'name': 'surface'\r\n }\r\n\r\n return (gdf_surface, gdf_parks, gdf_buildings)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ffca6c56_c04c_4d65_a8ca_ec40eb1fae99 = _curio_output\nexcept NameError:\n result_ffca6c56_c04c_4d65_a8ca_ec40eb1fae99 = None\n", + "metadata": { + "id": "ffca6c56-c04c-4d65-a8ca-ec40eb1fae99", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"9075a191-c09b-440b-ae42-5e618261506e\",\n \"type\": \"VIS_UTK\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"ffca6c56-c04c-4d65-a8ca-ec40eb1fae99\",\n \"target\": \"9075a191-c09b-440b-ae42-5e618261506e\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"id\": \"edge_in_1_9075a191_c09b_440b_ae42_5e618261506e_98352a3b_23fd_4fa0_8a5f_e846e4076b76\",\n \"source\": \"9075a191-c09b-440b-ae42-5e618261506e\",\n \"target\": \"98352a3b-23fd-4fa0-8a5f-e846e4076b76\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ]\n}\n\ndef _curio_node():\n\n 
input_0 = result_ffca6c56_c04c_4d65_a8ca_ec40eb1fae99\n\n\n # Configure UTK for serverless/notebook environment\n import utk\n import json\n from IPython.display import HTML, Javascript, display\n\n utk.Environment.serverless = True\n\n # Create grammar structure\n grammar = {\n \"components\": [{\n \"id\": \"notebook_map\",\n \"json\": {\n \"camera\": {\n \"wEye\": [0, 0, 1000],\n \"wLookAt\": [0, 0, 0],\n \"wUp\": [0, 1, 0]\n },\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": [],\n \"map_style\": [],\n \"widgets\": [{\n \"type\": \"TOGGLE_KNOT\"\n }]\n },\n \"position\": {\"x\": 0, \"y\": 0, \"width\": 12, \"height\": 4}\n }],\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": []\n }\n\n # If content has grammar, parse and merge it\n grammar_content = \"\"\"{}\"\"\".strip()\n if grammar_content and grammar_content != \"{}\":\n try:\n parsed_grammar = json.loads(grammar_content)\n # Merge parsed grammar with our structure\n if \"components\" in parsed_grammar:\n grammar[\"components\"][0][\"json\"].update(parsed_grammar.get(\"json\", {}))\n if \"knots\" in parsed_grammar:\n grammar[\"knots\"] = parsed_grammar[\"knots\"]\n except json.JSONDecodeError:\n pass\n\n # Load geospatial data if available\n geospatial_data = None\n if input_0 is not None:\n data_input = input_0\n # Handle multi-input case\n if isinstance(data_input, list):\n data_input = data_input[0] if data_input else None\n \n if data_input is not None:\n # Check if it's a geodataframe\n try:\n import geopandas as gpd\n if isinstance(data_input, gpd.GeoDataFrame):\n # Convert to GeoJSON\n geojson_data = json.loads(data_input.to_json())\n geospatial_data = utk.physical_from_geojson(geojson_data)\n \n # Add layers to grammar\n if geospatial_data and \"components\" in grammar:\n if \"layers\" not in grammar[\"components\"][0][\"json\"]:\n grammar[\"components\"][0][\"json\"][\"layers\"] = []\n # Add layer for the geospatial data\n grammar[\"components\"][0][\"json\"][\"layers\"].append({\n 
\"type\": \"geospatial\",\n \"data\": geospatial_data.to_dict() if hasattr(geospatial_data, 'to_dict') else geospatial_data\n })\n except Exception as e:\n pass\n\n # Create HTML container\n html_container = f'
'\n display(HTML(html_container))\n\n # Initialize UTK in browser\n js_initialization = f\"\"\"\n require(['utk'], function(utk) {{\n utk.Environment.serverless = true;\n const container = document.getElementById('utk-container-9075a191');\n const grammar = {json.dumps(grammar)};\n \n try {{\n const interpreter = new utk.GrammarInterpreter('notebook', grammar, container);\n // Store reference for potential interactions\n window._utk_interpreter_9075a191 = interpreter;\n }} catch(e) {{\n console.error('UTK initialization error:', e);\n container.innerHTML = '
Error initializing UTK visualization
';\n }}\n }});\n \"\"\"\n display(Javascript(js_initialization))\n\n\n_curio_output = _curio_node()\n\ntry:\n result_9075a191_c09b_440b_ae42_5e618261506e = _curio_output\nexcept NameError:\n result_9075a191_c09b_440b_ae42_5e618261506e = None\n\nfrom IPython.display import display\ndisplay(utk-container-9075a191)\n", + "metadata": { + "id": "9075a191-c09b-440b-ae42-5e618261506e", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"6f726f1a-700f-49bf-b6eb-8176c7f559e9\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"58ded8d4-4db6-40ff-bfef-a195997b9865\",\n \"target\": \"6f726f1a-700f-49bf-b6eb-8176c7f559e9\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"6f726f1a-700f-49bf-b6eb-8176c7f559e9\",\n \"target\": \"03e0e49a-f493-4af1-95e8-9f004af838e0\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_58ded8d4_4db6_40ff_bfef_a195997b9865\n arg = input_0\n\n import utk\r\n\r\n json_surface = arg[0].iloc[0]['json_data']\r\n json_parks = arg[1].iloc[0]['json_data']\r\n gdf_buildings = arg[2]\r\n gdf_surface = arg[3]\r\n gdf_parks = arg[4]\r\n\r\n json_layers = [json_surface]\r\n\r\n # buildings json layer\r\n gdf_buildings = gdf_buildings.set_crs('4326')\r\n mesh = utk.OSM.mesh_from_buildings_gdf(gdf_buildings, 5)['data']\r\n\r\n json_buildings = {\r\n 'id': 'buildings',\r\n 'type': 'BUILDINGS_LAYER',\r\n 'renderStyle': ['SMOOTH_COLOR_MAP_TEX'],\r\n 'styleKey': 'building',\r\n 'data': mesh\r\n }\r\n\r\n json_layers.append(json_buildings)\r\n\r\n shadow = utk.data.shadow(json_layers, [[[!! Start date$INPUT_TEXT$12/26/2015 10:00 !!], [!! 
End date$INPUT_TEXT$12/26/2015 16:01 !!]]])\r\n\r\n thematic_layers = shadow.get_shadow_by_layer()\r\n\r\n building_index = -1\r\n current_building_id = -1\r\n\r\n values_per_row = []\r\n\r\n for index, row in gdf_buildings.iterrows():\r\n if(row['building_id'] != current_building_id):\r\n current_building_id = row['building_id']\r\n building_index += 1\r\n\r\n values_per_row.append(thematic_layers['shadow0_buildings']['values'][building_index])\r\n\r\n gdf_buildings[\"shadow0_buildings\"] = values_per_row\r\n\r\n gdf_buildings.metadata = {\r\n 'name': 'buildings'\r\n }\r\n\r\n values_per_row = []\r\n\r\n for index, row in gdf_surface.iterrows():\r\n values_per_row.append(thematic_layers['shadow0_surface']['values'][index])\r\n\r\n gdf_surface[\"shadow0_surface\"] = values_per_row\r\n gdf_surface[\"surface_id\"] = 0 # surface is a single big bounding box\r\n\r\n gdf_surface.metadata = {\r\n 'name': 'surface'\r\n }\r\n\r\n return (gdf_surface, gdf_parks, gdf_buildings)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_6f726f1a_700f_49bf_b6eb_8176c7f559e9 = _curio_output\nexcept NameError:\n result_6f726f1a_700f_49bf_b6eb_8176c7f559e9 = None\n", + "metadata": { + "id": "6f726f1a-700f-49bf-b6eb-8176c7f559e9", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"03e0e49a-f493-4af1-95e8-9f004af838e0\",\n \"type\": \"VIS_UTK\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"6f726f1a-700f-49bf-b6eb-8176c7f559e9\",\n \"target\": \"03e0e49a-f493-4af1-95e8-9f004af838e0\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"id\": \"edge_in_0_03e0e49a_f493_4af1_95e8_9f004af838e0_98352a3b_23fd_4fa0_8a5f_e846e4076b76\",\n \"source\": \"03e0e49a-f493-4af1-95e8-9f004af838e0\",\n \"target\": \"98352a3b-23fd-4fa0-8a5f-e846e4076b76\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n }\n ]\n}\n\ndef _curio_node():\n\n 
input_0 = result_6f726f1a_700f_49bf_b6eb_8176c7f559e9\n\n\n # Configure UTK for serverless/notebook environment\n import utk\n import json\n from IPython.display import HTML, Javascript, display\n\n utk.Environment.serverless = True\n\n # Create grammar structure\n grammar = {\n \"components\": [{\n \"id\": \"notebook_map\",\n \"json\": {\n \"camera\": {\n \"wEye\": [0, 0, 1000],\n \"wLookAt\": [0, 0, 0],\n \"wUp\": [0, 1, 0]\n },\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": [],\n \"map_style\": [],\n \"widgets\": [{\n \"type\": \"TOGGLE_KNOT\"\n }]\n },\n \"position\": {\"x\": 0, \"y\": 0, \"width\": 12, \"height\": 4}\n }],\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": []\n }\n\n # If content has grammar, parse and merge it\n grammar_content = \"\"\"{}\"\"\".strip()\n if grammar_content and grammar_content != \"{}\":\n try:\n parsed_grammar = json.loads(grammar_content)\n # Merge parsed grammar with our structure\n if \"components\" in parsed_grammar:\n grammar[\"components\"][0][\"json\"].update(parsed_grammar.get(\"json\", {}))\n if \"knots\" in parsed_grammar:\n grammar[\"knots\"] = parsed_grammar[\"knots\"]\n except json.JSONDecodeError:\n pass\n\n # Load geospatial data if available\n geospatial_data = None\n if input_0 is not None:\n data_input = input_0\n # Handle multi-input case\n if isinstance(data_input, list):\n data_input = data_input[0] if data_input else None\n \n if data_input is not None:\n # Check if it's a geodataframe\n try:\n import geopandas as gpd\n if isinstance(data_input, gpd.GeoDataFrame):\n # Convert to GeoJSON\n geojson_data = json.loads(data_input.to_json())\n geospatial_data = utk.physical_from_geojson(geojson_data)\n \n # Add layers to grammar\n if geospatial_data and \"components\" in grammar:\n if \"layers\" not in grammar[\"components\"][0][\"json\"]:\n grammar[\"components\"][0][\"json\"][\"layers\"] = []\n # Add layer for the geospatial data\n grammar[\"components\"][0][\"json\"][\"layers\"].append({\n 
\"type\": \"geospatial\",\n \"data\": geospatial_data.to_dict() if hasattr(geospatial_data, 'to_dict') else geospatial_data\n })\n except Exception as e:\n pass\n\n # Create HTML container\n html_container = f'
'\n display(HTML(html_container))\n\n # Initialize UTK in browser\n js_initialization = f\"\"\"\n require(['utk'], function(utk) {{\n utk.Environment.serverless = true;\n const container = document.getElementById('utk-container-03e0e49a');\n const grammar = {json.dumps(grammar)};\n \n try {{\n const interpreter = new utk.GrammarInterpreter('notebook', grammar, container);\n // Store reference for potential interactions\n window._utk_interpreter_03e0e49a = interpreter;\n }} catch(e) {{\n console.error('UTK initialization error:', e);\n container.innerHTML = '
Error initializing UTK visualization
';\n }}\n }});\n \"\"\"\n display(Javascript(js_initialization))\n\n\n_curio_output = _curio_node()\n\ntry:\n result_03e0e49a_f493_4af1_95e8_9f004af838e0 = _curio_output\nexcept NameError:\n result_03e0e49a_f493_4af1_95e8_9f004af838e0 = None\n\nfrom IPython.display import display\ndisplay(utk-container-03e0e49a)\n", + "metadata": { + "id": "03e0e49a-f493-4af1-95e8-9f004af838e0", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"98352a3b-23fd-4fa0-8a5f-e846e4076b76\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"id\": \"edge_in_0_03e0e49a_f493_4af1_95e8_9f004af838e0_98352a3b_23fd_4fa0_8a5f_e846e4076b76\",\n \"source\": \"03e0e49a-f493-4af1-95e8-9f004af838e0\",\n \"target\": \"98352a3b-23fd-4fa0-8a5f-e846e4076b76\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n },\n {\n \"id\": \"edge_in_1_9075a191_c09b_440b_ae42_5e618261506e_98352a3b_23fd_4fa0_8a5f_e846e4076b76\",\n \"source\": \"9075a191-c09b-440b-ae42-5e618261506e\",\n \"target\": \"98352a3b-23fd-4fa0-8a5f-e846e4076b76\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"98352a3b-23fd-4fa0-8a5f-e846e4076b76\",\n \"target\": \"2de74d5b-ce5b-4a94-9951-5e2d551033bb\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n inputs = [\n result_03e0e49a_f493_4af1_95e8_9f004af838e0,\n result_9075a191_c09b_440b_ae42_5e618261506e\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_98352a3b_23fd_4fa0_8a5f_e846e4076b76 = _curio_output\nexcept NameError:\n merged_98352a3b_23fd_4fa0_8a5f_e846e4076b76 = None\n", + "metadata": { + "id": "98352a3b-23fd-4fa0-8a5f-e846e4076b76", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": 
\"2de74d5b-ce5b-4a94-9951-5e2d551033bb\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"98352a3b-23fd-4fa0-8a5f-e846e4076b76\",\n \"target\": \"2de74d5b-ce5b-4a94-9951-5e2d551033bb\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"2de74d5b-ce5b-4a94-9951-5e2d551033bb\",\n \"target\": \"738a627f-8072-4cf1-a7d8-3fd45d06f4a9\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = merged_98352a3b_23fd_4fa0_8a5f_e846e4076b76\n arg = input_0\n\n gdf_surface_1 = arg[0][0]\r\n gdf_buildings_1 = arg[0][2]\r\n\r\n gdf_surface_2 = arg[1][0]\r\n gdf_buildings_2 = arg[1][2]\r\n\r\n gdf_parks_3 = arg[0][1]\r\n\r\n list_1 = gdf_surface_1.iloc[0]['shadow0_surface']\r\n list_2 = gdf_surface_2.iloc[0]['shadow0_surface']\r\n\r\n difference_list = [b - a for a, b in zip(list_1, list_2)]\r\n\r\n gdf_surface_2['shadow0_surface'] = [difference_list]\r\n\r\n gdf_parks_3.metadata = {\r\n 'name': 'parks'\r\n }\r\n\r\n gdf_surface_2.metadata = {\r\n 'name': 'surface'\r\n }\r\n\r\n gdf_buildings_2.metadata = {\r\n 'name': 'buildings'\r\n }\r\n\r\n return (gdf_parks_3, gdf_surface_2, gdf_buildings_2)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_2de74d5b_ce5b_4a94_9951_5e2d551033bb = _curio_output\nexcept NameError:\n result_2de74d5b_ce5b_4a94_9951_5e2d551033bb = None\n", + "metadata": { + "id": "2de74d5b-ce5b-4a94-9951-5e2d551033bb", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"738a627f-8072-4cf1-a7d8-3fd45d06f4a9\",\n \"type\": \"VIS_UTK\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"2de74d5b-ce5b-4a94-9951-5e2d551033bb\",\n \"target\": \"738a627f-8072-4cf1-a7d8-3fd45d06f4a9\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n 
\"outputs\": []\n}\n\ndef _curio_node():\n\n input_0 = result_2de74d5b_ce5b_4a94_9951_5e2d551033bb\n\n\n # Configure UTK for serverless/notebook environment\n import utk\n import json\n from IPython.display import HTML, Javascript, display\n\n utk.Environment.serverless = True\n\n # Create grammar structure\n grammar = {\n \"components\": [{\n \"id\": \"notebook_map\",\n \"json\": {\n \"camera\": {\n \"wEye\": [0, 0, 1000],\n \"wLookAt\": [0, 0, 0],\n \"wUp\": [0, 1, 0]\n },\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": [],\n \"map_style\": [],\n \"widgets\": [{\n \"type\": \"TOGGLE_KNOT\"\n }]\n },\n \"position\": {\"x\": 0, \"y\": 0, \"width\": 12, \"height\": 4}\n }],\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": []\n }\n\n # If content has grammar, parse and merge it\n grammar_content = \"\"\"{\n \"components\": [\n {\n \"id\": \"grammar_map\",\n \"position\": {\n \"width\": [\n 1,\n 12\n ],\n \"height\": [\n 1,\n 4\n ]\n }\n }\n ],\n \"knots\": [],\n \"ex_knots\": [\n {\n \"id\": \"parks0\",\n \"out_name\": \"parks\"\n },\n {\n \"id\": \"surface0\",\n \"out_name\": \"surface\",\n \"in_name\": \"shadow0_surface\"\n },\n {\n \"id\": \"buildings0\",\n \"out_name\": \"buildings\",\n \"in_name\": \"shadow0_buildings\"\n },\n {\n \"id\": \"buildings1\",\n \"out_name\": \"buildings\",\n \"in_name\": \"thematic\"\n }\n ],\n \"grid\": {\n \"width\": 12,\n \"height\": 4\n },\n \"grammar\": false\n }\"\"\".strip()\n if grammar_content and grammar_content != \"{}\":\n try:\n parsed_grammar = json.loads(grammar_content)\n # Merge parsed grammar with our structure\n if \"components\" in parsed_grammar:\n grammar[\"components\"][0][\"json\"].update(parsed_grammar.get(\"json\", {}))\n if \"knots\" in parsed_grammar:\n grammar[\"knots\"] = parsed_grammar[\"knots\"]\n except json.JSONDecodeError:\n pass\n\n # Load geospatial data if available\n geospatial_data = None\n if input_0 is not None:\n data_input = input_0\n # Handle multi-input case\n if 
isinstance(data_input, list):\n data_input = data_input[0] if data_input else None\n \n if data_input is not None:\n # Check if it's a geodataframe\n try:\n import geopandas as gpd\n if isinstance(data_input, gpd.GeoDataFrame):\n # Convert to GeoJSON\n geojson_data = json.loads(data_input.to_json())\n geospatial_data = utk.physical_from_geojson(geojson_data)\n \n # Add layers to grammar\n if geospatial_data and \"components\" in grammar:\n if \"layers\" not in grammar[\"components\"][0][\"json\"]:\n grammar[\"components\"][0][\"json\"][\"layers\"] = []\n # Add layer for the geospatial data\n grammar[\"components\"][0][\"json\"][\"layers\"].append({\n \"type\": \"geospatial\",\n \"data\": geospatial_data.to_dict() if hasattr(geospatial_data, 'to_dict') else geospatial_data\n })\n except Exception as e:\n pass\n\n # Create HTML container\n html_container = f'
'\n display(HTML(html_container))\n\n # Initialize UTK in browser\n js_initialization = f\"\"\"\n require(['utk'], function(utk) {{\n utk.Environment.serverless = true;\n const container = document.getElementById('utk-container-738a627f');\n const grammar = {json.dumps(grammar)};\n \n try {{\n const interpreter = new utk.GrammarInterpreter('notebook', grammar, container);\n // Store reference for potential interactions\n window._utk_interpreter_738a627f = interpreter;\n }} catch(e) {{\n console.error('UTK initialization error:', e);\n container.innerHTML = '
Error initializing UTK visualization
';\n }}\n }});\n \"\"\"\n display(Javascript(js_initialization))\n\n\n_curio_output = _curio_node()\n\ntry:\n result_738a627f_8072_4cf1_a7d8_3fd45d06f4a9 = _curio_output\nexcept NameError:\n result_738a627f_8072_4cf1_a7d8_3fd45d06f4a9 = None\n\nfrom IPython.display import display\ndisplay(utk-container-738a627f)\n", + "metadata": { + "id": "738a627f-8072-4cf1-a7d8-3fd45d06f4a9", + "language": "python" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example3-notebook.ipynb b/docs/examples/notebooks/example3-notebook.ipynb new file mode 100644 index 00000000..bc20918d --- /dev/null +++ b/docs/examples/notebooks/example3-notebook.ipynb @@ -0,0 +1,120 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"09c6e03f-117d-45c5-af30-8f71bc3e58b6\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n\n # computation analysis - clear\r\n\r\n import sys\r\n import os\r\n import re\r\n import torch\r\n\r\n _ROOT = os.getcwd()\r\n _CANDIDATES = [\r\n os.environ.get(\"CITYSURFACES_DIR\"),\r\n os.path.join(_ROOT, \"city-surfaces\"),\r\n os.path.join(_ROOT, \"CitySurfaces\"),\r\n ]\r\n CITYSURFACES_DIR = None\r\n for _p in _CANDIDATES:\r\n if _p and os.path.isfile(os.path.join(_p, \"config.py\")):\r\n CITYSURFACES_DIR = os.path.abspath(_p)\r\n break\r\n if CITYSURFACES_DIR is None:\r\n raise FileNotFoundError(\r\n \"CitySurfaces repo not found (need config.py). Clone \"\r\n \"https://github.com/VIDA-NYU/city-surfaces into ./city-surfaces/ under \"\r\n f\"your Curio launch directory, or set CITYSURFACES_DIR. 
cwd={_ROOT!r}\"\r\n )\r\n sys.path.insert(0, CITYSURFACES_DIR)\r\n\r\n # CitySurfaces calls logx.msg() during model init; runx requires initialize() first (normally done in val.py main).\r\n import tempfile\r\n from runx.logx import logx\r\n _log_dir = os.path.join(tempfile.gettempdir(), \"curio_citysurfaces_runx\")\r\n logx.initialize(logdir=_log_dir, tensorboard=False, hparams={}, global_rank=0)\r\n\r\n WEIGHTS_DIR = './data/dataset/CitySurfaces_weights'\r\n WEIGHTS_FILE = os.path.join(WEIGHTS_DIR, 'block_c_10classes.pth')\r\n NUM_CLASSES = 10\r\n DEVICE = \"cuda\"\r\n\r\n from config import cfg\r\n cfg.immutable(False)\r\n cfg.DATASET.NUM_CLASSES = NUM_CLASSES\r\n cfg.MODEL.BNFUNC = torch.nn.BatchNorm2d\r\n cfg.MODEL.HRNET_CHECKPOINT = os.path.join(WEIGHTS_DIR, 'hrnetv2_w48_imagenet_pretrained.pth')\r\n cfg.OPTIONS.INIT_DECODER = False\r\n # val.py sets this via assert_and_infer_cfg(); required for network/mynn.py interpolate branches\r\n _m = re.match(r'^([0-9]+\\.[0-9]+)', torch.__version__)\r\n cfg.OPTIONS.TORCH_VERSION = float(_m.group(1)) if _m else 2.0\r\n cfg.immutable(True)\r\n\r\n from network.ocrnet import HRNet_Mscale\r\n\r\n model = HRNet_Mscale(num_classes=NUM_CLASSES, criterion=None).to(DEVICE)\r\n\r\n # PyTorch 2.6+ defaults weights_only=True; CitySurfaces checkpoints need False (trusted local files).\r\n checkpoint = torch.load(WEIGHTS_FILE, map_location=DEVICE, weights_only=False)\r\n state_dict = checkpoint.get('state_dict', checkpoint)\r\n\r\n model_state = model.state_dict()\r\n new_state = {}\r\n for k in model_state:\r\n if k in state_dict and model_state[k].size() == state_dict[k].size():\r\n new_state[k] = state_dict[k]\r\n elif 'module.' + k in state_dict and model_state[k].size() == state_dict['module.' + k].size():\r\n new_state[k] = state_dict['module.' 
+ k]\r\n\r\n model_state.update(new_state)\r\n model.load_state_dict(model_state)\r\n model.eval()\r\n\r\n return \"Pretrained CitySurfaces model loaded (10 classes)\"\n\n\n_curio_output = _curio_node()\n\ntry:\n result_09c6e03f_117d_45c5_af30_8f71bc3e58b6 = _curio_output\nexcept NameError:\n result_09c6e03f_117d_45c5_af30_8f71bc3e58b6 = None\n", + "metadata": { + "id": "09c6e03f-117d-45c5-af30-8f71bc3e58b6", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"9def5617-0b4e-4afb-afaf-7a567af01f92\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"id\": \"edge_in_0_9def5617_0b4e_4afb_afaf_7a567af01f92_7902bce6_4771_4f73_9ee1_d706fc22892f\",\n \"source\": \"9def5617-0b4e-4afb-afaf-7a567af01f92\",\n \"target\": \"7902bce6-4771-4f73-9ee1-d706fc22892f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n }\n ]\n}\n\ndef _curio_node():\n\n import geopandas as gpd\r\n # Load neighborhood data\r\n boston = gpd.read_file('./data/dataset/Census2020_BlockGroups.shp').to_crs('EPSG:4326')\r\n return boston\n\n_curio_output = _curio_node()\n\ntry:\n data_9def5617_0b4e_4afb_afaf_7a567af01f92 = _curio_output\nexcept NameError:\n data_9def5617_0b4e_4afb_afaf_7a567af01f92 = None\n", + "metadata": { + "id": "9def5617-0b4e-4afb-afaf-7a567af01f92", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"c66ae5dc-5727-4dba-9ad7-e0d312cbc1cb\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"c66ae5dc-5727-4dba-9ad7-e0d312cbc1cb\",\n \"target\": \"aaff4f52-b04b-413f-9f83-7e12fb0acbf0\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\r\n df = pd.read_csv('./data/dataset/gsv/boston_gsv.csv', 
names=['status','id','lat','lon'])\r\n sample = df[df['status']=='OK'].sample(100, random_state=42)\r\n return sample\n\n_curio_output = _curio_node()\n\ntry:\n data_c66ae5dc_5727_4dba_9ad7_e0d312cbc1cb = _curio_output\nexcept NameError:\n data_c66ae5dc_5727_4dba_9ad7_e0d312cbc1cb = None\n", + "metadata": { + "id": "c66ae5dc-5727-4dba-9ad7-e0d312cbc1cb", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"aaff4f52-b04b-413f-9f83-7e12fb0acbf0\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"c66ae5dc-5727-4dba-9ad7-e0d312cbc1cb\",\n \"target\": \"aaff4f52-b04b-413f-9f83-7e12fb0acbf0\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"aaff4f52-b04b-413f-9f83-7e12fb0acbf0\",\n \"target\": \"cf7bf5ef-5ce7-4e26-974b-fb782f84be19\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_c66ae5dc_5727_4dba_9ad7_e0d312cbc1cb\n arg = input_0\n\n import sys\r\n import os\r\n import re\r\n import torch\r\n import torch.nn.functional as F\r\n import numpy as np\r\n from PIL import Image\r\n from io import BytesIO\r\n import base64\r\n\r\n sample = arg\r\n\r\n _ROOT = os.getcwd()\r\n _CANDIDATES = [\r\n os.environ.get(\"CITYSURFACES_DIR\"),\r\n os.path.join(_ROOT, \"city-surfaces\"),\r\n os.path.join(_ROOT, \"CitySurfaces\"),\r\n ]\r\n CITYSURFACES_DIR = None\r\n for _p in _CANDIDATES:\r\n if _p and os.path.isfile(os.path.join(_p, \"config.py\")):\r\n CITYSURFACES_DIR = os.path.abspath(_p)\r\n break\r\n if CITYSURFACES_DIR is None:\r\n raise FileNotFoundError(\r\n \"CitySurfaces repo not found (need config.py). Clone \"\r\n \"https://github.com/VIDA-NYU/city-surfaces into ./city-surfaces/ under \"\r\n f\"your Curio launch directory, or set CITYSURFACES_DIR. 
cwd={_ROOT!r}\"\r\n )\r\n sys.path.insert(0, CITYSURFACES_DIR)\r\n\r\n import tempfile\r\n from runx.logx import logx\r\n _log_dir = os.path.join(tempfile.gettempdir(), \"curio_citysurfaces_runx\")\r\n logx.initialize(logdir=_log_dir, tensorboard=False, hparams={}, global_rank=0)\r\n\r\n WEIGHTS_DIR = './data/dataset/CitySurfaces_weights'\r\n WEIGHTS_FILE = os.path.join(WEIGHTS_DIR, 'block_c_10classes.pth')\r\n NUM_CLASSES = 10\r\n DEVICE = 'cuda'\r\n IMAGE_SIZE = 320\r\n\r\n from config import cfg\r\n cfg.immutable(False)\r\n cfg.DATASET.NUM_CLASSES = NUM_CLASSES\r\n cfg.MODEL.BNFUNC = torch.nn.BatchNorm2d\r\n cfg.MODEL.HRNET_CHECKPOINT = os.path.join(WEIGHTS_DIR, 'hrnetv2_w48_imagenet_pretrained.pth')\r\n cfg.OPTIONS.INIT_DECODER = False\r\n _m = re.match(r'^([0-9]+\\.[0-9]+)', torch.__version__)\r\n cfg.OPTIONS.TORCH_VERSION = float(_m.group(1)) if _m else 2.0\r\n cfg.immutable(True)\r\n\r\n from network.ocrnet import HRNet_Mscale\r\n\r\n def compute_uncertainty(predictions):\r\n sorted_probs = np.sort(predictions, axis=1)\r\n highest_prob = sorted_probs[:, -1, :, :]\r\n second_highest_prob = sorted_probs[:, -2, :, :]\r\n uncertainty_margin = highest_prob - second_highest_prob\r\n return 1.0 - uncertainty_margin\r\n\r\n model = HRNet_Mscale(num_classes=NUM_CLASSES, criterion=None).to(DEVICE)\r\n\r\n checkpoint = torch.load(WEIGHTS_FILE, map_location=DEVICE, weights_only=False)\r\n state_dict = checkpoint.get('state_dict', checkpoint)\r\n model_state = model.state_dict()\r\n new_state = {}\r\n for k in model_state:\r\n if k in state_dict and model_state[k].size() == state_dict[k].size():\r\n new_state[k] = state_dict[k]\r\n elif 'module.' + k in state_dict and model_state[k].size() == state_dict['module.' + k].size():\r\n new_state[k] = state_dict['module.' 
+ k]\r\n model_state.update(new_state)\r\n model.load_state_dict(model_state)\r\n model.eval()\r\n\r\n MEAN = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(DEVICE)\r\n STD = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(DEVICE)\r\n\r\n color_map = {\r\n 0: (255, 127, 14), # concrete\r\n 1: (43, 160, 43), # bricks\r\n 2: (31, 119, 179), # granite\r\n 3: (153, 153, 153), # asphalt\r\n 4: (214, 39, 40), # mixed\r\n 5: (54, 54, 54), # road\r\n 6: (0, 0, 0), # background\r\n 7: (138, 0, 138), # granite block-stone\r\n 8: (240, 110, 170), # hexagonal\r\n 9: (139, 109, 48), # cobblestone\r\n }\r\n\r\n lats = []\r\n lons = []\r\n uncerts = []\r\n images = []\r\n predicted_images = []\r\n uncert_images = []\r\n for index, row in sample.iterrows():\r\n image_path = './data/dataset/gsv/boston/%s_left.jpg' % row['id']\r\n\r\n pil_image = Image.open(image_path).convert(\"RGB\").resize((IMAGE_SIZE, IMAGE_SIZE))\r\n\r\n image = np.array(pil_image, dtype=np.float32) / 255.0\r\n input_tensor = torch.from_numpy(image.reshape(1, IMAGE_SIZE, IMAGE_SIZE, 3)).permute((0, 3, 1, 2)).to(DEVICE)\r\n input_tensor = (input_tensor - MEAN) / STD\r\n\r\n with torch.no_grad():\r\n output = model({'images': input_tensor})\r\n logits = output['pred']\r\n predictions = F.softmax(logits, dim=1)\r\n\r\n pred_labels = torch.argmax(predictions, dim=1)\r\n pred_array = pred_labels.cpu().numpy()\r\n pred_array = pred_array.reshape((IMAGE_SIZE, IMAGE_SIZE))\r\n pred_pil = Image.new(\"RGB\", (pred_array.shape[1], pred_array.shape[0]))\r\n for i in range(pred_array.shape[0]):\r\n for j in range(pred_array.shape[1]):\r\n pred_pil.putpixel((j, i), color_map[pred_array[i, j]])\r\n\r\n buffered = BytesIO()\r\n pred_pil.save(buffered, format=\"PNG\")\r\n pred_str = base64.b64encode(buffered.getvalue()).decode('utf-8')\r\n\r\n uncertainty_margin = compute_uncertainty(predictions.cpu().detach().numpy())\r\n\r\n uncertainty_array = np.uint8(uncertainty_margin * 255)\r\n uncertainty_array = 
np.transpose(uncertainty_array, (1, 2, 0))\r\n uncertainty_array = np.squeeze(uncertainty_array, axis=2)\r\n uncertainty_pil = Image.fromarray(uncertainty_array)\r\n\r\n buffered = BytesIO()\r\n uncertainty_pil.save(buffered, format=\"PNG\")\r\n uncertainty_str = base64.b64encode(buffered.getvalue()).decode('utf-8')\r\n\r\n lats.append(row['lat'])\r\n lons.append(row['lon'])\r\n uncerts.append(float(np.average(uncertainty_margin)))\r\n\r\n buffered = BytesIO()\r\n pil_image.save(buffered, format=\"PNG\")\r\n img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')\r\n\r\n images.append(img_str)\r\n predicted_images.append(pred_str)\r\n uncert_images.append(uncertainty_str)\r\n\r\n return (lats, lons, uncerts, images, predicted_images, uncert_images)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_aaff4f52_b04b_413f_9f83_7e12fb0acbf0 = _curio_output\nexcept NameError:\n result_aaff4f52_b04b_413f_9f83_7e12fb0acbf0 = None\n", + "metadata": { + "id": "aaff4f52-b04b-413f-9f83-7e12fb0acbf0", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"cf7bf5ef-5ce7-4e26-974b-fb782f84be19\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"aaff4f52-b04b-413f-9f83-7e12fb0acbf0\",\n \"target\": \"cf7bf5ef-5ce7-4e26-974b-fb782f84be19\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"id\": \"edge_in_1_cf7bf5ef_5ce7_4e26_974b_fb782f84be19_7902bce6_4771_4f73_9ee1_d706fc22892f\",\n \"source\": \"cf7bf5ef-5ce7-4e26-974b-fb782f84be19\",\n \"target\": \"7902bce6-4771-4f73-9ee1-d706fc22892f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n },\n {\n \"source\": \"cf7bf5ef-5ce7-4e26-974b-fb782f84be19\",\n \"target\": \"f7172aea-3e3d-4c58-b0bc-dc02db24a733\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = 
result_aaff4f52_b04b_413f_9f83_7e12fb0acbf0\n arg = input_0\n\n import geopandas as gpd\r\n\r\n lats = arg[0]\r\n lons = arg[1]\r\n uncerts = arg[2]\r\n original_images = arg[3]\r\n predicted_images = arg[4]\r\n uncert_images = arg[5]\r\n\r\n image_content = list(zip(original_images, predicted_images, uncert_images))\r\n\r\n gdf = pd.DataFrame({'lat': lats, 'lon': lons, 'uncertainty': uncerts, 'image_content': image_content})\r\n\r\n gdf['image_id'] = gdf.index\r\n\r\n gdf = gpd.GeoDataFrame(\r\n gdf, geometry=gpd.points_from_xy(gdf.lon, gdf.lat), crs=\"EPSG:4326\"\r\n )\r\n\r\n gdf = gdf.sort_values(by='image_id', ascending=True)\r\n\r\n return gdf\n\n\n_curio_output = _curio_node()\n\ntry:\n result_cf7bf5ef_5ce7_4e26_974b_fb782f84be19 = _curio_output\nexcept NameError:\n result_cf7bf5ef_5ce7_4e26_974b_fb782f84be19 = None\n", + "metadata": { + "id": "cf7bf5ef-5ce7-4e26-974b-fb782f84be19", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"7902bce6-4771-4f73-9ee1-d706fc22892f\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"id\": \"edge_in_0_9def5617_0b4e_4afb_afaf_7a567af01f92_7902bce6_4771_4f73_9ee1_d706fc22892f\",\n \"source\": \"9def5617-0b4e-4afb-afaf-7a567af01f92\",\n \"target\": \"7902bce6-4771-4f73-9ee1-d706fc22892f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n },\n {\n \"id\": \"edge_in_1_cf7bf5ef_5ce7_4e26_974b_fb782f84be19_7902bce6_4771_4f73_9ee1_d706fc22892f\",\n \"source\": \"cf7bf5ef-5ce7-4e26-974b-fb782f84be19\",\n \"target\": \"7902bce6-4771-4f73-9ee1-d706fc22892f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"7902bce6-4771-4f73-9ee1-d706fc22892f\",\n \"target\": \"55aa4581-9a68-4257-b2c7-63e3360737e3\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n inputs = [\n 
data_9def5617_0b4e_4afb_afaf_7a567af01f92,\n result_cf7bf5ef_5ce7_4e26_974b_fb782f84be19\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_7902bce6_4771_4f73_9ee1_d706fc22892f = _curio_output\nexcept NameError:\n merged_7902bce6_4771_4f73_9ee1_d706fc22892f = None\n", + "metadata": { + "id": "7902bce6-4771-4f73-9ee1-d706fc22892f", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"f7172aea-3e3d-4c58-b0bc-dc02db24a733\",\n \"type\": \"DATA_POOL\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"cf7bf5ef-5ce7-4e26-974b-fb782f84be19\",\n \"target\": \"f7172aea-3e3d-4c58-b0bc-dc02db24a733\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"3c5db6ce-0082-47d7-80b7-1b9534b4726b\",\n \"target\": \"f7172aea-3e3d-4c58-b0bc-dc02db24a733\",\n \"sourceHandle\": \"in/out\",\n \"targetHandle\": \"in/out\",\n \"type\": \"Interaction\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"f7172aea-3e3d-4c58-b0bc-dc02db24a733\",\n \"target\": \"a61f234d-78b4-4c3f-9d22-15a2855967b3\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n return result_cf7bf5ef_5ce7_4e26_974b_fb782f84be19\n\n\n_curio_output = _curio_node()\n\ntry:\n pool_f7172aea_3e3d_4c58_b0bc_dc02db24a733 = _curio_output\nexcept NameError:\n pool_f7172aea_3e3d_4c58_b0bc_dc02db24a733 = None\n", + "metadata": { + "id": "f7172aea-3e3d-4c58-b0bc-dc02db24a733", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"a61f234d-78b4-4c3f-9d22-15a2855967b3\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"f7172aea-3e3d-4c58-b0bc-dc02db24a733\",\n \"target\": \"a61f234d-78b4-4c3f-9d22-15a2855967b3\",\n 
\"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"a61f234d-78b4-4c3f-9d22-15a2855967b3\",\n \"target\": \"e2e0b5d8-a0dc-4860-9a08-203871b0d28f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = pool_f7172aea_3e3d_4c58_b0bc_dc02db24a733\n arg = input_0\n\n df = pd.DataFrame(arg.drop(columns=arg.geometry.name))\r\n df = df[df['interacted'] == '1']\r\n df = df.sort_values(by='uncertainty', ascending=False)\r\n return df.head(20)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_a61f234d_78b4_4c3f_9d22_15a2855967b3 = _curio_output\nexcept NameError:\n result_a61f234d_78b4_4c3f_9d22_15a2855967b3 = None\n", + "metadata": { + "id": "a61f234d-78b4-4c3f-9d22-15a2855967b3", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e2e0b5d8-a0dc-4860-9a08-203871b0d28f\",\n \"type\": \"VIS_IMAGE\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"a61f234d-78b4-4c3f-9d22-15a2855967b3\",\n \"target\": \"e2e0b5d8-a0dc-4860-9a08-203871b0d28f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_a61f234d_78b4_4c3f_9d22_15a2855967b3\n from IPython.display import display, Image, HTML\n import pandas as pd\n from io import BytesIO\n import base64\n\n # Handle both DataFrame and direct image input\n if isinstance(input_data, pd.DataFrame):\n # Expecting DataFrame with 'image_id' and 'image_content' columns (base64 encoded)\n cols = input_data.columns.tolist()\n image_col = 'image_content' if 'image_content' in cols else (cols[1] if len(cols) > 1 else None)\n id_col = 'image_id' if 'image_id' in cols else cols[0]\n \n if image_col is None:\n display(input_data)\n else:\n # Create HTML grid for images\n grid_html = '
'\n \n for idx, row in input_data.iterrows():\n image_id = str(row[id_col]) if id_col in row else f\"Image {idx}\"\n image_content = row[image_col]\n \n try:\n # Check if image_content is base64 string\n if isinstance(image_content, str):\n if not image_content.startswith('data:image'):\n image_content = f'data:image/png;base64,{image_content}'\n grid_html += f'

{image_id}

'\n else:\n grid_html += f'

Invalid image format for {image_id}

'\n except Exception as e:\n grid_html += f'

Error displaying {image_id}: {str(e)}

'\n \n grid_html += '
'\n display(HTML(grid_html))\n else:\n # Single image or fallback\n try:\n if isinstance(input_data, str):\n # Base64 encoded image or file path\n if input_data.startswith('data:image') or input_data.startswith('/') or input_data.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp')):\n display(Image(input_data))\n else:\n # Assume base64 encoded\n display(Image(data=base64.b64decode(input_data)))\n else:\n display(Image(input_data))\n except Exception:\n display(input_data)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_e2e0b5d8_a0dc_4860_9a08_203871b0d28f = _curio_output\nexcept NameError:\n result_e2e0b5d8_a0dc_4860_9a08_203871b0d28f = None\n", + "metadata": { + "id": "e2e0b5d8-a0dc-4860-9a08-203871b0d28f", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"55aa4581-9a68-4257-b2c7-63e3360737e3\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"7902bce6-4771-4f73-9ee1-d706fc22892f\",\n \"target\": \"55aa4581-9a68-4257-b2c7-63e3360737e3\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"55aa4581-9a68-4257-b2c7-63e3360737e3\",\n \"target\": \"71ab6de4-b23c-42d9-a2fe-ce5141d285b2\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = merged_7902bce6_4771_4f73_9ee1_d706fc22892f\n arg = input_0\n\n import geopandas as gpd\r\n\r\n boston = arg[0]\r\n gdf = arg[1]\r\n\r\n def agg_to_list(series):\r\n return list(series)\r\n\r\n joined = gpd.sjoin(boston, gdf).groupby('GEOID20').agg({'uncertainty': 'mean', 'image_id': agg_to_list})\r\n boston = boston.set_index('GEOID20')\r\n boston.loc[joined.index,'uncertainty'] = joined['uncertainty']\r\n boston.loc[joined.index,'image_id'] = joined['image_id']\r\n\r\n filtered_boston = boston.loc[joined.index]\r\n\r\n filtered_boston = 
filtered_boston.rename(columns={'image_id': 'linked'})\r\n\r\n return filtered_boston\n\n\n_curio_output = _curio_node()\n\ntry:\n result_55aa4581_9a68_4257_b2c7_63e3360737e3 = _curio_output\nexcept NameError:\n result_55aa4581_9a68_4257_b2c7_63e3360737e3 = None\n", + "metadata": { + "id": "55aa4581-9a68-4257-b2c7-63e3360737e3", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"71ab6de4-b23c-42d9-a2fe-ce5141d285b2\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"55aa4581-9a68-4257-b2c7-63e3360737e3\",\n \"target\": \"71ab6de4-b23c-42d9-a2fe-ce5141d285b2\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"71ab6de4-b23c-42d9-a2fe-ce5141d285b2\",\n \"target\": \"3c5db6ce-0082-47d7-80b7-1b9534b4726b\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_55aa4581_9a68_4257_b2c7_63e3360737e3\n arg = input_0\n\n import geopandas as gpd\r\n\r\n filtered_boston = arg\r\n\r\n filtered_boston = filtered_boston.loc[:, [filtered_boston.geometry.name, 'uncertainty', 'linked']]\r\n\r\n filtered_boston = filtered_boston.set_crs(4326)\r\n filtered_boston = filtered_boston.to_crs(3395)\r\n\r\n filtered_boston.metadata = {\r\n 'name': 'boston'\r\n }\r\n\r\n return filtered_boston\n\n\n_curio_output = _curio_node()\n\ntry:\n result_71ab6de4_b23c_42d9_a2fe_ce5141d285b2 = _curio_output\nexcept NameError:\n result_71ab6de4_b23c_42d9_a2fe_ce5141d285b2 = None\n", + "metadata": { + "id": "71ab6de4-b23c-42d9-a2fe-ce5141d285b2", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"3c5db6ce-0082-47d7-80b7-1b9534b4726b\",\n \"type\": \"DATA_POOL\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": 
\"71ab6de4-b23c-42d9-a2fe-ce5141d285b2\",\n \"target\": \"3c5db6ce-0082-47d7-80b7-1b9534b4726b\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"ddae8fb9-82ee-4523-bf07-9184c7fc873f\",\n \"target\": \"3c5db6ce-0082-47d7-80b7-1b9534b4726b\",\n \"sourceHandle\": \"in/out\",\n \"targetHandle\": \"in/out\",\n \"type\": \"Interaction\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"3c5db6ce-0082-47d7-80b7-1b9534b4726b\",\n \"target\": \"ddae8fb9-82ee-4523-bf07-9184c7fc873f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"3c5db6ce-0082-47d7-80b7-1b9534b4726b\",\n \"target\": \"f7172aea-3e3d-4c58-b0bc-dc02db24a733\",\n \"sourceHandle\": \"in/out\",\n \"targetHandle\": \"in/out\",\n \"type\": \"Interaction\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n return result_71ab6de4_b23c_42d9_a2fe_ce5141d285b2\n\n\n_curio_output = _curio_node()\n\ntry:\n pool_3c5db6ce_0082_47d7_80b7_1b9534b4726b = _curio_output\n pool_3c5db6ce_0082_47d7_80b7_1b9534b4726b_in_out = pool_3c5db6ce_0082_47d7_80b7_1b9534b4726b\nexcept NameError:\n pool_3c5db6ce_0082_47d7_80b7_1b9534b4726b = None\n pool_3c5db6ce_0082_47d7_80b7_1b9534b4726b_in_out = None\n", + "metadata": { + "id": "3c5db6ce-0082-47d7-80b7-1b9534b4726b", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ddae8fb9-82ee-4523-bf07-9184c7fc873f\",\n \"type\": \"VIS_UTK\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"3c5db6ce-0082-47d7-80b7-1b9534b4726b\",\n \"target\": \"ddae8fb9-82ee-4523-bf07-9184c7fc873f\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"ddae8fb9-82ee-4523-bf07-9184c7fc873f\",\n \"target\": \"3c5db6ce-0082-47d7-80b7-1b9534b4726b\",\n \"sourceHandle\": \"in/out\",\n \"targetHandle\": \"in/out\",\n \"type\": \"Interaction\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = 
pool_3c5db6ce_0082_47d7_80b7_1b9534b4726b\n\n\n # Configure UTK for serverless/notebook environment\n import utk\n import json\n from IPython.display import HTML, Javascript, display\n\n utk.Environment.serverless = True\n\n # Create grammar structure\n grammar = {\n \"components\": [{\n \"id\": \"notebook_map\",\n \"json\": {\n \"camera\": {\n \"wEye\": [0, 0, 1000],\n \"wLookAt\": [0, 0, 0],\n \"wUp\": [0, 1, 0]\n },\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": [],\n \"map_style\": [],\n \"widgets\": [{\n \"type\": \"TOGGLE_KNOT\"\n }]\n },\n \"position\": {\"x\": 0, \"y\": 0, \"width\": 12, \"height\": 4}\n }],\n \"grid\": {\"width\": 12, \"height\": 4},\n \"knots\": []\n }\n\n # If content has grammar, parse and merge it\n grammar_content = \"\"\"{\n \"components\": [\n {\n \"id\": \"grammar_map\",\n \"position\": {\n \"width\": [\n 1,\n 12\n ],\n \"height\": [\n 1,\n 4\n ]\n }\n }\n ],\n \"knots\": [],\n \"ex_knots\": [\n {\n \"id\": \"boston0\",\n \"out_name\": \"boston\",\n \"in_name\": \"uncertainty\"\n }\n ],\n \"grid\": {\n \"width\": 12,\n \"height\": 4\n },\n \"grammar\": false\n }\"\"\".strip()\n if grammar_content and grammar_content != \"{}\":\n try:\n parsed_grammar = json.loads(grammar_content)\n # Merge parsed grammar with our structure\n if \"components\" in parsed_grammar:\n grammar[\"components\"][0][\"json\"].update(parsed_grammar.get(\"json\", {}))\n if \"knots\" in parsed_grammar:\n grammar[\"knots\"] = parsed_grammar[\"knots\"]\n except json.JSONDecodeError:\n pass\n\n # Load geospatial data if available\n geospatial_data = None\n if input_0 is not None:\n data_input = input_0\n # Handle multi-input case\n if isinstance(data_input, list):\n data_input = data_input[0] if data_input else None\n \n if data_input is not None:\n # Check if it's a geodataframe\n try:\n import geopandas as gpd\n if isinstance(data_input, gpd.GeoDataFrame):\n # Convert to GeoJSON\n geojson_data = json.loads(data_input.to_json())\n geospatial_data = 
utk.physical_from_geojson(geojson_data)\n \n # Add layers to grammar\n if geospatial_data and \"components\" in grammar:\n if \"layers\" not in grammar[\"components\"][0][\"json\"]:\n grammar[\"components\"][0][\"json\"][\"layers\"] = []\n # Add layer for the geospatial data\n grammar[\"components\"][0][\"json\"][\"layers\"].append({\n \"type\": \"geospatial\",\n \"data\": geospatial_data.to_dict() if hasattr(geospatial_data, 'to_dict') else geospatial_data\n })\n except Exception as e:\n pass\n\n # Create HTML container\n html_container = f'
'\n display(HTML(html_container))\n\n # Initialize UTK in browser\n js_initialization = f\"\"\"\n require(['utk'], function(utk) {{\n utk.Environment.serverless = true;\n const container = document.getElementById('utk-container-ddae8fb9');\n const grammar = {json.dumps(grammar)};\n \n try {{\n const interpreter = new utk.GrammarInterpreter('notebook', grammar, container);\n // Store reference for potential interactions\n window._utk_interpreter_ddae8fb9 = interpreter;\n }} catch(e) {{\n console.error('UTK initialization error:', e);\n container.innerHTML = '
Error initializing UTK visualization
';\n }}\n }});\n \"\"\"\n display(Javascript(js_initialization))\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ddae8fb9_82ee_4523_bf07_9184c7fc873f = _curio_output\n result_ddae8fb9_82ee_4523_bf07_9184c7fc873f_in_out = result_ddae8fb9_82ee_4523_bf07_9184c7fc873f\nexcept NameError:\n result_ddae8fb9_82ee_4523_bf07_9184c7fc873f = None\n result_ddae8fb9_82ee_4523_bf07_9184c7fc873f_in_out = None\n\nfrom IPython.display import display\ndisplay(utk-container-ddae8fb9)\n", + "metadata": { + "id": "ddae8fb9-82ee-4523-bf07-9184c7fc873f", + "language": "python" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example4-notebook.ipynb b/docs/examples/notebooks/example4-notebook.ipynb new file mode 100644 index 00000000..01539f28 --- /dev/null +++ b/docs/examples/notebooks/example4-notebook.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"cd3b6afc-900d-417c-8064-375ed01a912f\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"cd3b6afc-900d-417c-8064-375ed01a912f\",\n \"target\": \"a0c5d21f-ce16-4318-95fe-598451859de1\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import geopandas as gpd\n\n gdf = gpd.read_file('data/labels_2026-03-30T13_06_26.436355935-07_00.json')\n\n gdf.metadata = {\n 'name': 'accessibility_features'\n }\n\n return gdf\n\n\n_curio_output = _curio_node()\n\ntry:\n data_cd3b6afc_900d_417c_8064_375ed01a912f = _curio_output\nexcept NameError:\n data_cd3b6afc_900d_417c_8064_375ed01a912f = None\n", + "metadata": { + "id": "cd3b6afc-900d-417c-8064-375ed01a912f", + "language": "python" + } + }, + { + 
"cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"a0c5d21f-ce16-4318-95fe-598451859de1\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"cd3b6afc-900d-417c-8064-375ed01a912f\",\n \"target\": \"a0c5d21f-ce16-4318-95fe-598451859de1\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"a0c5d21f-ce16-4318-95fe-598451859de1\",\n \"target\": \"6475234e-0288-49cd-b03f-19852717c980\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"a0c5d21f-ce16-4318-95fe-598451859de1\",\n \"target\": \"44286732-11aa-4495-8706-cb4a9ba2e6a5\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_cd3b6afc_900d_417c_8064_375ed01a912f\n arg = input_0\n\n import pandas as pd\n import geopandas as gpd\n import numpy as np\n import utk\n\n gdf = arg\n\n processed_gdf = gdf[['label_type', 'severity', 'neighborhood', 'geometry', 'agree_count', 'disagree_count']]\n\n processed_gdf['agreement_ratio'] = processed_gdf['agree_count'] / (processed_gdf['agree_count'] + processed_gdf['disagree_count'])\n\n severity_bins = [0, 1, 2, 3, 5]\n severity_labels = ['Low', 'Medium', 'High', 'Critical']\n processed_gdf['severity_level'] = pd.cut(\n processed_gdf['severity'],\n bins=severity_bins,\n labels=severity_labels,\n include_lowest=True\n )\n\n if processed_gdf.crs is None:\n processed_gdf = processed_gdf.set_crs(\"EPSG:4326\")\n else:\n processed_gdf = processed_gdf.to_crs(\"EPSG:4326\")\n\n processed_gdf['thematic'] = processed_gdf['severity']\n\n processed_gdf.metadata = {\n 'name': 'accessibility_analysis'\n }\n\n return processed_gdf\n\n\n_curio_output = _curio_node()\n\ntry:\n result_a0c5d21f_ce16_4318_95fe_598451859de1 = _curio_output\nexcept NameError:\n result_a0c5d21f_ce16_4318_95fe_598451859de1 = None\n", + "metadata": { + "id": 
"a0c5d21f-ce16-4318-95fe-598451859de1", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"6475234e-0288-49cd-b03f-19852717c980\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"a0c5d21f-ce16-4318-95fe-598451859de1\",\n \"target\": \"6475234e-0288-49cd-b03f-19852717c980\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"6475234e-0288-49cd-b03f-19852717c980\",\n \"target\": \"90282349-bde0-4b34-a0ab-e9a32f48032a\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_a0c5d21f_ce16_4318_95fe_598451859de1\n arg = input_0\n\n import pandas as pd\n import numpy as np\n\n gdf = arg\n\n feature_stats = gdf.groupby('label_type').agg(\n count=('label_type', 'count'),\n avg_severity=('severity', 'mean'),\n avg_agreement=('agreement_ratio', 'mean')\n ).reset_index()\n\n feature_stats = feature_stats.fillna(0)\n\n for col in ['avg_severity', 'avg_agreement']:\n feature_stats[col] = feature_stats[col].astype(float)\n\n feature_stats.metadata = {\n 'name': 'feature_stats'\n }\n\n return feature_stats\n\n\n_curio_output = _curio_node()\n\ntry:\n result_6475234e_0288_49cd_b03f_19852717c980 = _curio_output\nexcept NameError:\n result_6475234e_0288_49cd_b03f_19852717c980 = None\n", + "metadata": { + "id": "6475234e-0288-49cd-b03f-19852717c980", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"44286732-11aa-4495-8706-cb4a9ba2e6a5\",\n \"type\": \"DATA_TRANSFORMATION\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"a0c5d21f-ce16-4318-95fe-598451859de1\",\n \"target\": \"44286732-11aa-4495-8706-cb4a9ba2e6a5\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n 
\"source\": \"44286732-11aa-4495-8706-cb4a9ba2e6a5\",\n \"target\": \"44609c8e-87dd-4373-ab6e-59839e8f9406\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_a0c5d21f_ce16_4318_95fe_598451859de1\n arg = input_0\n\n import pandas as pd\n import numpy as np\n\n gdf = arg\n\n neighborhood_stats = gdf.groupby('neighborhood').agg(\n count=('label_type', 'count'),\n avg_severity=('severity', 'mean'),\n avg_agreement=('agreement_ratio', 'mean')\n ).reset_index()\n\n neighborhood_stats = neighborhood_stats.fillna(0)\n\n for col in ['avg_severity', 'avg_agreement']:\n neighborhood_stats[col] = neighborhood_stats[col].astype(float)\n\n neighborhood_stats.metadata = {\n 'name': 'neighborhood_stats'\n }\n\n return neighborhood_stats\n\n\n_curio_output = _curio_node()\n\ntry:\n result_44286732_11aa_4495_8706_cb4a9ba2e6a5 = _curio_output\nexcept NameError:\n result_44286732_11aa_4495_8706_cb4a9ba2e6a5 = None\n", + "metadata": { + "id": "44286732-11aa-4495-8706-cb4a9ba2e6a5", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"90282349-bde0-4b34-a0ab-e9a32f48032a\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"6475234e-0288-49cd-b03f-19852717c980\",\n \"target\": \"90282349-bde0-4b34-a0ab-e9a32f48032a\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_6475234e_0288_49cd_b03f_19852717c980\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": {\"name\": \"feature_stats\"},\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\n \"field\": \"label_type\",\n \"type\": \"nominal\",\n \"title\": \"Feature Type\"\n },\n \"y\": {\n \"field\": \"count\",\n \"type\": \"quantitative\",\n \"title\": \"Number of Features\"\n },\n \"color\": {\n \"field\": 
\"avg_severity\",\n \"type\": \"quantitative\",\n \"title\": \"Average Severity\",\n \"scale\": {\n \"scheme\": \"viridis\"\n }\n },\n \"tooltip\": [\n {\"field\": \"label_type\", \"type\": \"nominal\", \"title\": \"Feature Type\"},\n {\"field\": \"count\", \"type\": \"quantitative\", \"title\": \"Count\"},\n {\"field\": \"avg_severity\", \"type\": \"quantitative\", \"title\": \"Avg Severity\", \"format\": \".2f\"},\n {\"field\": \"avg_agreement\", \"type\": \"quantitative\", \"title\": \"Avg Agreement\", \"format\": \".2f\"}\n ]\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_90282349_bde0_4b34_a0ab_e9a32f48032a = _curio_output\nexcept NameError:\n result_90282349_bde0_4b34_a0ab_e9a32f48032a = None\n", + "metadata": { + "id": "90282349-bde0-4b34-a0ab-e9a32f48032a", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"44609c8e-87dd-4373-ab6e-59839e8f9406\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"44286732-11aa-4495-8706-cb4a9ba2e6a5\",\n \"target\": \"44609c8e-87dd-4373-ab6e-59839e8f9406\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_44286732_11aa_4495_8706_cb4a9ba2e6a5\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": {\"name\": \"neighborhood_stats\"},\n \"mark\": \"circle\",\n \"encoding\": {\n \"x\": {\n \"field\": \"neighborhood\",\n \"type\": \"nominal\",\n \"title\": \"Neighborhood\"\n },\n \"y\": {\n \"field\": \"count\",\n \"type\": 
\"quantitative\",\n \"title\": \"Number of Features\"\n },\n \"size\": {\n \"field\": \"count\",\n \"type\": \"quantitative\",\n \"title\": \"Number of Features\",\n \"scale\": {\n \"range\": [50, 500]\n }\n },\n \"color\": {\n \"field\": \"avg_severity\",\n \"type\": \"quantitative\",\n \"title\": \"Average Severity\",\n \"scale\": {\n \"scheme\": \"viridis\"\n }\n },\n \"tooltip\": [\n {\"field\": \"neighborhood\", \"type\": \"nominal\", \"title\": \"Neighborhood\"},\n {\"field\": \"count\", \"type\": \"quantitative\", \"title\": \"Count\"},\n {\"field\": \"avg_severity\", \"type\": \"quantitative\", \"title\": \"Avg Severity\", \"format\": \".2f\"},\n {\"field\": \"avg_agreement\", \"type\": \"quantitative\", \"title\": \"Avg Agreement\", \"format\": \".2f\"}\n ]\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_44609c8e_87dd_4373_ab6e_59839e8f9406 = _curio_output\nexcept NameError:\n result_44609c8e_87dd_4373_ab6e_59839e8f9406 = None\n", + "metadata": { + "id": "44609c8e-87dd-4373-ab6e-59839e8f9406", + "language": "python" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example5-notebook.ipynb b/docs/examples/notebooks/example5-notebook.ipynb new file mode 100644 index 00000000..6381fbbe --- /dev/null +++ b/docs/examples/notebooks/example5-notebook.ipynb @@ -0,0 +1,48 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": 
\"92385949-b264-4108-abea-99df7a39b551\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"92385949-b264-4108-abea-99df7a39b551\",\n \"target\": \"d2ebbea0-a6c0-459e-8aa3-a16ce0983e79\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n sensor = pd.read_csv('data/Flooding_Complaints_to_311_20260330.csv')\n\n return sensor\n\n_curio_output = _curio_node()\n\ntry:\n data_92385949_b264_4108_abea_99df7a39b551 = _curio_output\nexcept NameError:\n data_92385949_b264_4108_abea_99df7a39b551 = None\n", + "metadata": { + "id": "92385949-b264-4108-abea-99df7a39b551", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"d2ebbea0-a6c0-459e-8aa3-a16ce0983e79\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"92385949-b264-4108-abea-99df7a39b551\",\n \"target\": \"d2ebbea0-a6c0-459e-8aa3-a16ce0983e79\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"d2ebbea0-a6c0-459e-8aa3-a16ce0983e79\",\n \"target\": \"6a33a101-fb35-487c-8670-fae8dc7f3828\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_92385949_b264_4108_abea_99df7a39b551\n arg = input_0\n\n def complaints_by_zip(df):\n\n grouped = df[\"ZIP_CODE\"].fillna(\"UNKNOWN\").value_counts().reset_index()\n grouped.columns = [\"ZIP_CODE\", \"Complaint_Count\"]\n return grouped\n\n return complaints_by_zip(arg)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_d2ebbea0_a6c0_459e_8aa3_a16ce0983e79 = _curio_output\nexcept NameError:\n result_d2ebbea0_a6c0_459e_8aa3_a16ce0983e79 = None\n", + "metadata": { + "id": "d2ebbea0-a6c0-459e-8aa3-a16ce0983e79", + "language": "python" + } + }, + { + 
"cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"6a33a101-fb35-487c-8670-fae8dc7f3828\",\n \"type\": \"DATA_POOL\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"d2ebbea0-a6c0-459e-8aa3-a16ce0983e79\",\n \"target\": \"6a33a101-fb35-487c-8670-fae8dc7f3828\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"6a33a101-fb35-487c-8670-fae8dc7f3828\",\n \"target\": \"6864201f-4c0e-441b-ac7c-94e7eba7580e\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n return result_d2ebbea0_a6c0_459e_8aa3_a16ce0983e79\n\n\n_curio_output = _curio_node()\n\ntry:\n pool_6a33a101_fb35_487c_8670_fae8dc7f3828 = _curio_output\nexcept NameError:\n pool_6a33a101_fb35_487c_8670_fae8dc7f3828 = None\n", + "metadata": { + "id": "6a33a101-fb35-487c-8670-fae8dc7f3828", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"6864201f-4c0e-441b-ac7c-94e7eba7580e\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"6a33a101-fb35-487c-8670-fae8dc7f3828\",\n \"target\": \"6864201f-4c0e-441b-ac7c-94e7eba7580e\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = pool_6a33a101_fb35_487c_8670_fae8dc7f3828\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"description\": \"A simple bar chart with embedded data.\",\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\"field\": \"ZIP_CODE\", \"type\": \"nominal\", \"title\": \"Zip Code\", \"axis\": {\"labelAngle\": 270}},\n \"y\": {\"field\": \"Complaint_Count\", \"type\": \"quantitative\", \"title\": \"Complaints\", \"axis\": {\"labelAngle\": 0}}\n }\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = 
input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_6864201f_4c0e_441b_ac7c_94e7eba7580e = _curio_output\nexcept NameError:\n result_6864201f_4c0e_441b_ac7c_94e7eba7580e = None\n", + "metadata": { + "id": "6864201f-4c0e-441b-ac7c-94e7eba7580e", + "language": "python" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example7-notebook.ipynb b/docs/examples/notebooks/example7-notebook.ipynb new file mode 100644 index 00000000..644cb7fb --- /dev/null +++ b/docs/examples/notebooks/example7-notebook.ipynb @@ -0,0 +1,40 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e1a123c1-8837-47a5-9b63-038e5ebcb530\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"e1a123c1-8837-47a5-9b63-038e5ebcb530\",\n \"target\": \"cb340f83-0a4d-457a-be66-691672f330d3\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"data/Speed_Camera_Violations.csv\")\n df.dropna(inplace=True)\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_e1a123c1_8837_47a5_9b63_038e5ebcb530 = _curio_output\nexcept NameError:\n data_e1a123c1_8837_47a5_9b63_038e5ebcb530 = None\n", + "metadata": { + "id": "e1a123c1-8837-47a5-9b63-038e5ebcb530", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"cb340f83-0a4d-457a-be66-691672f330d3\",\n 
\"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"e1a123c1-8837-47a5-9b63-038e5ebcb530\",\n \"target\": \"cb340f83-0a4d-457a-be66-691672f330d3\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"cb340f83-0a4d-457a-be66-691672f330d3\",\n \"target\": \"c2ba6e0e-e239-4167-a382-ffd1993cb3da\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_e1a123c1_8837_47a5_9b63_038e5ebcb530\n arg = input_0\n\n import pandas as pd\n\n df = arg\n\n df['VIOLATION DATE'] = pd.to_datetime(df['VIOLATION DATE'], format='%m/%d/%Y')\n\n df['Year'] = df['VIOLATION DATE'].dt.year\n\n yr_sum = (df.groupby(['CAMERA ID', 'Year'])['VIOLATIONS']\n .sum()\n .reset_index()\n .rename(columns={'VIOLATIONS': 'avg_violations'}))\n\n top_ids = (df.groupby('CAMERA ID')['VIOLATIONS']\n .sum()\n .sort_values(ascending=False)\n .head(5)\n .index\n .tolist())\n\n yr_sum = yr_sum[yr_sum['CAMERA ID'].isin(top_ids)]\n\n camera_pos = (df.groupby('CAMERA ID')[['LATITUDE', 'LONGITUDE']]\n .mean()\n .reset_index())\n\n yr_sum = yr_sum.merge(camera_pos, on='CAMERA ID')\n\n return yr_sum\n\n\n_curio_output = _curio_node()\n\ntry:\n result_cb340f83_0a4d_457a_be66_691672f330d3 = _curio_output\nexcept NameError:\n result_cb340f83_0a4d_457a_be66_691672f330d3 = None\n", + "metadata": { + "id": "cb340f83-0a4d-457a-be66-691672f330d3", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"c2ba6e0e-e239-4167-a382-ffd1993cb3da\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"cb340f83-0a4d-457a-be66-691672f330d3\",\n \"target\": \"c2ba6e0e-e239-4167-a382-ffd1993cb3da\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef 
_curio_node():\n\n\n input_data = result_cb340f83_0a4d_457a_be66_691672f330d3\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"data\": { \"name\": \"table\" },\n \"config\": { \"bar\": { \"continuousBandSize\": 18 } },\n \"hconcat\": [\n {\n \"width\": 320,\n \"height\": 260,\n \"selection\": { \"yrBrush\": { \"type\": \"interval\", \"encodings\": [\"x\"] } },\n \"mark\": { \"type\": \"bar\" },\n \"encoding\": {\n \"x\": { \"field\": \"Year\", \"type\": \"quantitative\", \"title\": \"Year\" },\n \"y\": {\n \"aggregate\": \"sum\",\n \"field\": \"avg_violations\",\n \"type\": \"quantitative\",\n \"title\": \"Total Violations\"\n },\n \"color\": {\n \"field\": \"CAMERA ID\",\n \"type\": \"nominal\",\n \"legend\": { \"title\": \"Camera ID\" }\n }\n }\n },\n {\n \"width\": 320,\n \"height\": 260,\n \"transform\": [\n { \"filter\": { \"selection\": \"yrBrush\" } },\n {\n \"aggregate\": [\n { \"op\": \"sum\", \"field\": \"avg_violations\", \"as\": \"total\" }\n ],\n \"groupby\": [\"Year\"]\n },\n { \"sort\": { \"field\": \"Year\" } }\n ],\n \"mark\": { \"type\": \"line\", \"point\": True },\n \"encoding\": {\n \"x\": {\n \"field\": \"Year\",\n \"type\": \"quantitative\",\n \"title\": \"Year (brush range)\"\n },\n \"y\": {\n \"field\": \"total\",\n \"type\": \"quantitative\",\n \"title\": \"Total Violations\"\n }\n }\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_c2ba6e0e_e239_4167_a382_ffd1993cb3da = _curio_output\nexcept NameError:\n result_c2ba6e0e_e239_4167_a382_ffd1993cb3da = None\n", + "metadata": { + "id": "c2ba6e0e-e239-4167-a382-ffd1993cb3da", + "language": "python" + } + } + ], + 
"metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example8-notebook.ipynb b/docs/examples/notebooks/example8-notebook.ipynb new file mode 100644 index 00000000..d937ca63 --- /dev/null +++ b/docs/examples/notebooks/example8-notebook.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"2704287e-a72c-454d-b6f2-7bca1e521397\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"f4cb8452-3fbf-48b3-9a4e-42d0607428cb\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"1c3f8346-a156-4716-9b67-5e3cd0c4b256\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"ad6d1689-5be3-4b18-a72a-ed54f74621ee\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"da73932f-b91a-4542-98df-d731a888b8b4\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"2bbbd05f-f5cb-48a2-b96f-c55a9ac95a34\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"be851609-c0f7-4cae-afae-2094965ead93\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"source\": 
\"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"18b287f6-0a05-4dbc-92d3-4eb10f177bc5\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"data/red-light-violation.csv\")\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed = _curio_output\nexcept NameError:\n data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed = None\n", + "metadata": { + "id": "6f4c2cd3-e83e-4e85-81de-3ec50986a2ed", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"2704287e-a72c-454d-b6f2-7bca1e521397\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"2704287e-a72c-454d-b6f2-7bca1e521397\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"2704287e-a72c-454d-b6f2-7bca1e521397\",\n \"target\": \"04062f8c-289c-4588-84b2-21be45adf916\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n },\n {\n \"id\": \"edge_in_0_2704287e_a72c_454d_b6f2_7bca1e521397_624087f5_cd1f_4348_8b15_ea9eed203770\",\n \"source\": \"2704287e-a72c-454d-b6f2-7bca1e521397\",\n \"target\": \"624087f5-cd1f-4348-8b15-ea9eed203770\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = arg.copy()\n df['VIOLATION DATE'] = pd.to_datetime(df['VIOLATION DATE'])\n df['Year'] = df['VIOLATION DATE'].dt.year\n df['Month'] = df['VIOLATION DATE'].dt.month\n\n def assign_season(month):\n if month in [12, 1, 2]:\n return \"Winter\"\n elif month in [3, 4, 5]:\n return \"Spring\"\n elif month in [6, 7, 8]:\n return \"Summer\"\n else:\n return \"Fall\"\n\n df['Season'] = df['Month'].apply(assign_season)\n\n df_trend = 
df.groupby(['VIOLATION DATE', 'Year', 'Season'])['VIOLATIONS'].sum().reset_index()\n df_trend['VIOLATION DATE'] = df_trend['VIOLATION DATE'].astype(str)\n\n return pd.DataFrame(df_trend)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_2704287e_a72c_454d_b6f2_7bca1e521397 = _curio_output\nexcept NameError:\n result_2704287e_a72c_454d_b6f2_7bca1e521397 = None\n", + "metadata": { + "id": "2704287e-a72c-454d-b6f2-7bca1e521397", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"04062f8c-289c-4588-84b2-21be45adf916\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"2704287e-a72c-454d-b6f2-7bca1e521397\",\n \"target\": \"04062f8c-289c-4588-84b2-21be45adf916\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_2704287e_a72c_454d_b6f2_7bca1e521397\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"width\": 750,\n \"height\": 400,\n \"title\": \"Seasonal Violation Trend (Daily)\",\n \"mark\": {\n \"type\": \"line\",\n \"point\": True\n },\n \"encoding\": {\n \"x\": {\n \"field\": \"VIOLATION DATE\",\n \"type\": \"temporal\",\n \"title\": \"Date\",\n \"axis\": {\n \"format\": \"%Y %b\",\n \"labelAngle\": -45\n }\n },\n \"y\": {\n \"field\": \"VIOLATIONS\",\n \"type\": \"quantitative\",\n \"title\": \"Violations\"\n },\n \"color\": {\n \"field\": \"Season\",\n \"type\": \"nominal\",\n \"title\": \"Season\",\n \"scale\": {\n \"domain\": [\"Winter\", \"Spring\", \"Summer\", \"Fall\"],\n \"range\": [\"#1f77b4\", \"#2ca02c\", \"#ff7f0e\", \"#9467bd\"]\n }\n },\n \"tooltip\": [\n { \"field\": \"VIOLATION DATE\", \"type\": \"temporal\", \"title\": \"Date\" },\n { \"field\": \"Season\", \"type\": \"nominal\" },\n { \"field\": \"VIOLATIONS\", \"type\": \"quantitative\" }\n ]\n }\n }\n\n values = input_data\n if 
hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_04062f8c_289c_4588_84b2_21be45adf916 = _curio_output\nexcept NameError:\n result_04062f8c_289c_4588_84b2_21be45adf916 = None\n", + "metadata": { + "id": "04062f8c-289c-4588-84b2-21be45adf916", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"f4cb8452-3fbf-48b3-9a4e-42d0607428cb\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"f4cb8452-3fbf-48b3-9a4e-42d0607428cb\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"id\": \"edge_in_1_f4cb8452_3fbf_48b3_9a4e_42d0607428cb_624087f5_cd1f_4348_8b15_ea9eed203770\",\n \"source\": \"f4cb8452-3fbf-48b3-9a4e-42d0607428cb\",\n \"target\": \"624087f5-cd1f-4348-8b15-ea9eed203770\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = arg.copy()\n df['VIOLATION DATE'] = pd.to_datetime(df['VIOLATION DATE'])\n df['Year'] = df['VIOLATION DATE'].dt.year\n df['Month'] = df['VIOLATION DATE'].dt.month\n\n heatmap_data = df.groupby(['Year', 'Month'])['VIOLATIONS'].sum().reset_index()\n\n return pd.DataFrame(heatmap_data)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_f4cb8452_3fbf_48b3_9a4e_42d0607428cb = _curio_output\nexcept NameError:\n result_f4cb8452_3fbf_48b3_9a4e_42d0607428cb = None\n", + "metadata": { + "id": "f4cb8452-3fbf-48b3-9a4e-42d0607428cb", + "language": "python" + } + }, + { + 
"cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"624087f5-cd1f-4348-8b15-ea9eed203770\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"id\": \"edge_in_0_2704287e_a72c_454d_b6f2_7bca1e521397_624087f5_cd1f_4348_8b15_ea9eed203770\",\n \"source\": \"2704287e-a72c-454d-b6f2-7bca1e521397\",\n \"target\": \"624087f5-cd1f-4348-8b15-ea9eed203770\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n },\n {\n \"id\": \"edge_in_1_f4cb8452_3fbf_48b3_9a4e_42d0607428cb_624087f5_cd1f_4348_8b15_ea9eed203770\",\n \"source\": \"f4cb8452-3fbf-48b3-9a4e-42d0607428cb\",\n \"target\": \"624087f5-cd1f-4348-8b15-ea9eed203770\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"624087f5-cd1f-4348-8b15-ea9eed203770\",\n \"target\": \"ab8d8046-b68f-419f-bb36-3454e576afd4\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n inputs = [\n result_2704287e_a72c_454d_b6f2_7bca1e521397,\n result_f4cb8452_3fbf_48b3_9a4e_42d0607428cb\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_624087f5_cd1f_4348_8b15_ea9eed203770 = _curio_output\nexcept NameError:\n merged_624087f5_cd1f_4348_8b15_ea9eed203770 = None\n", + "metadata": { + "id": "624087f5-cd1f-4348-8b15-ea9eed203770", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ab8d8046-b68f-419f-bb36-3454e576afd4\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"624087f5-cd1f-4348-8b15-ea9eed203770\",\n \"target\": \"ab8d8046-b68f-419f-bb36-3454e576afd4\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"ab8d8046-b68f-419f-bb36-3454e576afd4\",\n \"target\": 
\"3024166a-cd2c-460e-9063-d49f912774cc\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = merged_624087f5_cd1f_4348_8b15_ea9eed203770\n arg = input_0\n\n import pandas as pd\n\n df_trend = pd.DataFrame(arg[0])\n heatmap_data = pd.DataFrame(arg[1])\n\n df_trend['VIOLATION DATE'] = pd.to_datetime(df_trend['VIOLATION DATE'])\n\n summary = heatmap_data.groupby('Year')['VIOLATIONS'].sum().reset_index()\n\n merged = df_trend.merge(summary, on='Year', how='left')\n\n final = merged[['VIOLATION DATE', 'VIOLATIONS_x', 'Season', 'VIOLATIONS_y']]\n\n final.columns = ['VIOLATION DATE', 'Daily Violations', 'Season', 'Yearly Total']\n\n final['VIOLATION DATE'] = final['VIOLATION DATE'].astype(str)\n\n array_data = final.to_dict(orient='records')\n\n shape = [final.shape[0], final.shape[1]]\n\n return pd.DataFrame(array_data)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ab8d8046_b68f_419f_bb36_3454e576afd4 = _curio_output\nexcept NameError:\n result_ab8d8046_b68f_419f_bb36_3454e576afd4 = None\n", + "metadata": { + "id": "ab8d8046-b68f-419f-bb36-3454e576afd4", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"3024166a-cd2c-460e-9063-d49f912774cc\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"ab8d8046-b68f-419f-bb36-3454e576afd4\",\n \"target\": \"3024166a-cd2c-460e-9063-d49f912774cc\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"3024166a-cd2c-460e-9063-d49f912774cc\",\n \"target\": \"b3beb8b4-d227-48af-b0c1-d9b99564888b\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_ab8d8046_b68f_419f_bb36_3454e576afd4\n arg = input_0\n\n import pandas as pd\n\n df = arg.copy()\n\n df['VIOLATION DATE'] = pd.to_datetime(df['VIOLATION DATE'], errors='coerce')\n\n if 
'Year' not in df.columns:\n df['Year'] = df['VIOLATION DATE'].dt.year\n if 'Month' not in df.columns:\n df['Month'] = df['VIOLATION DATE'].dt.month\n\n df['Daily Violations'] = pd.to_numeric(df['Daily Violations'], errors='coerce')\n df['Yearly Total'] = pd.to_numeric(df['Yearly Total'], errors='coerce')\n\n df['VIOLATION DATE'] = df['VIOLATION DATE'].astype(str)\n\n return pd.DataFrame(df)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_3024166a_cd2c_460e_9063_d49f912774cc = _curio_output\nexcept NameError:\n result_3024166a_cd2c_460e_9063_d49f912774cc = None\n", + "metadata": { + "id": "3024166a-cd2c-460e-9063-d49f912774cc", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"b3beb8b4-d227-48af-b0c1-d9b99564888b\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"3024166a-cd2c-460e-9063-d49f912774cc\",\n \"target\": \"b3beb8b4-d227-48af-b0c1-d9b99564888b\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_3024166a_cd2c_460e_9063_d49f912774cc\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"hconcat\": [\n {\n \"width\": 300,\n \"height\": 300,\n \"title\": \"Monthly Violations Heatmap\",\n \"params\": [\n {\n \"name\": \"yearFilter\",\n \"select\": {\n \"type\": \"point\",\n \"fields\": [\"Year\"],\n \"on\": \"click\"\n }\n }\n ],\n \"mark\": \"rect\",\n \"encoding\": {\n \"x\": {\n \"field\": \"Month\",\n \"type\": \"ordinal\",\n \"title\": \"Month\"\n },\n \"y\": {\n \"field\": \"Year\",\n \"type\": \"ordinal\",\n \"title\": \"Year\"\n },\n \"color\": {\n \"aggregate\": \"sum\",\n \"field\": \"Yearly Total\",\n \"type\": \"quantitative\",\n \"scale\": {\n \"scheme\": \"orangered\"\n },\n \"title\": \"Violations\"\n },\n \"tooltip\": [\n { \"field\": \"Year\", \"type\": \"ordinal\" },\n { \"field\": 
\"Month\", \"type\": \"ordinal\" },\n {\n \"aggregate\": \"sum\",\n \"field\": \"Yearly Total\",\n \"type\": \"quantitative\",\n \"title\": \"Total Violations\"\n }\n ]\n }\n },\n {\n \"width\": 600,\n \"height\": 300,\n \"title\": \"Seasonal Violation Trend (Daily)\",\n \"transform\": [\n {\n \"filter\": \"yearFilter.Year == null || datum.Year == yearFilter.Year\"\n }\n ],\n \"mark\": \"line\",\n \"encoding\": {\n \"x\": {\n \"field\": \"VIOLATION DATE\",\n \"type\": \"temporal\",\n \"title\": \"Date\"\n },\n \"y\": {\n \"field\": \"Daily Violations\",\n \"type\": \"quantitative\",\n \"title\": \"Violations\"\n },\n \"color\": {\n \"field\": \"Season\",\n \"type\": \"nominal\"\n },\n \"tooltip\": [\n { \"field\": \"VIOLATION DATE\", \"type\": \"temporal\" },\n { \"field\": \"Daily Violations\", \"type\": \"quantitative\" },\n { \"field\": \"Season\", \"type\": \"nominal\" }\n ]\n }\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_b3beb8b4_d227_48af_b0c1_d9b99564888b = _curio_output\nexcept NameError:\n result_b3beb8b4_d227_48af_b0c1_d9b99564888b = None\n", + "metadata": { + "id": "b3beb8b4-d227-48af-b0c1-d9b99564888b", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"1c3f8346-a156-4716-9b67-5e3cd0c4b256\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"1c3f8346-a156-4716-9b67-5e3cd0c4b256\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": 
\"1c3f8346-a156-4716-9b67-5e3cd0c4b256\",\n \"target\": \"9acbea5b-9bb7-4eea-84dd-3dc16e25e634\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = arg\n\n df[\"VIOLATION DATE\"] = pd.to_datetime(df[\"VIOLATION DATE\"])\n\n df[\"Month\"] = df[\"VIOLATION DATE\"].dt.month\n df[\"Year\"] = df[\"VIOLATION DATE\"].dt.year\n\n def assign_season(month):\n if month in [12, 1, 2]:\n return \"Winter\"\n elif month in [3, 4, 5]:\n return \"Spring\"\n elif month in [6, 7, 8]:\n return \"Summer\"\n else:\n return \"Fall\"\n\n df[\"Season\"] = df[\"Month\"].apply(assign_season)\n\n df_seasonal = df.groupby([\"Year\", \"Season\"])[\"VIOLATIONS\"].sum().reset_index()\n\n return pd.DataFrame(df_seasonal)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_1c3f8346_a156_4716_9b67_5e3cd0c4b256 = _curio_output\nexcept NameError:\n result_1c3f8346_a156_4716_9b67_5e3cd0c4b256 = None\n", + "metadata": { + "id": "1c3f8346-a156-4716-9b67-5e3cd0c4b256", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"9acbea5b-9bb7-4eea-84dd-3dc16e25e634\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"1c3f8346-a156-4716-9b67-5e3cd0c4b256\",\n \"target\": \"9acbea5b-9bb7-4eea-84dd-3dc16e25e634\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_1c3f8346_a156_4716_9b67_5e3cd0c4b256\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"title\": \"Seasonal Red-Light Violations Over Time\",\n \"mark\": \"area\",\n \"encoding\": {\n \"x\": {\n \"field\": \"Year\",\n \"type\": \"ordinal\",\n \"title\": \"Year\"\n },\n \"y\": {\n \"field\": \"VIOLATIONS\",\n \"type\": \"quantitative\",\n \"title\": \"Total 
Violations\"\n },\n \"color\": {\n \"field\": \"Season\",\n \"type\": \"nominal\",\n \"title\": \"Season\"\n },\n \"tooltip\": [\n { \"field\": \"Year\", \"type\": \"ordinal\" },\n { \"field\": \"Season\", \"type\": \"nominal\" },\n { \"field\": \"VIOLATIONS\", \"type\": \"quantitative\" }\n ]\n },\n \"width\": 600,\n \"height\": 400\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_9acbea5b_9bb7_4eea_84dd_3dc16e25e634 = _curio_output\nexcept NameError:\n result_9acbea5b_9bb7_4eea_84dd_3dc16e25e634 = None\n", + "metadata": { + "id": "9acbea5b-9bb7-4eea-84dd-3dc16e25e634", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"ad6d1689-5be3-4b18-a72a-ed54f74621ee\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"ad6d1689-5be3-4b18-a72a-ed54f74621ee\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"ad6d1689-5be3-4b18-a72a-ed54f74621ee\",\n \"target\": \"9c70e329-602e-4309-98d7-b12f44c99319\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = arg\n df[\"VIOLATION DATE\"] = pd.to_datetime(df[\"VIOLATION DATE\"])\n df[\"Year\"] = df[\"VIOLATION DATE\"].dt.year\n\n grouped = df.groupby([\"INTERSECTION\", \"Year\"])[\"VIOLATIONS\"].sum().reset_index()\n grouped[\"Rank\"] = grouped.groupby(\"Year\")[\"VIOLATIONS\"].rank(ascending=False, 
method=\"first\")\n\n top3 = grouped[grouped[\"Rank\"] <= 3]\n\n return pd.DataFrame(top3)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_ad6d1689_5be3_4b18_a72a_ed54f74621ee = _curio_output\nexcept NameError:\n result_ad6d1689_5be3_4b18_a72a_ed54f74621ee = None\n", + "metadata": { + "id": "ad6d1689-5be3-4b18-a72a-ed54f74621ee", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"9c70e329-602e-4309-98d7-b12f44c99319\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"ad6d1689-5be3-4b18-a72a-ed54f74621ee\",\n \"target\": \"9c70e329-602e-4309-98d7-b12f44c99319\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_ad6d1689_5be3_4b18_a72a_ed54f74621ee\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"title\": \"Top 3 Intersections with Most Violations by Year\",\n \"mark\": \"bar\",\n \"encoding\": {\n \"x\": {\n \"field\": \"Year\",\n \"type\": \"ordinal\",\n \"title\": \"Year\"\n },\n \"y\": {\n \"field\": \"VIOLATIONS\",\n \"type\": \"quantitative\",\n \"title\": \"Violations\"\n },\n \"color\": {\n \"field\": \"INTERSECTION\",\n \"type\": \"nominal\",\n \"title\": \"Intersection\"\n },\n \"tooltip\": [\n { \"field\": \"INTERSECTION\", \"type\": \"nominal\" },\n { \"field\": \"VIOLATIONS\", \"type\": \"quantitative\" }\n ]\n },\n \"width\": 600,\n \"height\": 400\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_9c70e329_602e_4309_98d7_b12f44c99319 = _curio_output\nexcept 
NameError:\n result_9c70e329_602e_4309_98d7_b12f44c99319 = None\n", + "metadata": { + "id": "9c70e329-602e-4309-98d7-b12f44c99319", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"da73932f-b91a-4542-98df-d731a888b8b4\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"da73932f-b91a-4542-98df-d731a888b8b4\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"id\": \"edge_in_0_da73932f_b91a_4542_98df_d731a888b8b4_04689857_6cf6_4ee9_801d_e7b075c16da5\",\n \"source\": \"da73932f-b91a-4542-98df-d731a888b8b4\",\n \"target\": \"04689857-6cf6-4ee9-801d-e7b075c16da5\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n import numpy as np\n\n df = arg.copy()\n df[\"VIOLATION DATE\"] = pd.to_datetime(df[\"VIOLATION DATE\"])\n df[\"Year\"] = df[\"VIOLATION DATE\"].dt.year\n\n\n grouped = df.groupby(\"INTERSECTION\").agg({\n \"CAMERA ID\": \"nunique\",\n \"VIOLATIONS\": \"sum\"\n }).reset_index().rename(columns={\"CAMERA ID\": \"CAMERA_COUNT\"})\n\n grouped[\"CAMERA_BIN\"] = grouped[\"CAMERA_COUNT\"].apply(lambda x: \"4+\" if x >= 4 else str(x))\n\n\n camera_order = {\"1\": 1, \"2\": 2, \"3\": 3, \"4+\": 4}\n grouped[\"x_base\"] = grouped[\"CAMERA_BIN\"].map(camera_order)\n np.random.seed(42)\n grouped[\"jittered_x\"] = grouped[\"x_base\"] + np.random.uniform(-0.2, 0.2, size=len(grouped))\n\n\n return pd.DataFrame(grouped)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_da73932f_b91a_4542_98df_d731a888b8b4 = _curio_output\nexcept NameError:\n result_da73932f_b91a_4542_98df_d731a888b8b4 = None\n", + "metadata": { + "id": "da73932f-b91a-4542-98df-d731a888b8b4", + "language": "python" + } + }, + { + 
"cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"2bbbd05f-f5cb-48a2-b96f-c55a9ac95a34\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"2bbbd05f-f5cb-48a2-b96f-c55a9ac95a34\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"id\": \"edge_in_1_2bbbd05f_f5cb_48a2_b96f_c55a9ac95a34_04689857_6cf6_4ee9_801d_e7b075c16da5\",\n \"source\": \"2bbbd05f-f5cb-48a2-b96f-c55a9ac95a34\",\n \"target\": \"04689857-6cf6-4ee9-801d-e7b075c16da5\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = arg.copy()\n\n df[\"VIOLATION DATE\"] = pd.to_datetime(df[\"VIOLATION DATE\"])\n df[\"Year\"] = df[\"VIOLATION DATE\"].dt.year\n\n trend = df.groupby(['INTERSECTION', 'Year'])['VIOLATIONS'].sum().reset_index()\n first = trend.groupby('INTERSECTION').first().reset_index()\n last = trend.groupby('INTERSECTION').last().reset_index()\n\n change = first.merge(last, on='INTERSECTION', suffixes=('_first', '_last'))\n change = change[change['VIOLATIONS_first'] > 0]\n\n change['Percent_Reduction'] = ((change['VIOLATIONS_first'] - change['VIOLATIONS_last']) / change['VIOLATIONS_first']) * 100\n\n\n return pd.DataFrame(change)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_2bbbd05f_f5cb_48a2_b96f_c55a9ac95a34 = _curio_output\nexcept NameError:\n result_2bbbd05f_f5cb_48a2_b96f_c55a9ac95a34 = None\n", + "metadata": { + "id": "2bbbd05f-f5cb-48a2-b96f-c55a9ac95a34", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"04689857-6cf6-4ee9-801d-e7b075c16da5\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"id\": 
\"edge_in_0_da73932f_b91a_4542_98df_d731a888b8b4_04689857_6cf6_4ee9_801d_e7b075c16da5\",\n \"source\": \"da73932f-b91a-4542-98df-d731a888b8b4\",\n \"target\": \"04689857-6cf6-4ee9-801d-e7b075c16da5\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n },\n {\n \"id\": \"edge_in_1_2bbbd05f_f5cb_48a2_b96f_c55a9ac95a34_04689857_6cf6_4ee9_801d_e7b075c16da5\",\n \"source\": \"2bbbd05f-f5cb-48a2-b96f-c55a9ac95a34\",\n \"target\": \"04689857-6cf6-4ee9-801d-e7b075c16da5\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"04689857-6cf6-4ee9-801d-e7b075c16da5\",\n \"target\": \"bed93b38-cdb2-4db3-be54-7e60d68013d9\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n inputs = [\n result_da73932f_b91a_4542_98df_d731a888b8b4,\n result_2bbbd05f_f5cb_48a2_b96f_c55a9ac95a34\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_04689857_6cf6_4ee9_801d_e7b075c16da5 = _curio_output\nexcept NameError:\n merged_04689857_6cf6_4ee9_801d_e7b075c16da5 = None\n", + "metadata": { + "id": "04689857-6cf6-4ee9-801d-e7b075c16da5", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"bed93b38-cdb2-4db3-be54-7e60d68013d9\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"04689857-6cf6-4ee9-801d-e7b075c16da5\",\n \"target\": \"bed93b38-cdb2-4db3-be54-7e60d68013d9\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"bed93b38-cdb2-4db3-be54-7e60d68013d9\",\n \"target\": \"b5ff6442-ea3f-468f-8c85-8fb634eb88f1\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = merged_04689857_6cf6_4ee9_801d_e7b075c16da5\n arg = input_0\n\n import pandas as pd\n\n grouped = 
pd.DataFrame(arg[0])\n change = pd.DataFrame(arg[1])\n\n merged = grouped.merge(\n change[['INTERSECTION', 'Percent_Reduction']],\n on='INTERSECTION',\n how='left'\n )\n\n merged['VIOLATIONS'] = pd.to_numeric(merged['VIOLATIONS'], errors='coerce')\n merged['Percent_Reduction'] = pd.to_numeric(merged['Percent_Reduction'], errors='coerce')\n\n merged = merged.dropna(subset=['Percent_Reduction'])\n\n array_data = merged.to_dict(orient='records')\n\n shape = [merged.shape[0], merged.shape[1]]\n\n return pd.DataFrame(array_data)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_bed93b38_cdb2_4db3_be54_7e60d68013d9 = _curio_output\nexcept NameError:\n result_bed93b38_cdb2_4db3_be54_7e60d68013d9 = None\n", + "metadata": { + "id": "bed93b38-cdb2-4db3-be54-7e60d68013d9", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"b5ff6442-ea3f-468f-8c85-8fb634eb88f1\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"bed93b38-cdb2-4db3-be54-7e60d68013d9\",\n \"target\": \"b5ff6442-ea3f-468f-8c85-8fb634eb88f1\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"b5ff6442-ea3f-468f-8c85-8fb634eb88f1\",\n \"target\": \"e1c71f22-c26c-498b-964e-7413763c812a\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_bed93b38_cdb2_4db3_be54_7e60d68013d9\n arg = input_0\n\n import pandas as pd\n\n df = arg.copy()\n\n df['VIOLATIONS'] = pd.to_numeric(df['VIOLATIONS'], errors='coerce')\n df['CAMERA_COUNT'] = pd.to_numeric(df['CAMERA_COUNT'], errors='coerce')\n df['Percent_Reduction'] = pd.to_numeric(df['Percent_Reduction'], errors='coerce')\n df['jittered_x'] = pd.to_numeric(df['jittered_x'], errors='coerce')\n\n df = df.dropna(subset=['VIOLATIONS', 'CAMERA_COUNT', 'Percent_Reduction', 'jittered_x'])\n\n\n return 
pd.DataFrame(df)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_b5ff6442_ea3f_468f_8c85_8fb634eb88f1 = _curio_output\nexcept NameError:\n result_b5ff6442_ea3f_468f_8c85_8fb634eb88f1 = None\n", + "metadata": { + "id": "b5ff6442-ea3f-468f-8c85-8fb634eb88f1", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"e1c71f22-c26c-498b-964e-7413763c812a\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"b5ff6442-ea3f-468f-8c85-8fb634eb88f1\",\n \"target\": \"e1c71f22-c26c-498b-964e-7413763c812a\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_b5ff6442_ea3f_468f_8c85_8fb634eb88f1\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"params\": [\n {\n \"name\": \"cameraFilter\",\n \"bind\": {\n \"input\": \"select\",\n \"options\": [\"1\", \"2\", \"3\", \"4+\"],\n \"labels\": [\"1 Camera\", \"2 Cameras\", \"3 Cameras\", \"4+ Cameras\"]\n }\n }\n ],\n \"hconcat\": [\n {\n \"width\": 500,\n \"mark\": \"boxplot\",\n \"encoding\": {\n \"x\": {\n \"field\": \"CAMERA_BIN\",\n \"type\": \"nominal\",\n \"title\": \"Camera Count\"\n },\n \"y\": {\n \"field\": \"VIOLATIONS\",\n \"type\": \"quantitative\",\n \"title\": \"Violations\"\n },\n \"color\": {\n \"field\": \"CAMERA_BIN\",\n \"type\": \"nominal\"\n }\n }\n },\n {\n \"width\": 500,\n \"mark\": {\n \"type\": \"bar\",\n \"cursor\": \"pointer\"\n },\n \"transform\": [\n { \"filter\": \"cameraFilter == null || datum.CAMERA_BIN == cameraFilter\" }\n ],\n \"encoding\": {\n \"x\": {\n \"field\": \"Percent_Reduction\",\n \"type\": \"quantitative\",\n \"title\": \"Percent Reduction\"\n },\n \"y\": {\n \"field\": \"INTERSECTION\",\n \"type\": \"nominal\",\n \"sort\": \"-x\",\n \"title\": \"Intersection\"\n },\n \"color\": {\n \"field\": \"Percent_Reduction\",\n \"type\": 
\"quantitative\",\n \"scale\": { \"scheme\": \"blues\" }\n }\n }\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_e1c71f22_c26c_498b_964e_7413763c812a = _curio_output\nexcept NameError:\n result_e1c71f22_c26c_498b_964e_7413763c812a = None\n", + "metadata": { + "id": "e1c71f22-c26c-498b-964e-7413763c812a", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"be851609-c0f7-4cae-afae-2094965ead93\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"be851609-c0f7-4cae-afae-2094965ead93\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"id\": \"edge_in_0_be851609_c0f7_4cae_afae_2094965ead93_412c23ca_fafd_4af8_ab87_abd9df17241a\",\n \"source\": \"be851609-c0f7-4cae-afae-2094965ead93\",\n \"target\": \"412c23ca-fafd-4af8-ab87-abd9df17241a\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = pd.DataFrame(arg)\n\n df['VIOLATION DATE'] = pd.to_datetime(df['VIOLATION DATE'], errors='coerce')\n\n df['Year'] = df['VIOLATION DATE'].dt.year\n\n df['VIOLATION DATE'] = df['VIOLATION DATE'].astype(str)\n\n\n return pd.DataFrame(df)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_be851609_c0f7_4cae_afae_2094965ead93 = _curio_output\nexcept NameError:\n result_be851609_c0f7_4cae_afae_2094965ead93 = None\n", + "metadata": { + "id": 
"be851609-c0f7-4cae-afae-2094965ead93", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"18b287f6-0a05-4dbc-92d3-4eb10f177bc5\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"6f4c2cd3-e83e-4e85-81de-3ec50986a2ed\",\n \"target\": \"18b287f6-0a05-4dbc-92d3-4eb10f177bc5\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"id\": \"edge_in_1_18b287f6_0a05_4dbc_92d3_4eb10f177bc5_412c23ca_fafd_4af8_ab87_abd9df17241a\",\n \"source\": \"18b287f6-0a05-4dbc-92d3-4eb10f177bc5\",\n \"target\": \"412c23ca-fafd-4af8-ab87-abd9df17241a\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_6f4c2cd3_e83e_4e85_81de_3ec50986a2ed\n arg = input_0\n\n import pandas as pd\n\n df = arg\n\n df_map = df.groupby(['INTERSECTION', 'LATITUDE', 'LONGITUDE']).agg({\n 'VIOLATIONS': 'sum',\n 'CAMERA ID': 'nunique'\n }).reset_index().rename(columns={\n 'VIOLATIONS': 'TOTAL_VIOLATIONS',\n 'CAMERA ID': 'CAMERA_COUNT'\n })\n\n\n df_map = df_map.dropna(subset=['LATITUDE', 'LONGITUDE'])\n\n df_map['CAMERA_BIN'] = df_map['CAMERA_COUNT'].apply(lambda x: \"4+\" if x >= 4 else str(int(x)))\n\n return pd.DataFrame(df_map)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_18b287f6_0a05_4dbc_92d3_4eb10f177bc5 = _curio_output\nexcept NameError:\n result_18b287f6_0a05_4dbc_92d3_4eb10f177bc5 = None\n", + "metadata": { + "id": "18b287f6-0a05-4dbc-92d3-4eb10f177bc5", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"412c23ca-fafd-4af8-ab87-abd9df17241a\",\n \"type\": \"MERGE_FLOW\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"id\": \"edge_in_0_be851609_c0f7_4cae_afae_2094965ead93_412c23ca_fafd_4af8_ab87_abd9df17241a\",\n \"source\": 
\"be851609-c0f7-4cae-afae-2094965ead93\",\n \"target\": \"412c23ca-fafd-4af8-ab87-abd9df17241a\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_0\"\n },\n {\n \"id\": \"edge_in_1_18b287f6_0a05_4dbc_92d3_4eb10f177bc5_412c23ca_fafd_4af8_ab87_abd9df17241a\",\n \"source\": \"18b287f6-0a05-4dbc-92d3-4eb10f177bc5\",\n \"target\": \"412c23ca-fafd-4af8-ab87-abd9df17241a\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in_1\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"412c23ca-fafd-4af8-ab87-abd9df17241a\",\n \"target\": \"0ef770f3-e789-40b0-a6e5-5f5fea079389\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n\n inputs = [\n result_be851609_c0f7_4cae_afae_2094965ead93,\n result_18b287f6_0a05_4dbc_92d3_4eb10f177bc5\n ]\n\n merged_inputs = [i for i in inputs if i is not None]\n\n return merged_inputs\n\n\n_curio_output = _curio_node()\n\ntry:\n merged_412c23ca_fafd_4af8_ab87_abd9df17241a = _curio_output\nexcept NameError:\n merged_412c23ca_fafd_4af8_ab87_abd9df17241a = None\n", + "metadata": { + "id": "412c23ca-fafd-4af8-ab87-abd9df17241a", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"0ef770f3-e789-40b0-a6e5-5f5fea079389\",\n \"type\": \"COMPUTATION_ANALYSIS\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"412c23ca-fafd-4af8-ab87-abd9df17241a\",\n \"target\": \"0ef770f3-e789-40b0-a6e5-5f5fea079389\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"0ef770f3-e789-40b0-a6e5-5f5fea079389\",\n \"target\": \"d7e27f2a-2175-4fe0-8bff-8dfcd95f36f2\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = merged_412c23ca_fafd_4af8_ab87_abd9df17241a\n arg = input_0\n\n import pandas as pd\n\n df_base = pd.DataFrame(arg[0])\n df_additional = pd.DataFrame(arg[1])\n\n df_map = 
df_base.groupby(['INTERSECTION', 'LATITUDE', 'LONGITUDE']).agg({\n 'VIOLATIONS': 'sum',\n 'CAMERA ID': 'nunique'\n }).reset_index().rename(columns={\n 'VIOLATIONS': 'TOTAL_VIOLATIONS',\n 'CAMERA ID': 'CAMERA_COUNT'\n })\n\n df_map = df_map.dropna(subset=['LATITUDE', 'LONGITUDE'])\n\n df_map[\"CAMERA_BIN\"] = df_map[\"CAMERA_COUNT\"].apply(lambda x: \"4+\" if x >= 4 else str(int(x)))\n\n array_data = df_map.to_dict(orient='records')\n\n return pd.DataFrame(array_data)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_0ef770f3_e789_40b0_a6e5_5f5fea079389 = _curio_output\nexcept NameError:\n result_0ef770f3_e789_40b0_a6e5_5f5fea079389 = None\n", + "metadata": { + "id": "0ef770f3-e789-40b0-a6e5-5f5fea079389", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"d7e27f2a-2175-4fe0-8bff-8dfcd95f36f2\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"0ef770f3-e789-40b0-a6e5-5f5fea079389\",\n \"target\": \"d7e27f2a-2175-4fe0-8bff-8dfcd95f36f2\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"d7e27f2a-2175-4fe0-8bff-8dfcd95f36f2\",\n \"target\": \"3ad91d4c-3f0f-4db6-98a0-83fcbc564598\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = result_0ef770f3_e789_40b0_a6e5_5f5fea079389\n arg = input_0\n\n import pandas as pd\n\n df = pd.DataFrame(arg)\n\n df['TOTAL_VIOLATIONS'] = pd.to_numeric(df['TOTAL_VIOLATIONS'], errors='coerce')\n df['CAMERA_COUNT'] = pd.to_numeric(df['CAMERA_COUNT'], errors='coerce')\n\n df['CAMERA_BIN'] = df['CAMERA_BIN'].astype(str)\n\n df['LATITUDE'] = pd.to_numeric(df['LATITUDE'], errors='coerce')\n df['LONGITUDE'] = pd.to_numeric(df['LONGITUDE'], errors='coerce')\n\n return pd.DataFrame(df)\n\n\n_curio_output = _curio_node()\n\ntry:\n result_d7e27f2a_2175_4fe0_8bff_8dfcd95f36f2 = _curio_output\nexcept 
NameError:\n result_d7e27f2a_2175_4fe0_8bff_8dfcd95f36f2 = None\n", + "metadata": { + "id": "d7e27f2a-2175-4fe0-8bff-8dfcd95f36f2", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"3ad91d4c-3f0f-4db6-98a0-83fcbc564598\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"d7e27f2a-2175-4fe0-8bff-8dfcd95f36f2\",\n \"target\": \"3ad91d4c-3f0f-4db6-98a0-83fcbc564598\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_d7e27f2a_2175_4fe0_8bff_8dfcd95f36f2\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"hconcat\": [\n {\n \"width\": 600,\n \"height\": 500,\n \"title\": \"Spatial Map – Select Area to Filter\",\n \"params\": [\n {\n \"name\": \"spatialBrush\",\n \"select\": {\n \"type\": \"interval\",\n \"encodings\": [\"x\", \"y\"]\n }\n }\n ],\n \"mark\": \"circle\",\n \"encoding\": {\n \"x\": {\n \"field\": \"LONGITUDE\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [-87.95, -87.5] },\n \"title\": \"Longitude\"\n },\n \"y\": {\n \"field\": \"LATITUDE\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [41.6, 42.1] },\n \"title\": \"Latitude\"\n },\n \"size\": {\n \"field\": \"TOTAL_VIOLATIONS\",\n \"type\": \"quantitative\",\n \"scale\": { \"range\": [20, 800] },\n \"title\": \"Total Violations\"\n },\n \"color\": {\n \"field\": \"CAMERA_BIN\",\n \"type\": \"nominal\",\n \"title\": \"Camera Count\",\n \"scale\": { \"scheme\": \"plasma\" }\n },\n \"tooltip\": [\n { \"field\": \"INTERSECTION\", \"type\": \"nominal\" },\n { \"field\": \"TOTAL_VIOLATIONS\", \"type\": \"quantitative\" },\n { \"field\": \"CAMERA_COUNT\", \"type\": \"quantitative\" }\n ]\n }\n },\n {\n \"width\": 400,\n \"height\": 500,\n \"title\": \"Top Intersections by Total Violations (Filtered by Spatial Selection)\",\n 
\"mark\": \"bar\",\n \"transform\": [\n {\n \"filter\": { \"param\": \"spatialBrush\" }\n },\n {\n \"window\": [{ \"op\": \"rank\", \"as\": \"rank\" }],\n \"sort\": [{ \"field\": \"TOTAL_VIOLATIONS\", \"order\": \"descending\" }]\n },\n {\n \"filter\": \"datum.rank <= 15\"\n }\n ],\n \"encoding\": {\n \"x\": {\n \"field\": \"TOTAL_VIOLATIONS\",\n \"type\": \"quantitative\",\n \"title\": \"Total Violations\"\n },\n \"y\": {\n \"field\": \"INTERSECTION\",\n \"type\": \"nominal\",\n \"sort\": \"-x\",\n \"title\": \"Intersection\"\n },\n \"color\": {\n \"field\": \"TOTAL_VIOLATIONS\",\n \"type\": \"quantitative\",\n \"scale\": { \"scheme\": \"blues\" },\n \"legend\": None\n },\n \"tooltip\": [\n { \"field\": \"INTERSECTION\", \"type\": \"nominal\" },\n { \"field\": \"TOTAL_VIOLATIONS\", \"type\": \"quantitative\" }\n ]\n }\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_3ad91d4c_3f0f_4db6_98a0_83fcbc564598 = _curio_output\nexcept NameError:\n result_3ad91d4c_3f0f_4db6_98a0_83fcbc564598 = None\n", + "metadata": { + "id": "3ad91d4c-3f0f-4db6-98a0-83fcbc564598", + "language": "python" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example9-notebook.ipynb b/docs/examples/notebooks/example9-notebook.ipynb new file mode 100644 index 00000000..9aad4c8b --- /dev/null +++ b/docs/examples/notebooks/example9-notebook.ipynb @@ -0,0 +1,40 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": 
"__trill_node__ = {\n \"id\": \"345e363c-7676-45e3-9fc7-3c44f52b1b6a\",\n \"type\": \"DATA_LOADING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [],\n \"outputs\": [\n {\n \"source\": \"345e363c-7676-45e3-9fc7-3c44f52b1b6a\",\n \"target\": \"b9d6eeac-790f-46aa-ac25-4d88514a47d8\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n import pandas as pd\n\n df = pd.read_csv(\"data/energy_dataset.csv\")\n return df\n\n_curio_output = _curio_node()\n\ntry:\n data_345e363c_7676_45e3_9fc7_3c44f52b1b6a = _curio_output\nexcept NameError:\n data_345e363c_7676_45e3_9fc7_3c44f52b1b6a = None\n", + "metadata": { + "id": "345e363c-7676-45e3-9fc7-3c44f52b1b6a", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"b9d6eeac-790f-46aa-ac25-4d88514a47d8\",\n \"type\": \"DATA_CLEANING\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"345e363c-7676-45e3-9fc7-3c44f52b1b6a\",\n \"target\": \"b9d6eeac-790f-46aa-ac25-4d88514a47d8\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": [\n {\n \"source\": \"b9d6eeac-790f-46aa-ac25-4d88514a47d8\",\n \"target\": \"5db6d526-79a1-40cf-9605-f2f525e513c0\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ]\n}\n\ndef _curio_node():\n\n input_0 = data_345e363c_7676_45e3_9fc7_3c44f52b1b6a\n arg = input_0\n\n edf = arg[['Data Year', 'ID', 'Property Name', 'Address', 'ZIP Code', 'Chicago Energy Rating', 'Community Area', 'Primary Property Type', 'Gross Floor Area - Buildings (sq ft)', 'Year Built', '# of Buildings', 'ENERGY STAR Score', 'Site EUI (kBtu/sq ft)', 'Source EUI (kBtu/sq ft)', 'Weather Normalized Site EUI (kBtu/sq ft)', 'Weather Normalized Source EUI (kBtu/sq ft)', 'Total GHG Emissions (Metric Tons CO2e)', 'GHG Intensity (kg CO2e/sq ft)', 'Latitude', 'Longitude', 'Location']]\n\n # Rename the data 
columns for consistency and easy use\n edf.columns = ['Year', 'ID', 'Property Name', 'Address', 'ZIP Code', 'Chicago Energy Rating', 'Community Area', 'Primary Property Type', 'Gross Floor Area', 'Year Built', '# of Buildings', 'ENERGY STAR Score', 'Site EUI', 'Source EUI', 'Weather Normalized Site EUI', 'Weather Normalized Source EUI', 'Total GHG Emissions', 'GHG Intensity', 'Latitude', 'Longitude', 'Location']\n\n # Filter out rows with missing data\n edf = edf.dropna()\n edf['ZIP Code'] = edf['ZIP Code'].astype(int)\n\n return edf\n\n\n_curio_output = _curio_node()\n\ntry:\n result_b9d6eeac_790f_46aa_ac25_4d88514a47d8 = _curio_output\nexcept NameError:\n result_b9d6eeac_790f_46aa_ac25_4d88514a47d8 = None\n", + "metadata": { + "id": "b9d6eeac-790f-46aa-ac25-4d88514a47d8", + "language": "python" + } + }, + { + "cell_type": "code", + "source": "__trill_node__ = {\n \"id\": \"5db6d526-79a1-40cf-9605-f2f525e513c0\",\n \"type\": \"VIS_VEGA\",\n \"in\": \"DEFAULT\",\n \"out\": \"DEFAULT\"\n}\n\n__trill_connections__ = {\n \"inputs\": [\n {\n \"source\": \"b9d6eeac-790f-46aa-ac25-4d88514a47d8\",\n \"target\": \"5db6d526-79a1-40cf-9605-f2f525e513c0\",\n \"sourceHandle\": \"out\",\n \"targetHandle\": \"in\"\n }\n ],\n \"outputs\": []\n}\n\ndef _curio_node():\n\n\n input_data = result_b9d6eeac_790f_46aa_ac25_4d88514a47d8\n\n spec = {\n \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.json\",\n \"description\": \"ENERGY STAR Score by Primary Property Type (mean bars with median ticks)\",\n \"title\": \"ENERGY STAR Score by Primary Property Type\",\n \"data\": { \"name\": \"edf\" },\n \"width\": 600,\n \"height\": 400,\n \"layer\": [\n {\n \"transform\": [\n {\n \"aggregate\": [\n { \"op\": \"mean\", \"field\": \"ENERGY STAR Score\", \"as\": \"mean_score\" }\n ],\n \"groupby\": [\"Primary Property Type\"]\n }\n ],\n \"mark\": \"bar\",\n \"encoding\": {\n \"y\": {\n \"field\": \"Primary Property Type\",\n \"type\": \"nominal\",\n \"title\": \"Primary Property 
Type\"\n },\n \"x\": {\n \"field\": \"mean_score\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [0, 100] },\n \"title\": \"Mean ENERGY STAR Score\"\n },\n \"tooltip\": [\n {\n \"field\": \"Primary Property Type\",\n \"type\": \"nominal\",\n \"title\": \"Primary Property Type\"\n },\n {\n \"field\": \"mean_score\",\n \"type\": \"quantitative\",\n \"title\": \"Mean ENERGY STAR Score\",\n \"format\": \".2f\"\n }\n ]\n }\n },\n {\n \"transform\": [\n {\n \"aggregate\": [\n { \"op\": \"median\", \"field\": \"ENERGY STAR Score\", \"as\": \"median_score\" }\n ],\n \"groupby\": [\"Primary Property Type\"]\n }\n ],\n \"mark\": {\n \"type\": \"tick\",\n \"color\": \"red\",\n \"thickness\": 2\n },\n \"encoding\": {\n \"y\": {\n \"field\": \"Primary Property Type\",\n \"type\": \"nominal\"\n },\n \"x\": {\n \"field\": \"median_score\",\n \"type\": \"quantitative\",\n \"scale\": { \"domain\": [0, 100] },\n \"title\": \"Median ENERGY STAR Score\"\n },\n \"tooltip\": [\n {\n \"field\": \"Primary Property Type\",\n \"type\": \"nominal\",\n \"title\": \"Primary Property Type\"\n },\n {\n \"field\": \"median_score\",\n \"type\": \"quantitative\",\n \"title\": \"Median ENERGY STAR Score\",\n \"format\": \".2f\"\n }\n ]\n }\n }\n ]\n }\n\n values = input_data\n if hasattr(input_data, \"to_dict\"):\n values = input_data.to_dict(orient=\"records\")\n\n if isinstance(spec, dict):\n spec[\"data\"] = {\"values\": values}\n\n from IPython.display import display\n display({\"application/vnd.vegalite.v5+json\": spec, \"text/plain\": spec}, raw=True)\n\n return input_data\n\n\n_curio_output = _curio_node()\n\ntry:\n result_5db6d526_79a1_40cf_9605_f2f525e513c0 = _curio_output\nexcept NameError:\n result_5db6d526_79a1_40cf_9605_f2f525e513c0 = None\n", + "metadata": { + "id": "5db6d526-79a1-40cf-9605-f2f525e513c0", + "language": "python" + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/examples/notebooks/example10-original-notebook-W.ipynb b/docs/examples/notebooks/v1-example10-original-notebook-W.ipynb similarity index 100% rename from docs/examples/notebooks/example10-original-notebook-W.ipynb rename to docs/examples/notebooks/v1-example10-original-notebook-W.ipynb diff --git a/docs/examples/notebooks/example11-original-notebook-W.ipynb b/docs/examples/notebooks/v1-example11-original-notebook-W.ipynb similarity index 100% rename from docs/examples/notebooks/example11-original-notebook-W.ipynb rename to docs/examples/notebooks/v1-example11-original-notebook-W.ipynb diff --git a/docs/examples/notebooks/example3-original-notebook-NW.ipynb b/docs/examples/notebooks/v1-example3-original-notebook-NW.ipynb similarity index 100% rename from docs/examples/notebooks/example3-original-notebook-NW.ipynb rename to docs/examples/notebooks/v1-example3-original-notebook-NW.ipynb diff --git a/docs/examples/notebooks/example5-original-notebook-W.ipynb b/docs/examples/notebooks/v1-example5-original-notebook-W.ipynb similarity index 100% rename from docs/examples/notebooks/example5-original-notebook-W.ipynb rename to docs/examples/notebooks/v1-example5-original-notebook-W.ipynb diff --git a/docs/examples/notebooks/example7-original-notebook-W.ipynb b/docs/examples/notebooks/v1-example7-original-notebook-W.ipynb similarity index 100% rename from docs/examples/notebooks/example7-original-notebook-W.ipynb rename to docs/examples/notebooks/v1-example7-original-notebook-W.ipynb diff --git a/docs/examples/notebooks/example8-original-notebook-W.ipynb b/docs/examples/notebooks/v1-example8-original-notebook-W.ipynb similarity index 100% rename from docs/examples/notebooks/example8-original-notebook-W.ipynb rename to docs/examples/notebooks/v1-example8-original-notebook-W.ipynb diff --git a/docs/examples/notebooks/example9-original-notebook-W.ipynb 
b/docs/examples/notebooks/v1-example9-original-notebook-W.ipynb similarity index 100% rename from docs/examples/notebooks/example9-original-notebook-W.ipynb rename to docs/examples/notebooks/v1-example9-original-notebook-W.ipynb