databricks-demos · QuentinAmbard · Nov 5, 2025 · Nov 13, 2025
diff --git a/dbdemos/__init__.py b/dbdemos/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.6.28"
+__version__ = "0.6.34"
 
 from .dbdemos import list_demos, install, create_cluster, help, install_all, check_status_all, check_status, get_html_list_demos
 
diff --git a/dbdemos/conf.py b/dbdemos/conf.py
@@ -242,6 +242,7 @@ def __init__(self, path: str, json_conf: dict, catalog:str = None, schema: str =
         self.dashboards = json_conf.get('dashboards', [])
         self.sql_queries = json_conf.get('sql_queries', [])
         self.bundle = json_conf.get('bundle', False)
+        self.env_version = json_conf.get('env_version', 2)
 
         self.data_folders: List[DataFolder] = []
         for data_folder in json_conf.get('data_folders', []):

diff --git a/dbdemos/installer_genie.py b/dbdemos/installer_genie.py
@@ -217,40 +217,53 @@ def load_data_to_volume(self, ws: WorkspaceClient, data_folder: DataFolder, demo
 
         import requests
         import collections
+        dbutils = self.installer.get_dbutils()
         try:
-            # Get list of files from GitHub API, to avoid adding a S3 boto dependency just for this
-            github_path = f"https://api.github.com/repos/databricks-demos/dbdemos-dataset/contents/{data_folder.source_folder}"
-            if debug:
-                print(f"Getting files from {github_path}")
-            files = requests.get(github_path).json()
-            if 'message' in files:
-                print(f"Error getting files from {github_path}: {files}")
-            files = [f['download_url'] for f in files]
-
-            if debug:
-                print(f"Found {len(files)} files in GitHub repo for {data_folder.source_folder}")
-
-            def copy_file(file_url):
-                if not file_url.endswith('/'):
-                    file_name = file_url.split('/')[-1]
-                    folder = data_folder.target_volume_folder_name if data_folder.target_volume_folder_name else data_folder.source_folder
-                    target_path = f"/Volumes/{demo_conf.catalog}/{demo_conf.schema}/{InstallerGenie.VOLUME_NAME}/{folder}/{file_name}"
-
-                    s3_url = file_url.replace("https://raw.githubusercontent.com/databricks-demos/dbdemos-dataset/main/", 
-                                            "https://dbdemos-dataset.s3.amazonaws.com/")
-
-                    if debug:
-                        print(f"Copying {s3_url} to {target_path}")
-                    response = requests.get(s3_url)
-                    response.raise_for_status()
-                    if debug:
-                        print(f"File {file_name} in memory. sending to volume...")
-                    ws.files.upload(target_path, response.content, overwrite=True)
+            folder = data_folder.target_volume_folder_name if data_folder.target_volume_folder_name else data_folder.source_folder
+            # First try a direct dbutils copy, if dbutils is available.
+            copied_successfully = False
+            if dbutils is not None:
+                try:
+                    dbutils.fs.cp(f"s3://dbdemos-dataset/{data_folder.source_folder}", f"/Volumes/{demo_conf.catalog}/{demo_conf.schema}/{InstallerGenie.VOLUME_NAME}/{folder}", recurse=True)
+                    copied_successfully = True
+                except Exception as e:
                     if debug:
-                        print(f"File {file_name} in volume!")
-
-            with ThreadPoolExecutor(max_workers=5) as executor:
-                collections.deque(executor.map(copy_file, files))
+                        print(f"Error copying {data_folder.source_folder} to {f'/Volumes/{demo_conf.catalog}/{demo_conf.schema}/{InstallerGenie.VOLUME_NAME}/{folder}'} using dbutils fs.cp: {e}")
+                if debug and copied_successfully:  # only report success when fs.cp did not raise
+                    print(f"Copied {data_folder.source_folder} to {f'/Volumes/{demo_conf.catalog}/{demo_conf.schema}/{InstallerGenie.VOLUME_NAME}/{folder}'} using dbutils fs.cp")
+            if not copied_successfully:
+                # Get list of files from GitHub API, to avoid adding an S3 boto dependency just for this
+                github_path = f"https://api.github.com/repos/databricks-demos/dbdemos-dataset/contents/{data_folder.source_folder}"
+                if debug:
+                    print(f"Getting files from {github_path}")
+                files = requests.get(github_path).json()
+                if 'message' in files:  # error payload instead of a file listing: fail with the real cause
+                    raise RuntimeError(f"Error getting files from {github_path}: {files}")
+                files = [f['download_url'] for f in files]
+
+                if debug:
+                    print(f"Found {len(files)} files in GitHub repo for {data_folder.source_folder}")
+
+                def copy_file(file_url):
+                    if not file_url.endswith('/'):
+                        file_name = file_url.split('/')[-1]
+                        target_path = f"/Volumes/{demo_conf.catalog}/{demo_conf.schema}/{InstallerGenie.VOLUME_NAME}/{folder}/{file_name}"
+
+                        s3_url = file_url.replace("https://raw.githubusercontent.com/databricks-demos/dbdemos-dataset/main/", 
+                                                "https://dbdemos-dataset.s3.amazonaws.com/")
+
+                        if debug:
+                            print(f"Copying {s3_url} to {target_path}")
+                        response = requests.get(s3_url)
+                        response.raise_for_status()
+                        if debug:
+                            print(f"File {file_name} in memory. sending to volume...")
+                        ws.files.upload(target_path, response.content, overwrite=True)
+                        if debug:
+                            print(f"File {file_name} in volume!")
+
+                with ThreadPoolExecutor(max_workers=5) as executor:
+                    collections.deque(executor.map(copy_file, files))
 
         except Exception as e:
             raise DataLoaderException(f"Error loading data from S3: {str(e)}")

diff --git a/dbdemos/notebook_parser.py b/dbdemos/notebook_parser.py
@@ -324,15 +324,15 @@ def add_javascript_to_minisite_relative_links(self, notebook_path):
 
     #Set the environment metadata to the notebook.
     # TODO: might want to re-evaluate this once we move to ipynb format as it'll be set in the ipynb file, as metadata.
-    def set_environement_metadata(self, client_version: str = "2"):
+    def set_environement_metadata(self, client_version: str = "3"):
         content = json.loads(self.content)
         env_metadata = content.get("environmentMetadata", {})
         if env_metadata is None:
             env_metadata = {}
         if ("client" not in env_metadata or 
             env_metadata["client"] is None or 
             int(env_metadata["client"]) < int(client_version)):
-            env_metadata["client"] = client_version
+            env_metadata["client"] = str(client_version)
         content["environmentMetadata"] = env_metadata
         self.content = json.dumps(content)
 

diff --git a/dbdemos/packager.py b/dbdemos/packager.py
@@ -58,11 +58,11 @@ def process_file_content(self, file, destination_path, extension = ""):
         with open(destination_path + extension, "wb") as f:
             f.write(file_content)
 
-    def process_notebook_content(self, html, full_path):
+    def process_notebook_content(self, demo_conf: DemoConf, html, full_path):
         #Replace notebook content.
         parser = NotebookParser(html)
         parser.remove_uncomment_tag()
-        parser.set_environement_metadata()
+        parser.set_environement_metadata(demo_conf.env_version)
         parser.remove_dbdemos_build()
         #parser.remove_static_settings()
         parser.hide_commands_and_results()
@@ -106,7 +106,7 @@ def download_notebook_html(notebook: DemoNotebook):
                     if 'error_code' in file:
                         raise Exception(f"Couldn't find file {repo_path} in workspace. Check notebook path in bundle conf file. {file['error_code']} - {file['message']}")
                     html = base64.b64decode(file['content']).decode('utf-8')
-                    return self.process_notebook_content(html, full_path+".html")
+                    return self.process_notebook_content(demo_conf, html, full_path+".html")
                 elif status['object_type'] == 'DIRECTORY':
                     folder = self.db.get("2.0/workspace/export", {"path": repo_path, "format": "AUTO", "direct_download": True})
                     return self.process_file_content(folder, full_path, ".zip")
@@ -124,7 +124,7 @@ def download_notebook_html(notebook: DemoNotebook):
                 if "views" not in notebook_result:
                     raise Exception(f"couldn't get notebook for run {tasks[0]['run_id']} - {notebook.path}. {demo_conf.name}. You probably did a run repair. Please re run the job. - {notebook_result}")
                 html = notebook_result["views"][0]["content"]
-                return self.process_notebook_content(html, full_path+".html")
+                return self.process_notebook_content(demo_conf, html, full_path+".html")
 
 
         requires_global_setup_v2 = False

diff --git a/main.py b/main.py
@@ -35,12 +35,12 @@ def bundle():
 
 
     # Run the jobs (only if there is a new commit since the last time, or failure, or force execution)
-    bundler.start_and_wait_bundle_jobs(force_execution = False, skip_execution=True, recreate_jobs=False)
+    bundler.start_and_wait_bundle_jobs(force_execution = False, skip_execution=False, recreate_jobs=False)
 
     packager = Packager(conf, bundler)
     packager.package_all()
 
-#bundle()
+bundle()
 
 #Loads conf to install on cse2.
 with open("local_conf_E2FE.json", "r") as r:
@@ -97,13 +97,13 @@ def bundle():
 #dbdemos.install("lakehouse-fsi-credit", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], catalog='main', schema='quentin_test2', cloud="AWS", start_cluster = False, skip_dashboards=False)
 #dbdemos.install("lakehouse-fsi-fraud", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], catalog='main', schema='quentin_test2', cloud="AWS", start_cluster = False, skip_dashboards=False)
 
-dbdemos.install("lakehouse-iot-platform", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], catalog='main', schema='quentin_test3', cloud="AWS", start_cluster = False)
-dbdemos.install("pipeline-bike", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], catalog='main', schema='quentin_test3', cloud="AWS", start_cluster = False)
+#dbdemos.install("lakehouse-iot-platform", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], catalog='main', schema='quentin_test3', cloud="AWS", start_cluster = False)
+#dbdemos.install("pipeline-bike", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], catalog='main', schema='quentin_test3', cloud="AWS", start_cluster = False)
 
-#dbdemos.install("feature-store", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], cloud="AWS", use_current_cluster=False, current_cluster_id=c["current_cluster_id"])
+dbdemos.install("feature-store", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], cloud="AWS", use_current_cluster=False, current_cluster_id=c["current_cluster_id"])
 #dbdemos.install("delta-lake", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], cloud="GCP")
 #dbdemos.install("delta-lake", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], cloud="Azure", use_current_cluster=True, current_cluster_id=c["current_cluster_id"])
-#dbdemos.install("mlops-end2end", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], cloud="AWS", skip_dashboards=True, schema='test_quentin_rag', catalog='dbdemos')
+dbdemos.install("mlops-end2end", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], cloud="AWS", skip_dashboards=True, schema='test_quentin_rag', catalog='dbdemos')
 #dbdemos.install("pandas-on-spark", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'], cloud="AWS")
 #dbdemos.install("delta-sharing-airlines", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'])
 #dbdemos.install("dlt-loans", "/Users/quentin.ambard@databricks.com/test_install_quentin", True, c['username'], c['pat_token'], c['url'])

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
     #this will be the package name you will see, e.g. the output of 'conda list' in anaconda prompt
     name = 'dbdemos',
     #some version number you may wish to add - increment this after every update
-    version='0.6.28',
+    version='0.6.34',
     author="Databricks",
     author_email=["quentin.ambard@databricks.com", "cal.reynolds@databricks.com"],
     description="Install databricks demos: notebooks, Delta Live Table Pipeline, DBSQL Dashboards, ML Models etc.",