From 5536c1ea7763b066878d5e6a5a0d5945bb78c617 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Sun, 28 Dec 2025 21:34:00 +0800 Subject: [PATCH 1/4] feat: Add FromStr for Ref --- rust/lance/src/dataset/refs.rs | 96 +++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/rust/lance/src/dataset/refs.rs b/rust/lance/src/dataset/refs.rs index 4044da9f60e..d02e5f216c8 100644 --- a/rust/lance/src/dataset/refs.rs +++ b/rust/lance/src/dataset/refs.rs @@ -25,7 +25,7 @@ use std::io::ErrorKind; pub const MAIN_BRANCH: &str = "main"; /// Lance Ref -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Ref { // Version number points of the current branch VersionNumber(u64), @@ -37,6 +37,65 @@ pub enum Ref { Tag(String), } +impl std::str::FromStr for Ref { + type Err = Error; + + fn from_str(reference: &str) -> std::result::Result { + let reference = reference.trim(); + if reference.is_empty() { + return Err(Error::InvalidRef { + message: "Ref cannot be empty".to_string(), + }); + } + + if let Some((branch, version)) = reference.split_once(':') { + if branch.is_empty() { + return Err(Error::InvalidRef { + message: "Branch name cannot be empty".to_string(), + }); + } + + let branch = if branch == MAIN_BRANCH { + None + } else { + check_valid_branch(branch)?; + Some(branch.to_string()) + }; + + let version = match version { + "" | "latest" => None, + v => { + let v = v.parse::().map_err(|e| Error::InvalidRef { + message: format!("Failed to parse version number '{}': {}", v, e), + })?; + Some(v) + } + }; + + return Ok(Self::Version(branch, version)); + } + + if reference == MAIN_BRANCH { + return Ok(Self::Version(None, None)); + } + + if reference.chars().all(|c| c.is_ascii_digit()) { + let v = reference.parse::().map_err(|e| Error::InvalidRef { + message: format!("Failed to parse version number '{}': {}", reference, e), + })?; + return Ok(Self::VersionNumber(v)); + } + + if reference.contains('/') { + check_valid_branch(reference)?; + return Ok(Self::Version(Some(reference.to_string()), None)); + } + + check_valid_tag(reference)?; + Ok(Self::Tag(reference.to_string())) + } +} + impl From for Ref { fn from(reference: u64) -> Self { VersionNumber(reference) @@ -793,6 +852,41 @@ mod tests { use rstest::rstest; + #[rstest] + fn test_parse_ref_ok( + #[values( + ("0", Ref::VersionNumber(0)), + ("42", Ref::VersionNumber(42)), + ("main", Ref::Version(None, None)), + ("main:", Ref::Version(None, None)), + ("main:latest", Ref::Version(None, None)), + ("main:10", Ref::Version(None, Some(10))), + ("feature/a", Ref::Version(Some("feature/a".to_string()), None)), + ("feature/a:", Ref::Version(Some("feature/a".to_string()), None)), + ("feature/a:latest", Ref::Version(Some("feature/a".to_string()), None)), + ("feature/a:10", Ref::Version(Some("feature/a".to_string()), Some(10))), + ("tag1", Ref::Tag("tag1".to_string())) + )] + (input, expected): (&str, Ref), + ) { + assert_eq!(input.parse::().unwrap(), expected); + } + + #[rstest] + fn test_parse_ref_err( + #[values( + "", + ":", + ":10", + "main:bad", + "/start-with-slash", + "feature//double-slash" + )] + input: &str, + ) { + assert!(input.parse::().is_err()); + } + #[rstest] fn test_ok_ref( #[values( From 71c82d6b0a8ea30ffa24949a3b3d3f81e04228ff Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Sun, 28 Dec 2025 22:26:53 +0800 Subject: [PATCH 2/4] Forbid same branch and tag name --- rust/lance/src/dataset/refs.rs | 21 ++++++++ .../src/dataset/tests/dataset_versioning.rs | 53 +++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/rust/lance/src/dataset/refs.rs b/rust/lance/src/dataset/refs.rs index d02e5f216c8..b0b6a32aac1 100644 --- a/rust/lance/src/dataset/refs.rs +++ b/rust/lance/src/dataset/refs.rs @@ -280,6 +280,13 @@ impl Tags<'_> { message: format!("tag {} already exists", tag), }); } + + let branch_file = branch_contents_path(&root_location.path, tag); + if self.object_store().exists(&branch_file).await? { + return Err(Error::RefConflict { + message: format!("tag {} conflicts with existing branch", tag), + }); + } let tag_contents = self.build_tag_content_by_ref(reference).await?; self.object_store() @@ -316,6 +323,13 @@ impl Tags<'_> { message: format!("tag {} does not exist", tag), }); } + + let branch_file = branch_contents_path(&root_location.path, tag); + if self.object_store().exists(&branch_file).await? { + return Err(Error::RefConflict { + message: format!("tag {} conflicts with existing branch", tag), + }); + } let tag_contents = self.build_tag_content_by_ref(reference).await?; self.object_store() @@ -452,6 +466,13 @@ impl Branches<'_> { let source_branch = source_branch.and_then(standardize_branch); let root_location = self.refs.root()?; + + let tag_file = tag_path(&root_location.path, branch_name); + if self.object_store().exists(&tag_file).await? { + return Err(Error::RefConflict { + message: format!("branch {} conflicts with existing tag", branch_name), + }); + } let branch_file = branch_contents_path(&root_location.path, branch_name); if self.object_store().exists(&branch_file).await? { return Err(Error::RefConflict { diff --git a/rust/lance/src/dataset/tests/dataset_versioning.rs b/rust/lance/src/dataset/tests/dataset_versioning.rs index 2e2fcdf6601..0f793e32f10 100644 --- a/rust/lance/src/dataset/tests/dataset_versioning.rs +++ b/rust/lance/src/dataset/tests/dataset_versioning.rs @@ -366,6 +366,59 @@ async fn test_tag( assert_eq!(dataset.manifest.version, 1); } +#[tokio::test] +async fn test_tag_branch_name_conflict() { + let test_uri = TempStrDir::default(); + + let schema = Arc::new(ArrowSchema::new(vec![ArrowField::new( + "i", + DataType::UInt32, + false, + )])); + + let data = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(UInt32Array::from_iter_values(0..10))], + ) + .unwrap(); + let reader = RecordBatchIterator::new(vec![data].into_iter().map(Ok), schema); + let mut dataset = Dataset::write(reader, &test_uri, None).await.unwrap(); + + dataset + .create_branch("conflict", dataset.version().version, None) + .await + .unwrap(); + + let err = dataset + .tags() + .create("conflict", dataset.version().version) + .await + .err() + .unwrap() + .to_string(); + assert_eq!( + err, + "Ref conflict error: tag conflict conflicts with existing branch" + ); + + dataset + .tags() + .create("tag_conflict", dataset.version().version) + .await + .unwrap(); + + let err = dataset + .create_branch("tag_conflict", dataset.version().version, None) + .await + .err() + .unwrap() + .to_string(); + assert_eq!( + err, + "Ref conflict error: branch tag_conflict conflicts with existing tag" + ); +} + #[rstest] #[tokio::test] async fn test_fragment_id_zero_not_reused() { From a8f5284c54abc59c5cd4c2ef4084e2d209c1a666 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 29 Dec 2025 00:52:00 +0800 Subject: [PATCH 3/4] refactor --- rust/lance/src/dataset/builder.rs | 2 +- rust/lance/src/dataset/refs.rs | 74 ++++++++++++------------------- 2 files changed, 29 insertions(+), 47 deletions(-) diff --git a/rust/lance/src/dataset/builder.rs b/rust/lance/src/dataset/builder.rs index 3d463ce6ca4..61959d3599a 100644 --- a/rust/lance/src/dataset/builder.rs +++ b/rust/lance/src/dataset/builder.rs @@ -234,7 +234,7 @@ impl DatasetBuilder { /// Sets `version` for the builder using a tag pub fn with_tag(mut self, tag: &str) -> Self { - self.version = Some(Ref::from(tag)); + self.version = Some(Ref::Tag(tag.to_string())); self } diff --git a/rust/lance/src/dataset/refs.rs b/rust/lance/src/dataset/refs.rs index b0b6a32aac1..6b043f5d1fd 100644 --- a/rust/lance/src/dataset/refs.rs +++ b/rust/lance/src/dataset/refs.rs @@ -37,74 +37,56 @@ pub enum Ref { Tag(String), } -impl std::str::FromStr for Ref { - type Err = Error; +impl From for Ref { + fn from(reference: u64) -> Self { + VersionNumber(reference) + } +} - fn from_str(reference: &str) -> std::result::Result { +impl From<&str> for Ref { + fn from(reference: &str) -> Self { let reference = reference.trim(); - if reference.is_empty() { - return Err(Error::InvalidRef { - message: "Ref cannot be empty".to_string(), - }); - } - if let Some((branch, version)) = reference.split_once(':') { if branch.is_empty() { - return Err(Error::InvalidRef { - message: "Branch name cannot be empty".to_string(), - }); + return Tag(reference.to_string()); } let branch = if branch == MAIN_BRANCH { None - } else { - check_valid_branch(branch)?; + } else if check_valid_branch(branch).is_ok() { Some(branch.to_string()) + } else { + return Tag(reference.to_string()); }; let version = match version { "" | "latest" => None, - v => { - let v = v.parse::().map_err(|e| Error::InvalidRef { - message: format!("Failed to parse version number '{}': {}", v, e), - })?; - Some(v) - } + v => match v.parse::() { + Ok(v) => Some(v), + Err(_) => return Tag(reference.to_string()), + }, }; - return Ok(Self::Version(branch, version)); + return Version(branch, version); } if reference == MAIN_BRANCH { - return Ok(Self::Version(None, None)); + return Version(None, None); } if reference.chars().all(|c| c.is_ascii_digit()) { - let v = reference.parse::().map_err(|e| Error::InvalidRef { - message: format!("Failed to parse version number '{}': {}", reference, e), - })?; - return Ok(Self::VersionNumber(v)); - } - - if reference.contains('/') { - check_valid_branch(reference)?; - return Ok(Self::Version(Some(reference.to_string()), None)); + if let Ok(v) = reference.parse::() { + return VersionNumber(v); + } } - check_valid_tag(reference)?; - Ok(Self::Tag(reference.to_string())) - } -} - -impl From for Ref { - fn from(reference: u64) -> Self { - VersionNumber(reference) + Tag(reference.to_string()) } } -impl From<&str> for Ref { - fn from(reference: &str) -> Self { - Tag(reference.to_string()) +impl From for Ref { + fn from(reference: String) -> Self { + Ref::from(reference.as_str()) } } @@ -882,7 +864,6 @@ mod tests { ("main:", Ref::Version(None, None)), ("main:latest", Ref::Version(None, None)), ("main:10", Ref::Version(None, Some(10))), - ("feature/a", Ref::Version(Some("feature/a".to_string()), None)), ("feature/a:", Ref::Version(Some("feature/a".to_string()), None)), ("feature/a:latest", Ref::Version(Some("feature/a".to_string()), None)), ("feature/a:10", Ref::Version(Some("feature/a".to_string()), Some(10))), @@ -890,22 +871,23 @@ mod tests { )] (input, expected): (&str, Ref), ) { - assert_eq!(input.parse::().unwrap(), expected); + assert_eq!(Ref::from(input), expected); } #[rstest] - fn test_parse_ref_err( + fn test_ref_from_str_invalid_syntax_falls_back_to_tag( #[values( "", ":", ":10", "main:bad", + "feature/a", "/start-with-slash", "feature//double-slash" )] input: &str, ) { - assert!(input.parse::().is_err()); + assert_eq!(Ref::from(input), Ref::Tag(input.trim().to_string())); } #[rstest] From 4e68124d5d70cf98965af51eb8fa209a3bece0dd Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 29 Dec 2025 01:59:48 +0800 Subject: [PATCH 4/4] Fix clippy use-self lint in Ref conversion --- rust/lance/src/dataset/refs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance/src/dataset/refs.rs b/rust/lance/src/dataset/refs.rs index 6b043f5d1fd..863b4ff4fc1 100644 --- a/rust/lance/src/dataset/refs.rs +++ b/rust/lance/src/dataset/refs.rs @@ -86,7 +86,7 @@ impl From<&str> for Ref { impl From for Ref { fn from(reference: String) -> Self { - Ref::from(reference.as_str()) + Self::from(reference.as_str()) } }