diff --git a/Cargo.toml b/Cargo.toml index 51df89b..d4b1d1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "burn_dino" description = "burn dinov2 model inference and training" version = "0.3.1" -edition = "2021" +edition = "2024" authors = ["mosure "] license = "MIT OR Apache-2.0" keywords = [ @@ -44,35 +44,41 @@ import = ["bevy_args", "burn-import", "clap", "serde"] [dependencies] # bevy_args = { version = "1.6", optional = true } -bevy_args = { git = "https://github.com/mosure/bevy_args.git", branch = "burn", optional = true } -burn-import = { version = "0.15", features = ["pytorch"], optional = true } +burn-import = { version = "0.16", features = ["pytorch"], optional = true } clap = { version = "4.5", features = ["derive"], optional = true } -ndarray = "0.16" +# ndarray = "0.16" serde = { version = "1.0", optional = true } +[dependencies.bevy_args] +git = "https://github.com/mosure/bevy_args.git" +branch = "burn" +optional = true + + [dependencies.burn] -version = "0.15" -default-features = false +version = "0.16" +default-features = true features = [ # "autotune", - # "fusion", + "fusion", + "ndarray", "std", "wgpu", ] [dependencies.burn-wgpu] -version = "0.15" -default-features = false +version = "0.16" +default-features = true features = [ - "fusion", + # "fusion", "std", # "template", ] [dependencies.cubecl] -version = "0.3" -default-features = false +version = "0.4" +default-features = true features = [ "linalg", "std", @@ -80,8 +86,8 @@ features = [ ] [dependencies.cubecl-runtime] -version = "0.3" -default-features = false +version = "0.4" +default-features = true features = [ "std", "channel-mpsc", @@ -112,7 +118,7 @@ criterion = { version = "0.5", features = ["html_reports"] } futures-intrusive = { version = "0.5.0" } image = { version = "0.25", default-features = false, features = ["png"] } pollster = { version = "0.4.0" } -safetensors = "0.4" +safetensors = "0.5" [profile.dev.package."*"] diff --git a/assets/models/dinov2.mpk b/assets/models/dinov2.mpk index 86098c5..a926418 100644 Binary files a/assets/models/dinov2.mpk and b/assets/models/dinov2.mpk differ diff --git a/crates/bevy_burn_dino/Cargo.toml b/crates/bevy_burn_dino/Cargo.toml index 5dc3d56..eb4a4dc 100644 --- a/crates/bevy_burn_dino/Cargo.toml +++ b/crates/bevy_burn_dino/Cargo.toml @@ -41,9 +41,9 @@ serde = "1.0" # TODO: ideally, bevy and burn synchronize wgpu versions upstream [dependencies.bevy] -# version = "0.14" +# version = "0.16" git = "https://github.com/mosure/bevy.git" -branch = "burn" +rev = "669d139c13f6b44652f38131a5c9d20ca54e024d" default-features = false features = [ "bevy_asset", @@ -60,11 +60,12 @@ features = [ ] [dependencies.burn] -version = "0.15" -default-features = false +version = "0.16" +default-features = true features = [ # "autotune", - # "fusion", + "fusion", + "ndarray", "std", # "template", "wgpu", diff --git a/crates/bevy_burn_dino/src/main.rs b/crates/bevy_burn_dino/src/main.rs index d6f6639..94ca2c9 100644 --- a/crates/bevy_burn_dino/src/main.rs +++ b/crates/bevy_burn_dino/src/main.rs @@ -8,7 +8,7 @@ use bevy::{ DiagnosticPath, Diagnostics, DiagnosticsStore, - FrameTimeDiagnosticsPlugin, + // FrameTimeDiagnosticsPlugin, RegisterDiagnostic, }, ecs::{system::SystemState, world::CommandQueue}, @@ -25,12 +25,14 @@ use bevy::{ WgpuSettings, }, RenderPlugin, - }, tasks::{ + }, + tasks::{ block_on, futures_lite::future, AsyncComputeTaskPool, Task, }, + ui::widget::NodeImageMode, }; use bevy_args::{ parse_args, @@ -41,7 +43,7 @@ use bevy_args::{ }; use burn::{ prelude::*, - backend::wgpu::{init_async, AutoGraphicsApi, Wgpu}, + backend::wgpu::{init_setup_async, AutoGraphicsApi, Wgpu}, }; use burn_dino::model::{ @@ -401,7 +403,7 @@ fn setup_ui( ..default() }) .with_children(|builder| { - builder.spawn(UiImage { + builder.spawn(ImageNode { image: pca_image.image.clone(), image_mode: NodeImageMode::Stretch, ..default() @@ -480,7 +482,7 @@ pub fn viewer_app(args: BevyBurnDinoConfig) -> App { } if args.show_fps { - app.add_plugins(FrameTimeDiagnosticsPlugin); + // app.add_plugins(FrameTimeDiagnosticsPlugin::default()); app.register_diagnostic(Diagnostic::new(INFERENCE_FPS)); app.add_systems(Startup, fps_display_setup); app.add_systems(Update, fps_update_system); @@ -555,7 +557,7 @@ async fn run_app() { log(&format!("{:?}", args)); let device = Default::default(); - init_async::(&device, Default::default()).await; + init_setup_async::(&device, Default::default()).await; log("device created"); diff --git a/example/tsne.rs b/example/tsne.rs index e95e6ab..e09db32 100644 --- a/example/tsne.rs +++ b/example/tsne.rs @@ -10,7 +10,6 @@ use image::{ RgbImage, }; use bhtsne::tSNE; -use ndarray::Array2; use burn_dino::model::dino::{ DinoVisionTransformer, @@ -34,7 +33,7 @@ pub fn load_model( .load(STATE_ENCODED.to_vec(), &Default::default()) .expect("failed to decode state"); - let model= config.init(device); + let model = config.init(device); model.load_record(record) } @@ -53,6 +52,7 @@ fn normalize( .permute([0, 3, 1, 2]) } + pub fn load_image( bytes: &[u8], config: &DinoVisionTransformerConfig, @@ -60,7 +60,11 @@ pub fn load_image( ) -> Tensor { let img = load_from_memory_with_format(bytes, ImageFormat::Png) .unwrap() - .resize_exact(config.image_size as u32, config.image_size as u32, image::imageops::FilterType::Lanczos3); + .resize_exact( + config.image_size as u32, + config.image_size as u32, + image::imageops::FilterType::Lanczos3, + ); let img = match img { DynamicImage::ImageRgb8(img) => img, @@ -72,12 +76,13 @@ pub fn load_image( .flat_map(|p| p.0.iter().map(|&c| c as f32 / 255.0)) .collect(); - let input: Tensor = Tensor::from_floats( - img_data.as_slice(), - device, - ); - - let input = input.reshape([1, config.input_channels, config.image_size, config.image_size]); + let input: Tensor = Tensor::from_floats(img_data.as_slice(), device); + let input = input.reshape([ + 1, + config.input_channels, + config.image_size, + config.image_size, + ]); normalize(input, device) } @@ -90,7 +95,12 @@ fn main() { }; let dino = load_model(&config, &device); - let input_pngs = vec![INPUT_IMAGE_0, INPUT_IMAGE_1, INPUT_IMAGE_2, INPUT_IMAGE_3]; + let input_pngs = vec![ + INPUT_IMAGE_0, + INPUT_IMAGE_1, + INPUT_IMAGE_2, + INPUT_IMAGE_3, + ]; let mut input_tensors = Vec::new(); for input in input_pngs { @@ -105,57 +115,63 @@ fn main() { let elements = output.shape().dims[1]; let features = output.shape().dims[2]; let n_samples = batch * elements; - let spatial_size = elements.isqrt(); let x = output.reshape([n_samples, features]); let binding = x.to_data() .to_vec::() .unwrap(); - let data: Vec<&[f32]> = binding - .chunks(config.embedding_dimension) - .collect(); + let data: Vec<&[f32]> = binding.chunks(config.embedding_dimension).collect(); - let tsne_features = tSNE::new(&data) + let mut tsne_features = tSNE::new(&data) .embedding_dim(3) .perplexity(10.0) .epochs(1000) .barnes_hut(0.5, |sample_a, sample_b| { - sample_a.iter() + sample_a + .iter() .zip(sample_b.iter()) .map(|(a, b)| (a - b).powi(2)) .sum::() .sqrt() }) .embedding(); - let mut tsne_features = Array2::from_shape_vec((n_samples, 3), tsne_features).unwrap(); - for mut col in tsne_features.columns_mut() { - let min = col.fold(f32::INFINITY, |a, &b| a.min(b)); - let max = col.fold(f32::NEG_INFINITY, |a, &b| a.max(b)); - let range = max - min; - col.mapv_inplace(|x| (x - min) / range); + let num_dims = 3; + for d in 0..num_dims { + let mut min_val = f32::INFINITY; + let mut max_val = f32::NEG_INFINITY; + for i in 0..n_samples { + let idx = i * num_dims + d; + let value = tsne_features[idx]; + if value < min_val { + min_val = value; + } + if value > max_val { + max_val = value; + } + } + let range = if max_val - min_val == 0.0 { 1.0 } else { max_val - min_val }; + for i in 0..n_samples { + let idx = i * num_dims + d; + tsne_features[idx] = (tsne_features[idx] - min_val) / range; + } } - let tsne_features = tsne_features.to_shape([batch, spatial_size, spatial_size, 3]).unwrap(); - - for (i, img) in tsne_features.outer_iter().enumerate() { - let collected: Vec = img.iter() - .map(|&x| (x * 255.0) - .max(0.0) - .min(255.0) as u8 - ).collect(); - let img = RgbImage::from_raw( - spatial_size as u32, - spatial_size as u32, - collected, - ) - .unwrap(); + for b in 0..batch { + let start = b * spatial_size * spatial_size * num_dims; + let end = start + spatial_size * spatial_size * num_dims; + let mut collected: Vec = Vec::with_capacity((spatial_size * spatial_size * 3) as usize); + for &value in &tsne_features[start..end] { + let pixel = (value * 255.0).max(0.0).min(255.0) as u8; + collected.push(pixel); + } let output_directory = std::path::Path::new("output/tsne"); std::fs::create_dir_all(output_directory).unwrap(); - - let output_path = output_directory.join(format!("{}.png", i)); + let output_path = output_directory.join(format!("{}.png", b)); + let img = RgbImage::from_raw(spatial_size as u32, spatial_size as u32, collected) + .unwrap(); img.save(output_path).unwrap(); } }