From 48c1f74f935ecf2c404f29d3aafe4bd052d3b35d Mon Sep 17 00:00:00 2001
From: prescientmoon
Date: Sun, 11 Aug 2024 19:49:46 +0200
Subject: [PATCH] No longer use tesseract, I guess?

Signed-off-by: prescientmoon
---
 Cargo.lock                    |   1 +
 Cargo.toml                    |   1 +
 src/arcaea/chart.rs           |   2 +-
 src/commands/score.rs         | 221 +++++++++++++++++-----------------
 src/context.rs                |   6 +-
 src/logs.rs                   |  19 ++-
 src/recognition/hyperglass.rs |  27 +++--
 src/recognition/recognize.rs  |  83 ++++---------
 8 files changed, 177 insertions(+), 183 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 0c7b8b2..7b05670 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2605,6 +2605,7 @@ dependencies = [
  "num",
  "plotters",
  "poise",
+ "rand",
  "sqlx",
  "tokio",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 61c9553..329660f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,6 +14,7 @@ sqlx = { version = "0.8.0", features = ["sqlite", "runtime-tokio", "chrono"] }
 hypertesseract = { features=["image"], git="https://github.com/BlueGhostGH/hypertesseract.git", rev="4e05063" }
 tokio = {version="1.38.0", features=["rt-multi-thread"]}
 imageproc = "0.25.0"
+rand = "0.8.5"
 
 [profile.dev.package."*"]
 opt-level = 3
diff --git a/src/arcaea/chart.rs b/src/arcaea/chart.rs
index ef69240..5a8ae7e 100644
--- a/src/arcaea/chart.rs
+++ b/src/arcaea/chart.rs
@@ -21,7 +21,7 @@ impl Difficulty {
 	pub const DIFFICULTY_SHORTHANDS: [&'static str; 5] = ["PST", "PRS", "FTR", "ETR", "BYD"];
 	pub const DIFFICULTY_STRINGS: [&'static str; 5] =
-		["past", "present", "future", "eternal", "beyond"];
+		["PAST", "PRESENT", "FUTURE", "ETERNAL", "BEYOND"];
 
 	#[inline]
 	pub fn to_index(self) -> usize {
diff --git a/src/commands/score.rs b/src/commands/score.rs
index e5007de..13007ca 100644
--- a/src/commands/score.rs
+++ b/src/commands/score.rs
@@ -7,6 +7,7 @@ use crate::recognition::recognize::{ImageAnalyzer, ScoreKind};
 use crate::user::{discord_it_to_discord_user, User};
 use crate::{edit_reply, get_user, timed};
 use image::DynamicImage;
+use poise::serenity_prelude::futures::future::join_all;
 use poise::serenity_prelude::CreateMessage;
 use poise::{serenity_prelude as serenity, CreateReply};
 use sqlx::query;
@@ -34,128 +35,132 @@ pub async fn magic(
 	if files.len() == 0 {
 		ctx.reply("No images found attached to message").await?;
-	} else {
-		let mut embeds = Vec::with_capacity(files.len());
-		let mut attachments = Vec::with_capacity(files.len());
-		let handle = ctx
-			.reply(format!("Processed 0/{} scores", files.len()))
+		return Ok(());
+	}
+
+	let mut embeds = Vec::with_capacity(files.len());
+	let mut attachments = Vec::with_capacity(files.len());
+	let handle = ctx
+		.reply(format!("Processed 0/{} scores", files.len()))
 		.await?;
+
+	let mut analyzer = ImageAnalyzer::default();
+
+	// {{{ Download files
+	let download_tasks = files
+		.iter()
+		.filter(|file| file.dimensions().is_some())
+		.map(|file| async move { (file, file.download().await) });
+
+	let downloaded = timed!("download_files", { join_all(download_tasks).await });
+
+	if downloaded.len() < files.len() {
+		ctx.reply("One or more of the attached files are not images!")
 			.await?;
+	}
+	// }}}
 
-		let mut analyzer = ImageAnalyzer::default();
+	for (i, (file, bytes)) in downloaded.into_iter().enumerate() {
+		let bytes = bytes?;
 
-		for (i, file) in files.iter().enumerate() {
-			let start = Instant::now();
-			if let Some(_) = file.dimensions() {
-				let bytes = timed!("file download", { file.download().await? });
-				let mut image = timed!("decode image", { image::load_from_memory(&bytes)?
}); - let mut grayscale_image = timed!("grayscale image", { - DynamicImage::ImageLuma8(image.to_luma8()) - }); - // image = image.resize(1024, 1024, FilterType::Nearest); + let start = Instant::now(); + // {{{ Preapare image + let mut image = timed!("decode image", { image::load_from_memory(&bytes)? }); + let mut grayscale_image = timed!("grayscale image", { + DynamicImage::ImageLuma8(image.to_luma8()) + }); + // image = image.resize(1024, 1024, FilterType::Nearest); + // }}} - let result: Result<(), Error> = try { - // {{{ Detection + let result: Result<(), Error> = try { + // {{{ Detection - // edit_reply!(ctx, handle, "Image {}: reading kind", i + 1).await?; - let kind = timed!("read_score_kind", { - analyzer.read_score_kind(ctx.data(), &grayscale_image)? - }); + // edit_reply!(ctx, handle, "Image {}: reading kind", i + 1).await?; + let kind = timed!("read_score_kind", { + analyzer.read_score_kind(ctx.data(), &grayscale_image)? + }); - // edit_reply!(ctx, handle, "Image {}: reading difficulty", i + 1).await?; - // Do not use `ocr_image` because this reads the colors - let difficulty = timed!("read_difficulty", { - analyzer.read_difficulty(ctx.data(), &image, kind)? - }); + // edit_reply!(ctx, handle, "Image {}: reading difficulty", i + 1).await?; + // Do not use `ocr_image` because this reads the colors + let difficulty = timed!("read_difficulty", { + analyzer.read_difficulty(ctx.data(), &image, &grayscale_image, kind)? + }); - // edit_reply!(ctx, handle, "Image {}: reading jacket", i + 1).await?; - let (song, chart) = timed!("read_jacket", { - analyzer.read_jacket(ctx.data(), &mut image, kind, difficulty)? - }); + // edit_reply!(ctx, handle, "Image {}: reading jacket", i + 1).await?; + let (song, chart) = timed!("read_jacket", { + analyzer.read_jacket(ctx.data(), &mut image, kind, difficulty)? + }); - let (note_distribution, max_recall) = match kind { - ScoreKind::ScoreScreen => { - edit_reply!(ctx, handle, "Image {}: reading distribution", i + 1) - .await?; - let note_distribution = - Some(analyzer.read_distribution(ctx.data(), &grayscale_image)?); - - edit_reply!(ctx, handle, "Image {}: reading max recall", i + 1).await?; - let max_recall = - Some(analyzer.read_max_recall(ctx.data(), &grayscale_image)?); - - (note_distribution, max_recall) - } - ScoreKind::SongSelect => (None, None), - }; - - grayscale_image.invert(); - - // edit_reply!(ctx, handle, "Image {}: reading score", i + 1).await?; - let score = timed!("read_score", { - analyzer.read_score( - ctx.data(), - Some(chart.note_count), - &grayscale_image, - kind, - )? - }); - - // {{{ Build play - let maybe_fars = Score::resolve_distibution_ambiguities( - score, - note_distribution, - chart.note_count, - ); - - let play = CreatePlay::new(score, &chart, &user) - .with_attachment(file) - .with_fars(maybe_fars) - .with_max_recall(max_recall) - .save(&ctx.data()) - .await?; - // }}} - // }}} - // {{{ Deliver embed - - let (embed, attachment) = timed!("to embed", { - play.to_embed(&ctx.data().db, &user, &song, &chart, i, None) - .await? - }); - - embeds.push(embed); - attachments.extend(attachment); - // }}} - }; - - if let Err(err) = result { - analyzer - .send_discord_error(ctx, &image, &file.filename, err) - .await?; + let max_recall = match kind { + ScoreKind::ScoreScreen => { + // edit_reply!(ctx, handle, "Image {}: reading max recall", i + 1).await?; + Some(analyzer.read_max_recall(ctx.data(), &grayscale_image)?) 
} - } else { - ctx.reply("One of the attached files is not an image!") - .await?; - continue; - } - let took = start.elapsed(); + ScoreKind::SongSelect => None, + }; - edit_reply!( - ctx, - handle, - "Processed {}/{} scores. Last score took {took:?} to process.", - i + 1, - files.len() - ) - .await?; - } + grayscale_image.invert(); + let note_distribution = match kind { + ScoreKind::ScoreScreen => { + // edit_reply!(ctx, handle, "Image {}: reading distribution", i + 1).await?; + Some(analyzer.read_distribution(ctx.data(), &grayscale_image)?) + } + ScoreKind::SongSelect => None, + }; - handle.delete(ctx).await?; + // edit_reply!(ctx, handle, "Image {}: reading score", i + 1).await?; + let score = timed!("read_score", { + analyzer.read_score(ctx.data(), Some(chart.note_count), &grayscale_image, kind)? + }); - if embeds.len() > 0 { - ctx.channel_id() - .send_files(ctx.http(), attachments, CreateMessage::new().embeds(embeds)) + // {{{ Build play + let maybe_fars = + Score::resolve_distibution_ambiguities(score, note_distribution, chart.note_count); + + let play = CreatePlay::new(score, &chart, &user) + .with_attachment(file) + .with_fars(maybe_fars) + .with_max_recall(max_recall) + .save(&ctx.data()) + .await?; + // }}} + // }}} + // {{{ Deliver embed + + let (embed, attachment) = timed!("to embed", { + play.to_embed(&ctx.data().db, &user, &song, &chart, i, None) + .await? + }); + + embeds.push(embed); + attachments.extend(attachment); + // }}} + }; + + if let Err(err) = result { + analyzer + .send_discord_error(ctx, &image, &file.filename, err) .await?; } + + let took = start.elapsed(); + + edit_reply!( + ctx, + handle, + "Processed {}/{} scores. Last score took {took:?} to process.", + i + 1, + files.len() + ) + .await?; + } + + handle.delete(ctx).await?; + + if embeds.len() > 0 { + ctx.channel_id() + .send_files(ctx.http(), attachments, CreateMessage::new().embeds(embeds)) + .await?; } Ok(()) diff --git a/src/context.rs b/src/context.rs index 64a48df..bdbd546 100644 --- a/src/context.rs +++ b/src/context.rs @@ -39,7 +39,11 @@ impl UserContext { let geosans_measurements = GEOSANS_FONT .with_borrow_mut(|font| CharMeasurements::from_text(font, "0123456789'", None))?; let exo_measurements = EXO_FONT.with_borrow_mut(|font| { - CharMeasurements::from_text(font, "0123456789'abcdefghijklmnopqrstuvwxyz", Some(700)) + CharMeasurements::from_text( + font, + "0123456789'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", + Some(700), + ) })?; println!("Created user context"); diff --git a/src/logs.rs b/src/logs.rs index b86d83e..a278ff1 100644 --- a/src/logs.rs +++ b/src/logs.rs @@ -6,10 +6,9 @@ //! allows for a convenient way to throw images into a `logs` directory with //! a simple env var. 
-use std::{env, ops::Deref};
+use std::{env, ops::Deref, sync::OnceLock, time::Instant};
 
 use image::{DynamicImage, EncodableLayout, ImageBuffer, PixelWithColorType};
-use poise::serenity_prelude::Timestamp;
 
 use crate::context::Error;
 
@@ -20,10 +19,19 @@ fn should_save_debug_images() -> bool {
 		.unwrap_or(false)
 }
 
+#[inline]
+fn get_startup_time() -> Instant {
+	static CELL: OnceLock<Instant> = OnceLock::new();
+	*CELL.get_or_init(|| Instant::now())
+}
+
 #[inline]
 pub fn debug_image_log(image: &DynamicImage) -> Result<(), Error> {
 	if should_save_debug_images() {
-		image.save(format!("./logs/{}.png", Timestamp::now()))?;
+		image.save(format!(
+			"./logs/{:0>15}.png",
+			get_startup_time().elapsed().as_nanos()
+		))?;
 	}
 
 	Ok(())
@@ -37,7 +45,10 @@ where
 	C: Deref<Target = [P::Subpixel]>,
 {
 	if should_save_debug_images() {
-		image.save(format!("./logs/{}.png", Timestamp::now()))?;
+		image.save(format!(
+			"./logs/{:0>15}.png",
+			get_startup_time().elapsed().as_nanos()
+		))?;
 	}
 
 	Ok(())
diff --git a/src/recognition/hyperglass.rs b/src/recognition/hyperglass.rs
index ab6d02a..e6aefda 100644
--- a/src/recognition/hyperglass.rs
+++ b/src/recognition/hyperglass.rs
@@ -1,4 +1,4 @@
-//! Hyperglass my own specialized OCR system, created as a result of my
+//! Hyperglass is my own specialized OCR system, created as a result of my
 //! annoyance with how unreliable tesseract is. Assuming we know the font,
 //! OCR should be almost perfect, even when faced with stange kerning. This is
 //! what this module achieves!
@@ -158,8 +158,12 @@ struct ComponentsWithBounds {
 }
 
 impl ComponentsWithBounds {
-	fn from_image(image: &DynamicImage) -> Result<Self, Error> {
-		let image = threshold(&image.to_luma8(), 100, ThresholdType::Binary);
+	fn from_image(image: &DynamicImage, binarisation_threshold: u8) -> Result<Self, Error> {
+		let image = threshold(
+			&image.to_luma8(),
+			binarisation_threshold,
+			ThresholdType::Binary,
+		);
 		debug_image_buffer_log(&image)?;
 
 		let background = Luma([u8::MAX]);
@@ -168,7 +172,7 @@ impl ComponentsWithBounds {
 		let mut bounds: Vec<Option<Rect>> = Vec::new();
 		for x in 0..components.width() {
 			for y in 0..components.height() {
-				// {{{ Retrieve pixel if it's not backround
+				// {{{ Retrieve pixel if it's not background
 				let component = components[(x, y)].0[0];
 				if component == 0 {
 					continue;
@@ -254,7 +258,7 @@ impl CharMeasurements {
 
 		debug_image_log(&image)?;
 
-		let components = ComponentsWithBounds::from_image(&image)?;
+		let components = ComponentsWithBounds::from_image(&image, 100)?;
 
 		// {{{ Compute max width/height
 		let max_width = components
@@ -293,9 +297,16 @@ impl CharMeasurements {
 	}
 	// }}}
 	// {{{ Recognition
-	pub fn recognise(&self, image: &DynamicImage, whitelist: &str) -> Result<String, Error> {
-		let components = timed!("from_image", { ComponentsWithBounds::from_image(image)? });
-		let mut result = String::new();
+	pub fn recognise(
+		&self,
+		image: &DynamicImage,
+		whitelist: &str,
+		binarisation_threshold: Option<u8>,
+	) -> Result<String, Error> {
+		let components = timed!("from_image", {
+			ComponentsWithBounds::from_image(image, binarisation_threshold.unwrap_or(100))?
+		});
+		let mut result = String::with_capacity(components.bounds.len());
 
 		let max_height = components
 			.bounds
diff --git a/src/recognition/recognize.rs b/src/recognition/recognize.rs
index b00835f..ebee196 100644
--- a/src/recognition/recognize.rs
+++ b/src/recognition/recognize.rs
@@ -1,5 +1,4 @@
 use std::fmt::Display;
-use std::str::FromStr;
 
 use hypertesseract::{PageSegMode, Tesseract};
 use image::imageops::FilterType;
@@ -154,7 +153,7 @@ impl ImageAnalyzer {
 		let result = timed!("full recognition", {
 			Score(
 				measurements
-					.recognise(&image, "0123456789'")?
+					.recognise(&image, "0123456789'", None)?
 					.chars()
 					.filter(|c| *c != '\'')
 					.collect::<String>()
@@ -182,6 +181,7 @@ impl ImageAnalyzer {
 		&mut self,
 		ctx: &UserContext,
 		image: &DynamicImage,
+		grayscale_image: &DynamicImage,
 		kind: ScoreKind,
 	) -> Result<Difficulty, Error> {
 		if kind == ScoreKind::SongSelect {
@@ -202,10 +202,6 @@ impl ImageAnalyzer {
 		)
 		.unwrap();
 
-		// rect.width = 100;
-		// rect.height = 100;
-		// self.crop_image_to_bytes(image, rect).unwrap();
-
 		let image_color = image.get_pixel(rect.x as u32, rect.y as u32);
 		let image_color = Color::from_bytes(image_color.0);
 
@@ -217,25 +213,15 @@ impl ImageAnalyzer {
 			return Ok(min.1);
 		}
 
-		let (text, conf) = Tesseract::builder()
-			.language(hypertesseract::Language::English)
-			.page_seg_mode(PageSegMode::RawLine)
-			.build()?
-			.recognize_text_cloned_with_conf(
-				&self
-					.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::Difficulty))?
-					.into_rgba8(),
-			)?;
+		let image = self.interp_crop(
+			ctx,
+			grayscale_image,
+			ScoreScreen(ScoreScreenRect::Difficulty),
+		)?;
 
-		let text = text.trim().to_lowercase();
-
-		if conf < 10 && conf != 0 {
-			return Err(format!(
-				"Difficulty text is not readable (confidence = {}, text = {}).",
-				conf, text
-			)
-			.into());
-		}
+		let text =
+			ctx.exo_measurements
+				.recognise(&image, "PASTPRESENTFUTUREETERNALBEYOND", None)?;
 
 		let difficulty = Difficulty::DIFFICULTIES
 			.iter()
@@ -256,7 +242,7 @@ impl ImageAnalyzer {
 		let image = self.interp_crop(ctx, image, PlayKind)?;
 		let text = ctx
 			.exo_measurements
-			.recognise(&image, "resultselectasong")?;
+			.recognise(&image, "resultselectasong", None)?;
 
 		let result = if edit_distance(&text, "Result") < edit_distance(&text, "Select a song") {
 			ScoreKind::ScoreScreen
@@ -356,21 +342,13 @@ impl ImageAnalyzer {
 		static KINDS: [ScoreScreenRect; 3] = [Pure, Far, Lost];
 
 		for i in 0..3 {
-			let text = Tesseract::builder()
-				.language(hypertesseract::Language::English)
-				.page_seg_mode(PageSegMode::SparseText)
-				.whitelist_str("0123456789")?
-				.assume_numeric_input()
-				.build()?
-				.recognize_text_cloned(
-					&self
-						.interp_crop(ctx, image, ScoreScreen(KINDS[i]))?
-						.into_rgba8(),
-				)?;
-
-			println!("Raw '{}'", text.trim());
-			out[i] = u32::from_str(&text.trim()).unwrap_or(0);
+			let image = self.interp_crop(ctx, image, ScoreScreen(KINDS[i]))?;
+			out[i] = ctx
+				.exo_measurements
+				.recognise(&image, "0123456789", Some(30))?
+				.parse()?;
 		}
+
 		println!("Ditribution {out:?}");
 
 		Ok((out[0], out[1], out[2]))
@@ -382,28 +360,11 @@ impl ImageAnalyzer {
 		ctx: &'a UserContext,
 		image: &DynamicImage,
 	) -> Result<u32, Error> {
-		let (text, conf) = Tesseract::builder()
-			.language(hypertesseract::Language::English)
-			.page_seg_mode(PageSegMode::SingleLine)
-			.whitelist_str("0123456789")?
-			.assume_numeric_input()
-			.build()?
-			.recognize_text_cloned_with_conf(
-				&self
-					.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::MaxRecall))?
-					.into_rgba8(),
-			)?;
-
-		let max_recall = u32::from_str_radix(text.trim(), 10)?;
-
-		if conf < 20 && conf != 0 {
-			return Err(format!(
-				"Title text is not readable (confidence = {}, text = {}).",
-				conf,
-				text.trim()
-			)
-			.into());
-		}
+		let image = self.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::MaxRecall))?;
+		let max_recall = ctx
+			.exo_measurements
+			.recognise(&image, "0123456789", None)?
+			.parse()?;
 
 		Ok(max_recall)
 	}
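
A quick sketch of the call pattern this patch standardises on: every OCR read now goes
through hyperglass' `CharMeasurements::recognise`, whose new third argument is an
optional binarisation threshold that falls back to 100 when `None` is passed (see
`ComponentsWithBounds::from_image` above). The `read_digits` helper below is
hypothetical and exists only to illustrate the signature; the types and crate paths are
the ones already used in the diff:

	use crate::context::{Error, UserContext};
	use image::DynamicImage;

	// Hypothetical example: OCR a pre-cropped digit region using the Exo font measurements.
	fn read_digits(
		ctx: &UserContext,
		image: &DynamicImage,
		binarisation_threshold: Option<u8>,
	) -> Result<u32, Error> {
		// `Some(30)` mirrors what read_distribution passes; `None` keeps the default of 100.
		let text = ctx
			.exo_measurements
			.recognise(image, "0123456789", binarisation_threshold)?;
		Ok(text.parse()?)
	}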