From 18d0f320ab895bf6c8583f13520aff7cd4a62f76 Mon Sep 17 00:00:00 2001
From: prescientmoon <git@moonythm.dev>
Date: Tue, 17 Sep 2024 03:23:45 +0200
Subject: [PATCH] Improve OCR on downscaled images

---
 src/recognition/hyperglass.rs | 39 +++++++++++++++++++++++------------
 src/recognition/recognize.rs  | 11 ++++++----
 2 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/recognition/hyperglass.rs b/src/recognition/hyperglass.rs
index 636ef99..f254ebf 100644
--- a/src/recognition/hyperglass.rs
+++ b/src/recognition/hyperglass.rs
@@ -72,7 +72,7 @@ impl ComponentVec {
 					if let Some(p) = components.components.get_pixel_checked(x, y)
 						&& p.0[0] == component
 					{
-						count += 1;
+						count += 255 - components.image[(x, y)].0[0] as u32;
 					}
 				}
 			}
@@ -141,6 +141,7 @@ struct ComponentBounds {
 }
 
 struct ComponentsWithBounds {
+	image: ImageBuffer<Luma<u8>, Vec<u8>>,
 	components: ImageBuffer<Luma<u32>, Vec<u32>>,
 
 	// NOTE: the index is (the id of the component) - 1
@@ -153,16 +154,17 @@ struct ComponentsWithBounds {
 }
 
 impl ComponentsWithBounds {
-	fn from_image(image: &DynamicImage, binarisation_threshold: u8) -> Result<Self, Error> {
-		let image = threshold(
-			&image.to_luma8(),
-			binarisation_threshold,
-			ThresholdType::Binary,
-		);
-		debug_image_buffer_log(&image);
+	fn from_image(
+		image: &DynamicImage,
+		binarisation_threshold: u8,
+		max_sizes: (f32, f32),
+	) -> Result<Self, Error> {
+		let luma_image = image.to_luma8();
+		let binarized_image = threshold(&luma_image, binarisation_threshold, ThresholdType::Binary);
+		debug_image_buffer_log(&binarized_image);
 
 		let background = Luma([u8::MAX]);
-		let components = connected_components(&image, Connectivity::Eight, background);
+		let components = connected_components(&binarized_image, Connectivity::Eight, background);
 
 		let mut bounds: Vec<Option<ComponentBounds>> = Vec::new();
 		for x in 0..components.width() {
@@ -198,7 +200,13 @@ impl ComponentsWithBounds {
 
 		// {{{ Remove components that are too large
 		for bound in &mut bounds {
-			if bound.map_or(false, |b| (b.x_max - b.x_min) >= 9 * image.width() / 10) {
+			if bound.map_or(false, |b| {
+				(b.x_max - b.x_min) as f32 >= max_sizes.0 * image.width() as f32
+			}) {
+				*bound = None;
+			} else if bound.map_or(false, |b| {
+				(b.y_max - b.y_min) as f32 >= max_sizes.1 * image.height() as f32
+			}) {
 				*bound = None;
 			}
 		}
@@ -210,6 +218,7 @@ impl ComponentsWithBounds {
 		bounds_by_position.sort_by_key(|i| bounds[*i].unwrap().x_min);
 
 		Ok(Self {
+			image: luma_image,
 			components,
 			bounds,
 			bounds_by_position,
@@ -254,7 +263,7 @@ impl CharMeasurements {
 
 		debug_image_log(&image);
 
-		let components = ComponentsWithBounds::from_image(&image, 100)?;
+		let components = ComponentsWithBounds::from_image(&image, 100, (1.0, 1.0))?;
 
 		// {{{ Compute max width/height
 		let max_width = components
@@ -298,9 +307,13 @@ impl CharMeasurements {
 		image: &DynamicImage,
 		whitelist: &str,
 		binarisation_threshold: Option<u8>,
+		max_sizes: Option<(f32, f32)>,
 	) -> Result<String, Error> {
-		let components =
-			ComponentsWithBounds::from_image(image, binarisation_threshold.unwrap_or(100))?;
+		let components = ComponentsWithBounds::from_image(
+			image,
+			binarisation_threshold.unwrap_or(100),
+			max_sizes.unwrap_or((0.9, 1.0)),
+		)?;
 		let mut result = String::with_capacity(components.bounds.len());
 
 		let max_height = components
diff --git a/src/recognition/recognize.rs b/src/recognition/recognize.rs
index 530fe3c..9bfa63f 100644
--- a/src/recognition/recognize.rs
+++ b/src/recognition/recognize.rs
@@ -150,7 +150,7 @@ impl ImageAnalyzer {
 
 		let result = Score(
 			measurements
-				.recognise(&image, "0123456789'", None)?
+				.recognise(&image, "0123456789'", None, None)?
 				.chars()
 				.filter(|c| *c != '\'')
 				.collect::<String>()
@@ -218,6 +218,7 @@ impl ImageAnalyzer {
 		let text = ctx.kazesawa_bold_measurements.recognise(
 			&image,
 			"PASTPRESENTFUTUREETERNALBEYOND",
+			Some(200), // We can afford to be generous with binarization here
 			None,
 		)?;
 
@@ -240,7 +241,7 @@ impl ImageAnalyzer {
 		let image = self.interp_crop(ctx, image, PlayKind)?;
 		let text = ctx
 			.kazesawa_measurements
-			.recognise(&image, "ResultSelectaSong ", None)?;
+			.recognise(&image, "ResultSelectaSong ", None, None)?;
 
 		let result = if edit_distance(&text, "Result") < edit_distance(&text, "SelectaSong") {
 			ScoreKind::ScoreScreen
@@ -342,7 +343,8 @@ impl ImageAnalyzer {
 			let image = self.interp_crop(ctx, image, ScoreScreen(KINDS[i]))?;
 			out[i] = ctx
 				.kazesawa_bold_measurements
-				.recognise(&image, "0123456789", Some(30))?
+				// We need to be very strict with binarization here
+				.recognise(&image, "0123456789", Some(30), Some((0.33, 0.85)))?
 				.parse()
 				.unwrap_or(100000); // This will get discarded as making no sense
 		}
@@ -361,7 +363,8 @@ impl ImageAnalyzer {
 		let image = self.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::MaxRecall))?;
 		let max_recall = ctx
 			.exo_measurements
-			.recognise(&image, "0123456789", None)?
+			// We can afford to be generous with binarization here
+			.recognise(&image, "0123456789", Some(200), None)?
 			.parse()?;
 
 		Ok(max_recall)