1
Fork 0

Improve OCR on downscaled images

This commit is contained in:
prescientmoon 2024-09-17 03:23:45 +02:00
parent 50f8db4e2e
commit 18d0f320ab
Signed by: prescientmoon
SSH key fingerprint: SHA256:WFp/cO76nbarETAoQcQXuV+0h7XJsEsOCI0UsyPIy6U
2 changed files with 33 additions and 17 deletions

View file

@@ -72,7 +72,7 @@ impl ComponentVec {
if let Some(p) = components.components.get_pixel_checked(x, y) if let Some(p) = components.components.get_pixel_checked(x, y)
&& p.0[0] == component && p.0[0] == component
{ {
count += 1; count += 255 - components.image[(x, y)].0[0] as u32;
} }
} }
} }
@@ -141,6 +141,7 @@ struct ComponentBounds {
} }
struct ComponentsWithBounds { struct ComponentsWithBounds {
image: ImageBuffer<Luma<u8>, Vec<u8>>,
components: ImageBuffer<Luma<u32>, Vec<u32>>, components: ImageBuffer<Luma<u32>, Vec<u32>>,
// NOTE: the index is (the id of the component) - 1 // NOTE: the index is (the id of the component) - 1
@@ -153,16 +154,17 @@ struct ComponentsWithBounds {
} }
impl ComponentsWithBounds { impl ComponentsWithBounds {
fn from_image(image: &DynamicImage, binarisation_threshold: u8) -> Result<Self, Error> { fn from_image(
let image = threshold( image: &DynamicImage,
&image.to_luma8(), binarisation_threshold: u8,
binarisation_threshold, max_sizes: (f32, f32),
ThresholdType::Binary, ) -> Result<Self, Error> {
); let luma_image = image.to_luma8();
debug_image_buffer_log(&image); let binarized_image = threshold(&luma_image, binarisation_threshold, ThresholdType::Binary);
debug_image_buffer_log(&binarized_image);
let background = Luma([u8::MAX]); let background = Luma([u8::MAX]);
let components = connected_components(&image, Connectivity::Eight, background); let components = connected_components(&binarized_image, Connectivity::Eight, background);
let mut bounds: Vec<Option<ComponentBounds>> = Vec::new(); let mut bounds: Vec<Option<ComponentBounds>> = Vec::new();
for x in 0..components.width() { for x in 0..components.width() {
@@ -198,7 +200,13 @@ impl ComponentsWithBounds {
// {{{ Remove components that are too large // {{{ Remove components that are too large
for bound in &mut bounds { for bound in &mut bounds {
if bound.map_or(false, |b| (b.x_max - b.x_min) >= 9 * image.width() / 10) { if bound.map_or(false, |b| {
(b.x_max - b.x_min) as f32 >= max_sizes.0 * image.width() as f32
}) {
*bound = None;
} else if bound.map_or(false, |b| {
(b.y_max - b.y_min) as f32 >= max_sizes.1 * image.height() as f32
}) {
*bound = None; *bound = None;
} }
} }
@@ -210,6 +218,7 @@ impl ComponentsWithBounds {
bounds_by_position.sort_by_key(|i| bounds[*i].unwrap().x_min); bounds_by_position.sort_by_key(|i| bounds[*i].unwrap().x_min);
Ok(Self { Ok(Self {
image: luma_image,
components, components,
bounds, bounds,
bounds_by_position, bounds_by_position,
@@ -254,7 +263,7 @@ impl CharMeasurements {
debug_image_log(&image); debug_image_log(&image);
let components = ComponentsWithBounds::from_image(&image, 100)?; let components = ComponentsWithBounds::from_image(&image, 100, (1.0, 1.0))?;
// {{{ Compute max width/height // {{{ Compute max width/height
let max_width = components let max_width = components
@@ -298,9 +307,13 @@ impl CharMeasurements {
image: &DynamicImage, image: &DynamicImage,
whitelist: &str, whitelist: &str,
binarisation_threshold: Option<u8>, binarisation_threshold: Option<u8>,
max_sizes: Option<(f32, f32)>,
) -> Result<String, Error> { ) -> Result<String, Error> {
let components = let components = ComponentsWithBounds::from_image(
ComponentsWithBounds::from_image(image, binarisation_threshold.unwrap_or(100))?; image,
binarisation_threshold.unwrap_or(100),
max_sizes.unwrap_or((0.9, 1.0)),
)?;
let mut result = String::with_capacity(components.bounds.len()); let mut result = String::with_capacity(components.bounds.len());
let max_height = components let max_height = components

View file

@@ -150,7 +150,7 @@ impl ImageAnalyzer {
let result = Score( let result = Score(
measurements measurements
.recognise(&image, "0123456789'", None)? .recognise(&image, "0123456789'", None, None)?
.chars() .chars()
.filter(|c| *c != '\'') .filter(|c| *c != '\'')
.collect::<String>() .collect::<String>()
@@ -218,6 +218,7 @@ impl ImageAnalyzer {
let text = ctx.kazesawa_bold_measurements.recognise( let text = ctx.kazesawa_bold_measurements.recognise(
&image, &image,
"PASTPRESENTFUTUREETERNALBEYOND", "PASTPRESENTFUTUREETERNALBEYOND",
Some(200), // We can afford to be generous with binarization here
None, None,
)?; )?;
@@ -240,7 +241,7 @@ impl ImageAnalyzer {
let image = self.interp_crop(ctx, image, PlayKind)?; let image = self.interp_crop(ctx, image, PlayKind)?;
let text = ctx let text = ctx
.kazesawa_measurements .kazesawa_measurements
.recognise(&image, "ResultSelectaSong ", None)?; .recognise(&image, "ResultSelectaSong ", None, None)?;
let result = if edit_distance(&text, "Result") < edit_distance(&text, "SelectaSong") { let result = if edit_distance(&text, "Result") < edit_distance(&text, "SelectaSong") {
ScoreKind::ScoreScreen ScoreKind::ScoreScreen
@@ -342,7 +343,8 @@ impl ImageAnalyzer {
let image = self.interp_crop(ctx, image, ScoreScreen(KINDS[i]))?; let image = self.interp_crop(ctx, image, ScoreScreen(KINDS[i]))?;
out[i] = ctx out[i] = ctx
.kazesawa_bold_measurements .kazesawa_bold_measurements
.recognise(&image, "0123456789", Some(30))? // We need to be very strict with binarization here
.recognise(&image, "0123456789", Some(30), Some((0.33, 0.85)))?
.parse() .parse()
.unwrap_or(100000); // This will get discarded as making no sense .unwrap_or(100000); // This will get discarded as making no sense
} }
@@ -361,7 +363,8 @@ impl ImageAnalyzer {
let image = self.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::MaxRecall))?; let image = self.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::MaxRecall))?;
let max_recall = ctx let max_recall = ctx
.exo_measurements .exo_measurements
.recognise(&image, "0123456789", None)? // We can afford to be generous with binarization here
.recognise(&image, "0123456789", Some(200), None)?
.parse()?; .parse()?;
Ok(max_recall) Ok(max_recall)