Improve OCR on downscaled images
This commit is contained in:
parent
50f8db4e2e
commit
18d0f320ab
|
@ -72,7 +72,7 @@ impl ComponentVec {
|
||||||
if let Some(p) = components.components.get_pixel_checked(x, y)
|
if let Some(p) = components.components.get_pixel_checked(x, y)
|
||||||
&& p.0[0] == component
|
&& p.0[0] == component
|
||||||
{
|
{
|
||||||
count += 1;
|
count += 255 - components.image[(x, y)].0[0] as u32;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -141,6 +141,7 @@ struct ComponentBounds {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ComponentsWithBounds {
|
struct ComponentsWithBounds {
|
||||||
|
image: ImageBuffer<Luma<u8>, Vec<u8>>,
|
||||||
components: ImageBuffer<Luma<u32>, Vec<u32>>,
|
components: ImageBuffer<Luma<u32>, Vec<u32>>,
|
||||||
|
|
||||||
// NOTE: the index is (the id of the component) - 1
|
// NOTE: the index is (the id of the component) - 1
|
||||||
|
@ -153,16 +154,17 @@ struct ComponentsWithBounds {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ComponentsWithBounds {
|
impl ComponentsWithBounds {
|
||||||
fn from_image(image: &DynamicImage, binarisation_threshold: u8) -> Result<Self, Error> {
|
fn from_image(
|
||||||
let image = threshold(
|
image: &DynamicImage,
|
||||||
&image.to_luma8(),
|
binarisation_threshold: u8,
|
||||||
binarisation_threshold,
|
max_sizes: (f32, f32),
|
||||||
ThresholdType::Binary,
|
) -> Result<Self, Error> {
|
||||||
);
|
let luma_image = image.to_luma8();
|
||||||
debug_image_buffer_log(&image);
|
let binarized_image = threshold(&luma_image, binarisation_threshold, ThresholdType::Binary);
|
||||||
|
debug_image_buffer_log(&binarized_image);
|
||||||
|
|
||||||
let background = Luma([u8::MAX]);
|
let background = Luma([u8::MAX]);
|
||||||
let components = connected_components(&image, Connectivity::Eight, background);
|
let components = connected_components(&binarized_image, Connectivity::Eight, background);
|
||||||
|
|
||||||
let mut bounds: Vec<Option<ComponentBounds>> = Vec::new();
|
let mut bounds: Vec<Option<ComponentBounds>> = Vec::new();
|
||||||
for x in 0..components.width() {
|
for x in 0..components.width() {
|
||||||
|
@ -198,7 +200,13 @@ impl ComponentsWithBounds {
|
||||||
|
|
||||||
// {{{ Remove components that are too large
|
// {{{ Remove components that are too large
|
||||||
for bound in &mut bounds {
|
for bound in &mut bounds {
|
||||||
if bound.map_or(false, |b| (b.x_max - b.x_min) >= 9 * image.width() / 10) {
|
if bound.map_or(false, |b| {
|
||||||
|
(b.x_max - b.x_min) as f32 >= max_sizes.0 * image.width() as f32
|
||||||
|
}) {
|
||||||
|
*bound = None;
|
||||||
|
} else if bound.map_or(false, |b| {
|
||||||
|
(b.y_max - b.y_min) as f32 >= max_sizes.1 * image.height() as f32
|
||||||
|
}) {
|
||||||
*bound = None;
|
*bound = None;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -210,6 +218,7 @@ impl ComponentsWithBounds {
|
||||||
bounds_by_position.sort_by_key(|i| bounds[*i].unwrap().x_min);
|
bounds_by_position.sort_by_key(|i| bounds[*i].unwrap().x_min);
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
|
image: luma_image,
|
||||||
components,
|
components,
|
||||||
bounds,
|
bounds,
|
||||||
bounds_by_position,
|
bounds_by_position,
|
||||||
|
@ -254,7 +263,7 @@ impl CharMeasurements {
|
||||||
|
|
||||||
debug_image_log(&image);
|
debug_image_log(&image);
|
||||||
|
|
||||||
let components = ComponentsWithBounds::from_image(&image, 100)?;
|
let components = ComponentsWithBounds::from_image(&image, 100, (1.0, 1.0))?;
|
||||||
|
|
||||||
// {{{ Compute max width/height
|
// {{{ Compute max width/height
|
||||||
let max_width = components
|
let max_width = components
|
||||||
|
@ -298,9 +307,13 @@ impl CharMeasurements {
|
||||||
image: &DynamicImage,
|
image: &DynamicImage,
|
||||||
whitelist: &str,
|
whitelist: &str,
|
||||||
binarisation_threshold: Option<u8>,
|
binarisation_threshold: Option<u8>,
|
||||||
|
max_sizes: Option<(f32, f32)>,
|
||||||
) -> Result<String, Error> {
|
) -> Result<String, Error> {
|
||||||
let components =
|
let components = ComponentsWithBounds::from_image(
|
||||||
ComponentsWithBounds::from_image(image, binarisation_threshold.unwrap_or(100))?;
|
image,
|
||||||
|
binarisation_threshold.unwrap_or(100),
|
||||||
|
max_sizes.unwrap_or((0.9, 1.0)),
|
||||||
|
)?;
|
||||||
let mut result = String::with_capacity(components.bounds.len());
|
let mut result = String::with_capacity(components.bounds.len());
|
||||||
|
|
||||||
let max_height = components
|
let max_height = components
|
||||||
|
|
|
@ -150,7 +150,7 @@ impl ImageAnalyzer {
|
||||||
|
|
||||||
let result = Score(
|
let result = Score(
|
||||||
measurements
|
measurements
|
||||||
.recognise(&image, "0123456789'", None)?
|
.recognise(&image, "0123456789'", None, None)?
|
||||||
.chars()
|
.chars()
|
||||||
.filter(|c| *c != '\'')
|
.filter(|c| *c != '\'')
|
||||||
.collect::<String>()
|
.collect::<String>()
|
||||||
|
@ -218,6 +218,7 @@ impl ImageAnalyzer {
|
||||||
let text = ctx.kazesawa_bold_measurements.recognise(
|
let text = ctx.kazesawa_bold_measurements.recognise(
|
||||||
&image,
|
&image,
|
||||||
"PASTPRESENTFUTUREETERNALBEYOND",
|
"PASTPRESENTFUTUREETERNALBEYOND",
|
||||||
|
Some(200), // We can afford to be generous with binarization here
|
||||||
None,
|
None,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
@ -240,7 +241,7 @@ impl ImageAnalyzer {
|
||||||
let image = self.interp_crop(ctx, image, PlayKind)?;
|
let image = self.interp_crop(ctx, image, PlayKind)?;
|
||||||
let text = ctx
|
let text = ctx
|
||||||
.kazesawa_measurements
|
.kazesawa_measurements
|
||||||
.recognise(&image, "ResultSelectaSong ", None)?;
|
.recognise(&image, "ResultSelectaSong ", None, None)?;
|
||||||
|
|
||||||
let result = if edit_distance(&text, "Result") < edit_distance(&text, "SelectaSong") {
|
let result = if edit_distance(&text, "Result") < edit_distance(&text, "SelectaSong") {
|
||||||
ScoreKind::ScoreScreen
|
ScoreKind::ScoreScreen
|
||||||
|
@ -342,7 +343,8 @@ impl ImageAnalyzer {
|
||||||
let image = self.interp_crop(ctx, image, ScoreScreen(KINDS[i]))?;
|
let image = self.interp_crop(ctx, image, ScoreScreen(KINDS[i]))?;
|
||||||
out[i] = ctx
|
out[i] = ctx
|
||||||
.kazesawa_bold_measurements
|
.kazesawa_bold_measurements
|
||||||
.recognise(&image, "0123456789", Some(30))?
|
// We need to be very strict with binarization here
|
||||||
|
.recognise(&image, "0123456789", Some(30), Some((0.33, 0.85)))?
|
||||||
.parse()
|
.parse()
|
||||||
.unwrap_or(100000); // This will get discarded as making no sense
|
.unwrap_or(100000); // This will get discarded as making no sense
|
||||||
}
|
}
|
||||||
|
@ -361,7 +363,8 @@ impl ImageAnalyzer {
|
||||||
let image = self.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::MaxRecall))?;
|
let image = self.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::MaxRecall))?;
|
||||||
let max_recall = ctx
|
let max_recall = ctx
|
||||||
.exo_measurements
|
.exo_measurements
|
||||||
.recognise(&image, "0123456789", None)?
|
// We can afford to be generous with binarization here
|
||||||
|
.recognise(&image, "0123456789", Some(200), None)?
|
||||||
.parse()?;
|
.parse()?;
|
||||||
|
|
||||||
Ok(max_recall)
|
Ok(max_recall)
|
||||||
|
|
Loading…
Reference in a new issue