No longer use tesseract, I guess?
Signed-off-by: prescientmoon <git@moonythm.dev>
This commit is contained in:
parent
5c95cdb018
commit
48c1f74f93
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -2605,6 +2605,7 @@ dependencies = [
|
|||
"num",
|
||||
"plotters",
|
||||
"poise",
|
||||
"rand",
|
||||
"sqlx",
|
||||
"tokio",
|
||||
]
|
||||
|
|
|
@ -14,6 +14,7 @@ sqlx = { version = "0.8.0", features = ["sqlite", "runtime-tokio", "chrono"] }
|
|||
hypertesseract = { features=["image"], git="https://github.com/BlueGhostGH/hypertesseract.git", rev="4e05063" }
|
||||
tokio = {version="1.38.0", features=["rt-multi-thread"]}
|
||||
imageproc = "0.25.0"
|
||||
rand = "0.8.5"
|
||||
|
||||
[profile.dev.package."*"]
|
||||
opt-level = 3
|
||||
|
|
|
@ -21,7 +21,7 @@ impl Difficulty {
|
|||
|
||||
pub const DIFFICULTY_SHORTHANDS: [&'static str; 5] = ["PST", "PRS", "FTR", "ETR", "BYD"];
|
||||
pub const DIFFICULTY_STRINGS: [&'static str; 5] =
|
||||
["past", "present", "future", "eternal", "beyond"];
|
||||
["PAST", "PRESENT", "FUTURE", "ETERNAL", "BEYOND"];
|
||||
|
||||
#[inline]
|
||||
pub fn to_index(self) -> usize {
|
||||
|
|
|
@ -7,6 +7,7 @@ use crate::recognition::recognize::{ImageAnalyzer, ScoreKind};
|
|||
use crate::user::{discord_it_to_discord_user, User};
|
||||
use crate::{edit_reply, get_user, timed};
|
||||
use image::DynamicImage;
|
||||
use poise::serenity_prelude::futures::future::join_all;
|
||||
use poise::serenity_prelude::CreateMessage;
|
||||
use poise::{serenity_prelude as serenity, CreateReply};
|
||||
use sqlx::query;
|
||||
|
@ -34,7 +35,9 @@ pub async fn magic(
|
|||
|
||||
if files.len() == 0 {
|
||||
ctx.reply("No images found attached to message").await?;
|
||||
} else {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut embeds = Vec::with_capacity(files.len());
|
||||
let mut attachments = Vec::with_capacity(files.len());
|
||||
let handle = ctx
|
||||
|
@ -43,15 +46,31 @@ pub async fn magic(
|
|||
|
||||
let mut analyzer = ImageAnalyzer::default();
|
||||
|
||||
for (i, file) in files.iter().enumerate() {
|
||||
// {{{ Download files
|
||||
let download_tasks = files
|
||||
.iter()
|
||||
.filter(|file| file.dimensions().is_some())
|
||||
.map(|file| async move { (file, file.download().await) });
|
||||
|
||||
let downloaded = timed!("dowload_files", { join_all(download_tasks).await });
|
||||
|
||||
if downloaded.len() < files.len() {
|
||||
ctx.reply("One or more of the attached files are not images!")
|
||||
.await?;
|
||||
}
|
||||
// }}}
|
||||
|
||||
for (i, (file, bytes)) in downloaded.into_iter().enumerate() {
|
||||
let bytes = bytes?;
|
||||
|
||||
let start = Instant::now();
|
||||
if let Some(_) = file.dimensions() {
|
||||
let bytes = timed!("file download", { file.download().await? });
|
||||
// {{{ Prepare image
|
||||
let mut image = timed!("decode image", { image::load_from_memory(&bytes)? });
|
||||
let mut grayscale_image = timed!("grayscale image", {
|
||||
DynamicImage::ImageLuma8(image.to_luma8())
|
||||
});
|
||||
// image = image.resize(1024, 1024, FilterType::Nearest);
|
||||
// }}}
|
||||
|
||||
let result: Result<(), Error> = try {
|
||||
// {{{ Detection
|
||||
|
@ -64,7 +83,7 @@ pub async fn magic(
|
|||
// edit_reply!(ctx, handle, "Image {}: reading difficulty", i + 1).await?;
|
||||
// Do not use `ocr_image` because this reads the colors
|
||||
let difficulty = timed!("read_difficulty", {
|
||||
analyzer.read_difficulty(ctx.data(), &image, kind)?
|
||||
analyzer.read_difficulty(ctx.data(), &image, &grayscale_image, kind)?
|
||||
});
|
||||
|
||||
// edit_reply!(ctx, handle, "Image {}: reading jacket", i + 1).await?;
|
||||
|
@ -72,40 +91,31 @@ pub async fn magic(
|
|||
analyzer.read_jacket(ctx.data(), &mut image, kind, difficulty)?
|
||||
});
|
||||
|
||||
let (note_distribution, max_recall) = match kind {
|
||||
let max_recall = match kind {
|
||||
ScoreKind::ScoreScreen => {
|
||||
edit_reply!(ctx, handle, "Image {}: reading distribution", i + 1)
|
||||
.await?;
|
||||
let note_distribution =
|
||||
Some(analyzer.read_distribution(ctx.data(), &grayscale_image)?);
|
||||
|
||||
edit_reply!(ctx, handle, "Image {}: reading max recall", i + 1).await?;
|
||||
let max_recall =
|
||||
Some(analyzer.read_max_recall(ctx.data(), &grayscale_image)?);
|
||||
|
||||
(note_distribution, max_recall)
|
||||
// edit_reply!(ctx, handle, "Image {}: reading max recall", i + 1).await?;
|
||||
Some(analyzer.read_max_recall(ctx.data(), &grayscale_image)?)
|
||||
}
|
||||
ScoreKind::SongSelect => (None, None),
|
||||
ScoreKind::SongSelect => None,
|
||||
};
|
||||
|
||||
grayscale_image.invert();
|
||||
let note_distribution = match kind {
|
||||
ScoreKind::ScoreScreen => {
|
||||
// edit_reply!(ctx, handle, "Image {}: reading distribution", i + 1).await?;
|
||||
Some(analyzer.read_distribution(ctx.data(), &grayscale_image)?)
|
||||
}
|
||||
ScoreKind::SongSelect => None,
|
||||
};
|
||||
|
||||
// edit_reply!(ctx, handle, "Image {}: reading score", i + 1).await?;
|
||||
let score = timed!("read_score", {
|
||||
analyzer.read_score(
|
||||
ctx.data(),
|
||||
Some(chart.note_count),
|
||||
&grayscale_image,
|
||||
kind,
|
||||
)?
|
||||
analyzer.read_score(ctx.data(), Some(chart.note_count), &grayscale_image, kind)?
|
||||
});
|
||||
|
||||
// {{{ Build play
|
||||
let maybe_fars = Score::resolve_distibution_ambiguities(
|
||||
score,
|
||||
note_distribution,
|
||||
chart.note_count,
|
||||
);
|
||||
let maybe_fars =
|
||||
Score::resolve_distibution_ambiguities(score, note_distribution, chart.note_count);
|
||||
|
||||
let play = CreatePlay::new(score, &chart, &user)
|
||||
.with_attachment(file)
|
||||
|
@ -132,11 +142,7 @@ pub async fn magic(
|
|||
.send_discord_error(ctx, &image, &file.filename, err)
|
||||
.await?;
|
||||
}
|
||||
} else {
|
||||
ctx.reply("One of the attached files is not an image!")
|
||||
.await?;
|
||||
continue;
|
||||
}
|
||||
|
||||
let took = start.elapsed();
|
||||
|
||||
edit_reply!(
|
||||
|
@ -156,7 +162,6 @@ pub async fn magic(
|
|||
.send_files(ctx.http(), attachments, CreateMessage::new().embeds(embeds))
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -39,7 +39,11 @@ impl UserContext {
|
|||
let geosans_measurements = GEOSANS_FONT
|
||||
.with_borrow_mut(|font| CharMeasurements::from_text(font, "0123456789'", None))?;
|
||||
let exo_measurements = EXO_FONT.with_borrow_mut(|font| {
|
||||
CharMeasurements::from_text(font, "0123456789'abcdefghijklmnopqrstuvwxyz", Some(700))
|
||||
CharMeasurements::from_text(
|
||||
font,
|
||||
"0123456789'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
||||
Some(700),
|
||||
)
|
||||
})?;
|
||||
|
||||
println!("Created user context");
|
||||
|
|
19
src/logs.rs
19
src/logs.rs
|
@ -6,10 +6,9 @@
|
|||
//! allows for a convenient way to throw images into a `logs` directory with
|
||||
//! a simple env var.
|
||||
|
||||
use std::{env, ops::Deref};
|
||||
use std::{env, ops::Deref, sync::OnceLock, time::Instant};
|
||||
|
||||
use image::{DynamicImage, EncodableLayout, ImageBuffer, PixelWithColorType};
|
||||
use poise::serenity_prelude::Timestamp;
|
||||
|
||||
use crate::context::Error;
|
||||
|
||||
|
@ -20,10 +19,19 @@ fn should_save_debug_images() -> bool {
|
|||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Returns a fixed reference instant shared by the whole process.
///
/// The instant is captured lazily: it is recorded the first time this
/// function is called (not at actual process start) and cached in a
/// `OnceLock`, so every subsequent call returns that same value. Callers can
/// use `.elapsed()` on the result to obtain a monotonically increasing offset.
#[inline]
fn get_startup_time() -> Instant {
    static CELL: OnceLock<Instant> = OnceLock::new();
    // `Instant::now` is passed directly; wrapping it in a closure is redundant.
    *CELL.get_or_init(Instant::now)
}
|
||||
|
||||
#[inline]
|
||||
pub fn debug_image_log(image: &DynamicImage) -> Result<(), Error> {
|
||||
if should_save_debug_images() {
|
||||
image.save(format!("./logs/{}.png", Timestamp::now()))?;
|
||||
image.save(format!(
|
||||
"./logs/{:0>15}.png",
|
||||
get_startup_time().elapsed().as_nanos()
|
||||
))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -37,7 +45,10 @@ where
|
|||
C: Deref<Target = [P::Subpixel]>,
|
||||
{
|
||||
if should_save_debug_images() {
|
||||
image.save(format!("./logs/{}.png", Timestamp::now()))?;
|
||||
image.save(format!(
|
||||
"./logs/{:0>15}.png",
|
||||
get_startup_time().elapsed().as_nanos()
|
||||
))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//! Hyperglass my own specialized OCR system, created as a result of my
|
||||
//! Hyperglass is my own specialized OCR system, created as a result of my
|
||||
//! annoyance with how unreliable tesseract is. Assuming we know the font,
|
||||
//! OCR should be almost perfect, even when faced with strange kerning. This is
|
||||
//! what this module achieves!
|
||||
|
@ -158,8 +158,12 @@ struct ComponentsWithBounds {
|
|||
}
|
||||
|
||||
impl ComponentsWithBounds {
|
||||
fn from_image(image: &DynamicImage) -> Result<Self, Error> {
|
||||
let image = threshold(&image.to_luma8(), 100, ThresholdType::Binary);
|
||||
fn from_image(image: &DynamicImage, binarisation_threshold: u8) -> Result<Self, Error> {
|
||||
let image = threshold(
|
||||
&image.to_luma8(),
|
||||
binarisation_threshold,
|
||||
ThresholdType::Binary,
|
||||
);
|
||||
debug_image_buffer_log(&image)?;
|
||||
|
||||
let background = Luma([u8::MAX]);
|
||||
|
@ -168,7 +172,7 @@ impl ComponentsWithBounds {
|
|||
let mut bounds: Vec<Option<ComponentBounds>> = Vec::new();
|
||||
for x in 0..components.width() {
|
||||
for y in 0..components.height() {
|
||||
// {{{ Retrieve pixel if it's not backround
|
||||
// {{{ Retrieve pixel if it's not background
|
||||
let component = components[(x, y)].0[0];
|
||||
if component == 0 {
|
||||
continue;
|
||||
|
@ -254,7 +258,7 @@ impl CharMeasurements {
|
|||
|
||||
debug_image_log(&image)?;
|
||||
|
||||
let components = ComponentsWithBounds::from_image(&image)?;
|
||||
let components = ComponentsWithBounds::from_image(&image, 100)?;
|
||||
|
||||
// {{{ Compute max width/height
|
||||
let max_width = components
|
||||
|
@ -293,9 +297,16 @@ impl CharMeasurements {
|
|||
}
|
||||
// }}}
|
||||
// {{{ Recognition
|
||||
pub fn recognise(&self, image: &DynamicImage, whitelist: &str) -> Result<String, Error> {
|
||||
let components = timed!("from_image", { ComponentsWithBounds::from_image(image)? });
|
||||
let mut result = String::new();
|
||||
pub fn recognise(
|
||||
&self,
|
||||
image: &DynamicImage,
|
||||
whitelist: &str,
|
||||
binarisation_threshold: Option<u8>,
|
||||
) -> Result<String, Error> {
|
||||
let components = timed!("from_image", {
|
||||
ComponentsWithBounds::from_image(image, binarisation_threshold.unwrap_or(100))?
|
||||
});
|
||||
let mut result = String::with_capacity(components.bounds.len());
|
||||
|
||||
let max_height = components
|
||||
.bounds
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
use std::fmt::Display;
|
||||
use std::str::FromStr;
|
||||
|
||||
use hypertesseract::{PageSegMode, Tesseract};
|
||||
use image::imageops::FilterType;
|
||||
|
@ -154,7 +153,7 @@ impl ImageAnalyzer {
|
|||
let result = timed!("full recognition", {
|
||||
Score(
|
||||
measurements
|
||||
.recognise(&image, "0123456789'")?
|
||||
.recognise(&image, "0123456789'", None)?
|
||||
.chars()
|
||||
.filter(|c| *c != '\'')
|
||||
.collect::<String>()
|
||||
|
@ -182,6 +181,7 @@ impl ImageAnalyzer {
|
|||
&mut self,
|
||||
ctx: &UserContext,
|
||||
image: &DynamicImage,
|
||||
grayscale_image: &DynamicImage,
|
||||
kind: ScoreKind,
|
||||
) -> Result<Difficulty, Error> {
|
||||
if kind == ScoreKind::SongSelect {
|
||||
|
@ -202,10 +202,6 @@ impl ImageAnalyzer {
|
|||
)
|
||||
.unwrap();
|
||||
|
||||
// rect.width = 100;
|
||||
// rect.height = 100;
|
||||
// self.crop_image_to_bytes(image, rect).unwrap();
|
||||
|
||||
let image_color = image.get_pixel(rect.x as u32, rect.y as u32);
|
||||
let image_color = Color::from_bytes(image_color.0);
|
||||
|
||||
|
@ -217,25 +213,15 @@ impl ImageAnalyzer {
|
|||
return Ok(min.1);
|
||||
}
|
||||
|
||||
let (text, conf) = Tesseract::builder()
|
||||
.language(hypertesseract::Language::English)
|
||||
.page_seg_mode(PageSegMode::RawLine)
|
||||
.build()?
|
||||
.recognize_text_cloned_with_conf(
|
||||
&self
|
||||
.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::Difficulty))?
|
||||
.into_rgba8(),
|
||||
let image = self.interp_crop(
|
||||
ctx,
|
||||
grayscale_image,
|
||||
ScoreScreen(ScoreScreenRect::Difficulty),
|
||||
)?;
|
||||
|
||||
let text = text.trim().to_lowercase();
|
||||
|
||||
if conf < 10 && conf != 0 {
|
||||
return Err(format!(
|
||||
"Difficulty text is not readable (confidence = {}, text = {}).",
|
||||
conf, text
|
||||
)
|
||||
.into());
|
||||
}
|
||||
let text =
|
||||
ctx.exo_measurements
|
||||
.recognise(&image, "PASTPRESENTFUTUREETERNALBEYOND", None)?;
|
||||
|
||||
let difficulty = Difficulty::DIFFICULTIES
|
||||
.iter()
|
||||
|
@ -256,7 +242,7 @@ impl ImageAnalyzer {
|
|||
let image = self.interp_crop(ctx, image, PlayKind)?;
|
||||
let text = ctx
|
||||
.exo_measurements
|
||||
.recognise(&image, "resultselectasong")?;
|
||||
.recognise(&image, "resultselectasong", None)?;
|
||||
|
||||
let result = if edit_distance(&text, "Result") < edit_distance(&text, "Select a song") {
|
||||
ScoreKind::ScoreScreen
|
||||
|
@ -356,21 +342,13 @@ impl ImageAnalyzer {
|
|||
static KINDS: [ScoreScreenRect; 3] = [Pure, Far, Lost];
|
||||
|
||||
for i in 0..3 {
|
||||
let text = Tesseract::builder()
|
||||
.language(hypertesseract::Language::English)
|
||||
.page_seg_mode(PageSegMode::SparseText)
|
||||
.whitelist_str("0123456789")?
|
||||
.assume_numeric_input()
|
||||
.build()?
|
||||
.recognize_text_cloned(
|
||||
&self
|
||||
.interp_crop(ctx, image, ScoreScreen(KINDS[i]))?
|
||||
.into_rgba8(),
|
||||
)?;
|
||||
|
||||
println!("Raw '{}'", text.trim());
|
||||
out[i] = u32::from_str(&text.trim()).unwrap_or(0);
|
||||
let image = self.interp_crop(ctx, image, ScoreScreen(KINDS[i]))?;
|
||||
out[i] = ctx
|
||||
.exo_measurements
|
||||
.recognise(&image, "0123456789", Some(30))?
|
||||
.parse()?;
|
||||
}
|
||||
|
||||
println!("Ditribution {out:?}");
|
||||
|
||||
Ok((out[0], out[1], out[2]))
|
||||
|
@ -382,28 +360,11 @@ impl ImageAnalyzer {
|
|||
ctx: &'a UserContext,
|
||||
image: &DynamicImage,
|
||||
) -> Result<u32, Error> {
|
||||
let (text, conf) = Tesseract::builder()
|
||||
.language(hypertesseract::Language::English)
|
||||
.page_seg_mode(PageSegMode::SingleLine)
|
||||
.whitelist_str("0123456789")?
|
||||
.assume_numeric_input()
|
||||
.build()?
|
||||
.recognize_text_cloned_with_conf(
|
||||
&self
|
||||
.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::MaxRecall))?
|
||||
.into_rgba8(),
|
||||
)?;
|
||||
|
||||
let max_recall = u32::from_str_radix(text.trim(), 10)?;
|
||||
|
||||
if conf < 20 && conf != 0 {
|
||||
return Err(format!(
|
||||
"Title text is not readable (confidence = {}, text = {}).",
|
||||
conf,
|
||||
text.trim()
|
||||
)
|
||||
.into());
|
||||
}
|
||||
let image = self.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::MaxRecall))?;
|
||||
let max_recall = ctx
|
||||
.exo_measurements
|
||||
.recognise(&image, "0123456789", None)?
|
||||
.parse()?;
|
||||
|
||||
Ok(max_recall)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue