1
Fork 0

Last commit of the day, I guess

Signed-off-by: prescientmoon <git@moonythm.dev>
This commit is contained in:
prescientmoon 2024-06-23 04:20:32 +02:00
parent 5c4bfa25c9
commit ab12acd916
Signed by: prescientmoon
SSH key fingerprint: SHA256:UUF9JT2s8Xfyv76b8ZuVL7XrmimH4o49p4b+iexbVH4
5 changed files with 73 additions and 43 deletions

2
.gitignore vendored
View file

@ -3,3 +3,5 @@ target
.envrc .envrc
data/db.sqlite data/db.sqlite
data/jackets data/jackets
backups
dump.sql

View file

@ -54,9 +54,12 @@ def import_charts_from_csv():
for title, entry in songs.items(): for title, entry in songs.items():
artist = None artist = None
if title.startswith("Quon"): # Problematic titles that can belong to multiple artists
artist = title[6:-1] for possibility in ["Quon", "Gensis"]:
title = "Quon" if title.startswith(possibility):
artist = title[len(possibility) + 2 : -1]
title = possibility
break
row = conn.execute( row = conn.execute(
""" """
@ -96,3 +99,6 @@ subcommand = sys.argv[2]
if command == "import" and subcommand == "charts": if command == "import" and subcommand == "charts":
import_charts_from_csv() import_charts_from_csv()
&song_title
if command == "export" and subcommand == "jackets":
import_charts_from_csv()

View file

@ -166,9 +166,9 @@ pub async fn magic(
"Hey! I could not read the score in the provided picture.", "Hey! I could not read the score in the provided picture.",
&format!( &format!(
"This can mean one of three things: "This can mean one of three things:
1) The image you provided is not that of an Arcaea score 1. The image you provided is not that of an Arcaea score
2) The image you provided contains a newly added chart that is not in my database yet 2. The image you provided contains a newly added chart that is not in my database yet
3) The image you provided contains character art that covers the chart name. When this happens, I try to make use of the jacket art in order to determine the chart. It is possible that I've never seen the jacket art for this particular song on this particular difficulty. Contact `@prescientmoon` on discord in order to resolve the issue for you & future users playing this chart! 3. The image you provided contains character art that covers the chart name. When this happens, I try to make use of the jacket art in order to determine the chart. It is possible that I've never seen the jacket art for this particular song on this particular difficulty. Contact `@prescientmoon` on discord in order to resolve the issue for you & future users playing this chart!
Nerdy info: Nerdy info:
``` ```

View file

@ -1,5 +1,6 @@
#![warn(clippy::str_to_string)] #![warn(clippy::str_to_string)]
#![feature(iter_map_windows)] #![feature(iter_map_windows)]
#![feature(let_chains)]
mod chart; mod chart;
mod commands; mod commands;

View file

@ -5,6 +5,7 @@ use std::{
sync::{Mutex, OnceLock}, sync::{Mutex, OnceLock},
}; };
use edit_distance::edit_distance;
use image::{DynamicImage, GenericImageView}; use image::{DynamicImage, GenericImageView};
use num::Rational64; use num::Rational64;
use poise::serenity_prelude::{Attachment, AttachmentId, CreateAttachment, CreateEmbed}; use poise::serenity_prelude::{Attachment, AttachmentId, CreateAttachment, CreateEmbed};
@ -100,6 +101,15 @@ impl RelativeRect {
} }
} }
/// Clamps the values appropriately
#[inline]
pub fn fix(&mut self) {
self.x = self.x.max(0.);
self.y = self.y.max(0.);
self.width = self.width.min(1. - self.x);
self.height = self.height.min(1. - self.y);
}
#[inline] #[inline]
pub fn to_absolute(&self) -> AbsoluteRect { pub fn to_absolute(&self) -> AbsoluteRect {
AbsoluteRect::new( AbsoluteRect::new(
@ -168,6 +178,7 @@ fn widen_by(rects: &mut Vec<RelativeRect>, x: f32, y: f32) {
rect.y -= y; rect.y -= y;
rect.width += 2. * x; rect.width += 2. * x;
rect.height += 2. * y; rect.height += 2. * y;
rect.fix();
} }
} }
// }}} // }}}
@ -228,7 +239,7 @@ fn title_rects() -> &'static [RelativeRect] {
AbsoluteRect::new(760, 128, 1270, 118, ImageDimensions::new(2778, 1284)).to_relative(), AbsoluteRect::new(760, 128, 1270, 118, ImageDimensions::new(2778, 1284)).to_relative(),
]; ];
process_datapoints(&mut rects); process_datapoints(&mut rects);
widen_by(&mut rects, 0.1, 0.0); widen_by(&mut rects, 0.3, 0.0);
rects rects
}) })
} }
@ -550,7 +561,7 @@ impl ImageCropper {
PageSegMode::PsmRawLine, PageSegMode::PsmRawLine,
PageSegMode::PsmSingleLine, PageSegMode::PsmSingleLine,
] { ] {
let result = self.read_score_with_mode(image, mode)?; let result = self.read_score_with_mode(mode)?;
results.push(result.0); results.push(result.0);
// OCR sometimes loses digits // OCR sometimes loses digits
if result.0 < 1_000_000 { if result.0 < 1_000_000 {
@ -567,26 +578,23 @@ impl ImageCropper {
unreachable!() unreachable!()
} }
pub fn read_score_with_mode( fn read_score_with_mode(&mut self, mode: PageSegMode) -> Result<Score, Error> {
&mut self,
image: &DynamicImage,
mode: PageSegMode,
) -> Result<Score, Error> {
let mut t = Tesseract::new(None, Some("eng"))? let mut t = Tesseract::new(None, Some("eng"))?
// .set_variable("classify_bln_numeric_mode", "1'")? // .set_variable("classify_bln_numeric_mode", "1'")?
.set_variable("tessedit_char_whitelist", "0123456789'")? .set_variable("tessedit_char_whitelist", "0123456789'")?
.set_image_from_mem(&self.bytes)?; .set_image_from_mem(&self.bytes)?;
t.set_page_seg_mode(mode); t.set_page_seg_mode(mode);
t = t.recognize()?; t = t.recognize()?;
let conf = t.mean_text_conf();
if conf < 10 && conf != 0 { // Disabled, as this was super unreliable
Err(format!( // let conf = t.mean_text_conf();
"Score text is not readable (confidence = {}, text = {}).", // if conf < 10 && conf != 0 {
conf, // Err(format!(
t.get_text()?.trim() // "Score text is not readable (confidence = {}, text = {}).",
))?; // conf,
} // t.get_text()?.trim()
// ))?;
// }
let text: String = t let text: String = t
.get_text()? .get_text()?
@ -622,9 +630,7 @@ impl ImageCropper {
let difficulty = Difficulty::DIFFICULTIES let difficulty = Difficulty::DIFFICULTIES
.iter() .iter()
.zip(Difficulty::DIFFICULTY_STRINGS) .zip(Difficulty::DIFFICULTY_STRINGS)
.min_by_key(|(_, difficulty_string)| { .min_by_key(|(_, difficulty_string)| edit_distance(difficulty_string, text))
edit_distance::edit_distance(difficulty_string, text)
})
.map(|(difficulty, _)| *difficulty) .map(|(difficulty, _)| *difficulty)
.ok_or_else(|| format!("Unrecognised difficulty '{}'", text))?; .ok_or_else(|| format!("Unrecognised difficulty '{}'", text))?;
@ -647,39 +653,49 @@ impl ImageCropper {
let mut t = Tesseract::new(None, Some("eng"))? let mut t = Tesseract::new(None, Some("eng"))?
.set_variable( .set_variable(
"tessedit_char_whitelist", "tessedit_char_whitelist",
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,.()- ",
)? )?
.set_image_from_mem(&self.bytes)?; .set_image_from_mem(&self.bytes)?;
t.set_page_seg_mode(PageSegMode::PsmSingleLine); t.set_page_seg_mode(PageSegMode::PsmSingleLine);
t = t.recognize()?; t = t.recognize()?;
// if t.mean_text_conf() < 10 {
// Err("Difficulty text is not readable.")?;
// }
let raw_text: &str = &t.get_text()?; let raw_text: &str = &t.get_text()?;
let raw_text = raw_text.trim(); // not quite raw 🤔 let raw_text = raw_text.trim(); // not quite raw 🤔
let mut text = raw_text; let mut text: &str = &raw_text.to_lowercase();
let conf = t.mean_text_conf();
if conf < 20 && conf != 0 {
Err(format!(
"Title text is not readable (confidence = {}, text = {}).",
conf, raw_text
))?;
}
println!("Raw text: {}, confidence: {}", text, t.mean_text_conf()); println!("Raw text: {}, confidence: {}", text, t.mean_text_conf());
let lock = cache.lock().map_err(|_| "Poisoned song cache")?; let lock = cache.lock().map_err(|_| "Poisoned song cache")?;
let cached_song = loop { let cached_song = loop {
let (closest, distance) = lock let close_enough: Vec<_> = lock
.songs() .songs()
.map(|item| { .map(|item| {
( let song_title = item.song.title.to_lowercase();
item, let shortest_len = Ord::min(song_title.len(), text.len());
edit_distance::edit_distance( let mut smallest_distance = edit_distance(&text, &song_title);
&item.song.title.to_lowercase(),
&text.to_lowercase(),
),
)
})
.min_by_key(|(_, d)| *d)
.ok_or_else(|| "Empty song cache")?;
if distance > closest.song.title.len() / 3 { if let Some(sliced) = &song_title.get(..shortest_len)
&& text.len() >= 6
{
// We want to make this route super costly, which is why we multiply by 50
smallest_distance =
smallest_distance.min(50 * edit_distance(&text, sliced));
}
(item, smallest_distance)
})
.filter(|(item, d)| *d < item.song.title.len() / 3)
.collect();
if close_enough.len() == 0 {
if text.len() == 1 { if text.len() == 1 {
Err(format!( Err(format!(
"Could not find match for chart name '{}'", "Could not find match for chart name '{}'",
@ -688,8 +704,13 @@ impl ImageCropper {
} else { } else {
text = &text[..text.len() - 1]; text = &text[..text.len() - 1];
} }
} else if close_enough.len() == 1 {
break close_enough[0].0;
} else { } else {
break closest; Err(format!(
"Name '{}' is too vague to choose a match",
raw_text
))?;
}; };
}; };