Last commit of the day, I guess
Signed-off-by: prescientmoon <git@moonythm.dev>
This commit is contained in:
parent
5c4bfa25c9
commit
ab12acd916
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -3,3 +3,5 @@ target
|
||||||
.envrc
|
.envrc
|
||||||
data/db.sqlite
|
data/db.sqlite
|
||||||
data/jackets
|
data/jackets
|
||||||
|
backups
|
||||||
|
dump.sql
|
||||||
|
|
|
@ -54,9 +54,12 @@ def import_charts_from_csv():
|
||||||
for title, entry in songs.items():
|
for title, entry in songs.items():
|
||||||
artist = None
|
artist = None
|
||||||
|
|
||||||
if title.startswith("Quon"):
|
# Problematic titles that can belong to multiple artists
|
||||||
artist = title[6:-1]
|
for possibility in ["Quon", "Gensis"]:
|
||||||
title = "Quon"
|
if title.startswith(possibility):
|
||||||
|
artist = title[len(possibility) + 2 : -1]
|
||||||
|
title = possibility
|
||||||
|
break
|
||||||
|
|
||||||
row = conn.execute(
|
row = conn.execute(
|
||||||
"""
|
"""
|
||||||
|
@ -96,3 +99,6 @@ subcommand = sys.argv[2]
|
||||||
|
|
||||||
if command == "import" and subcommand == "charts":
|
if command == "import" and subcommand == "charts":
|
||||||
import_charts_from_csv()
|
import_charts_from_csv()
|
||||||
|
&song_title
|
||||||
|
if command == "export" and subcommand == "jackets":
|
||||||
|
import_charts_from_csv()
|
||||||
|
|
|
@ -166,9 +166,9 @@ pub async fn magic(
|
||||||
"Hey! I could not read the score in the provided picture.",
|
"Hey! I could not read the score in the provided picture.",
|
||||||
&format!(
|
&format!(
|
||||||
"This can mean one of three things:
|
"This can mean one of three things:
|
||||||
1) The image you provided is not that of an Arcaea score
|
1. The image you provided is not that of an Arcaea score
|
||||||
2) The image you provided contains a newly added chart that is not in my database yet
|
2. The image you provided contains a newly added chart that is not in my database yet
|
||||||
3) The image you provided contains character art that covers the chart name. When this happens, I try to make use of the jacket art in order to determine the chart. It is possible that I've never seen the jacket art for this particular song on this particular difficulty. Contact `@prescientmoon` on discord in order to resolve the issue for you & future users playing this chart!
|
3. The image you provided contains character art that covers the chart name. When this happens, I try to make use of the jacket art in order to determine the chart. It is possible that I've never seen the jacket art for this particular song on this particular difficulty. Contact `@prescientmoon` on discord in order to resolve the issue for you & future users playing this chart!
|
||||||
|
|
||||||
Nerdy info:
|
Nerdy info:
|
||||||
```
|
```
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#![warn(clippy::str_to_string)]
|
#![warn(clippy::str_to_string)]
|
||||||
#![feature(iter_map_windows)]
|
#![feature(iter_map_windows)]
|
||||||
|
#![feature(let_chains)]
|
||||||
|
|
||||||
mod chart;
|
mod chart;
|
||||||
mod commands;
|
mod commands;
|
||||||
|
|
95
src/score.rs
95
src/score.rs
|
@ -5,6 +5,7 @@ use std::{
|
||||||
sync::{Mutex, OnceLock},
|
sync::{Mutex, OnceLock},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use edit_distance::edit_distance;
|
||||||
use image::{DynamicImage, GenericImageView};
|
use image::{DynamicImage, GenericImageView};
|
||||||
use num::Rational64;
|
use num::Rational64;
|
||||||
use poise::serenity_prelude::{Attachment, AttachmentId, CreateAttachment, CreateEmbed};
|
use poise::serenity_prelude::{Attachment, AttachmentId, CreateAttachment, CreateEmbed};
|
||||||
|
@ -100,6 +101,15 @@ impl RelativeRect {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Clamps the values appropriately
|
||||||
|
#[inline]
|
||||||
|
pub fn fix(&mut self) {
|
||||||
|
self.x = self.x.max(0.);
|
||||||
|
self.y = self.y.max(0.);
|
||||||
|
self.width = self.width.min(1. - self.x);
|
||||||
|
self.height = self.height.min(1. - self.y);
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn to_absolute(&self) -> AbsoluteRect {
|
pub fn to_absolute(&self) -> AbsoluteRect {
|
||||||
AbsoluteRect::new(
|
AbsoluteRect::new(
|
||||||
|
@ -168,6 +178,7 @@ fn widen_by(rects: &mut Vec<RelativeRect>, x: f32, y: f32) {
|
||||||
rect.y -= y;
|
rect.y -= y;
|
||||||
rect.width += 2. * x;
|
rect.width += 2. * x;
|
||||||
rect.height += 2. * y;
|
rect.height += 2. * y;
|
||||||
|
rect.fix();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// }}}
|
// }}}
|
||||||
|
@ -228,7 +239,7 @@ fn title_rects() -> &'static [RelativeRect] {
|
||||||
AbsoluteRect::new(760, 128, 1270, 118, ImageDimensions::new(2778, 1284)).to_relative(),
|
AbsoluteRect::new(760, 128, 1270, 118, ImageDimensions::new(2778, 1284)).to_relative(),
|
||||||
];
|
];
|
||||||
process_datapoints(&mut rects);
|
process_datapoints(&mut rects);
|
||||||
widen_by(&mut rects, 0.1, 0.0);
|
widen_by(&mut rects, 0.3, 0.0);
|
||||||
rects
|
rects
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -550,7 +561,7 @@ impl ImageCropper {
|
||||||
PageSegMode::PsmRawLine,
|
PageSegMode::PsmRawLine,
|
||||||
PageSegMode::PsmSingleLine,
|
PageSegMode::PsmSingleLine,
|
||||||
] {
|
] {
|
||||||
let result = self.read_score_with_mode(image, mode)?;
|
let result = self.read_score_with_mode(mode)?;
|
||||||
results.push(result.0);
|
results.push(result.0);
|
||||||
// OCR sometimes loses digits
|
// OCR sometimes loses digits
|
||||||
if result.0 < 1_000_000 {
|
if result.0 < 1_000_000 {
|
||||||
|
@ -567,26 +578,23 @@ impl ImageCropper {
|
||||||
unreachable!()
|
unreachable!()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn read_score_with_mode(
|
fn read_score_with_mode(&mut self, mode: PageSegMode) -> Result<Score, Error> {
|
||||||
&mut self,
|
|
||||||
image: &DynamicImage,
|
|
||||||
mode: PageSegMode,
|
|
||||||
) -> Result<Score, Error> {
|
|
||||||
let mut t = Tesseract::new(None, Some("eng"))?
|
let mut t = Tesseract::new(None, Some("eng"))?
|
||||||
// .set_variable("classify_bln_numeric_mode", "1'")?
|
// .set_variable("classify_bln_numeric_mode", "1'")?
|
||||||
.set_variable("tessedit_char_whitelist", "0123456789'")?
|
.set_variable("tessedit_char_whitelist", "0123456789'")?
|
||||||
.set_image_from_mem(&self.bytes)?;
|
.set_image_from_mem(&self.bytes)?;
|
||||||
t.set_page_seg_mode(mode);
|
t.set_page_seg_mode(mode);
|
||||||
t = t.recognize()?;
|
t = t.recognize()?;
|
||||||
let conf = t.mean_text_conf();
|
|
||||||
|
|
||||||
if conf < 10 && conf != 0 {
|
// Disabled, as this was super unreliable
|
||||||
Err(format!(
|
// let conf = t.mean_text_conf();
|
||||||
"Score text is not readable (confidence = {}, text = {}).",
|
// if conf < 10 && conf != 0 {
|
||||||
conf,
|
// Err(format!(
|
||||||
t.get_text()?.trim()
|
// "Score text is not readable (confidence = {}, text = {}).",
|
||||||
))?;
|
// conf,
|
||||||
}
|
// t.get_text()?.trim()
|
||||||
|
// ))?;
|
||||||
|
// }
|
||||||
|
|
||||||
let text: String = t
|
let text: String = t
|
||||||
.get_text()?
|
.get_text()?
|
||||||
|
@ -622,9 +630,7 @@ impl ImageCropper {
|
||||||
let difficulty = Difficulty::DIFFICULTIES
|
let difficulty = Difficulty::DIFFICULTIES
|
||||||
.iter()
|
.iter()
|
||||||
.zip(Difficulty::DIFFICULTY_STRINGS)
|
.zip(Difficulty::DIFFICULTY_STRINGS)
|
||||||
.min_by_key(|(_, difficulty_string)| {
|
.min_by_key(|(_, difficulty_string)| edit_distance(difficulty_string, text))
|
||||||
edit_distance::edit_distance(difficulty_string, text)
|
|
||||||
})
|
|
||||||
.map(|(difficulty, _)| *difficulty)
|
.map(|(difficulty, _)| *difficulty)
|
||||||
.ok_or_else(|| format!("Unrecognised difficulty '{}'", text))?;
|
.ok_or_else(|| format!("Unrecognised difficulty '{}'", text))?;
|
||||||
|
|
||||||
|
@ -647,39 +653,49 @@ impl ImageCropper {
|
||||||
let mut t = Tesseract::new(None, Some("eng"))?
|
let mut t = Tesseract::new(None, Some("eng"))?
|
||||||
.set_variable(
|
.set_variable(
|
||||||
"tessedit_char_whitelist",
|
"tessedit_char_whitelist",
|
||||||
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ",
|
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,.()- ",
|
||||||
)?
|
)?
|
||||||
.set_image_from_mem(&self.bytes)?;
|
.set_image_from_mem(&self.bytes)?;
|
||||||
t.set_page_seg_mode(PageSegMode::PsmSingleLine);
|
t.set_page_seg_mode(PageSegMode::PsmSingleLine);
|
||||||
t = t.recognize()?;
|
t = t.recognize()?;
|
||||||
|
|
||||||
// if t.mean_text_conf() < 10 {
|
|
||||||
// Err("Difficulty text is not readable.")?;
|
|
||||||
// }
|
|
||||||
|
|
||||||
let raw_text: &str = &t.get_text()?;
|
let raw_text: &str = &t.get_text()?;
|
||||||
let raw_text = raw_text.trim(); // not quite raw 🤔
|
let raw_text = raw_text.trim(); // not quite raw 🤔
|
||||||
let mut text = raw_text;
|
let mut text: &str = &raw_text.to_lowercase();
|
||||||
|
|
||||||
|
let conf = t.mean_text_conf();
|
||||||
|
if conf < 20 && conf != 0 {
|
||||||
|
Err(format!(
|
||||||
|
"Title text is not readable (confidence = {}, text = {}).",
|
||||||
|
conf, raw_text
|
||||||
|
))?;
|
||||||
|
}
|
||||||
|
|
||||||
println!("Raw text: {}, confidence: {}", text, t.mean_text_conf());
|
println!("Raw text: {}, confidence: {}", text, t.mean_text_conf());
|
||||||
|
|
||||||
let lock = cache.lock().map_err(|_| "Poisoned song cache")?;
|
let lock = cache.lock().map_err(|_| "Poisoned song cache")?;
|
||||||
let cached_song = loop {
|
let cached_song = loop {
|
||||||
let (closest, distance) = lock
|
let close_enough: Vec<_> = lock
|
||||||
.songs()
|
.songs()
|
||||||
.map(|item| {
|
.map(|item| {
|
||||||
(
|
let song_title = item.song.title.to_lowercase();
|
||||||
item,
|
let shortest_len = Ord::min(song_title.len(), text.len());
|
||||||
edit_distance::edit_distance(
|
let mut smallest_distance = edit_distance(&text, &song_title);
|
||||||
&item.song.title.to_lowercase(),
|
|
||||||
&text.to_lowercase(),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.min_by_key(|(_, d)| *d)
|
|
||||||
.ok_or_else(|| "Empty song cache")?;
|
|
||||||
|
|
||||||
if distance > closest.song.title.len() / 3 {
|
if let Some(sliced) = &song_title.get(..shortest_len)
|
||||||
|
&& text.len() >= 6
|
||||||
|
{
|
||||||
|
// We want to make this route super costly, which is why we multiply by 50
|
||||||
|
smallest_distance =
|
||||||
|
smallest_distance.min(50 * edit_distance(&text, sliced));
|
||||||
|
}
|
||||||
|
|
||||||
|
(item, smallest_distance)
|
||||||
|
})
|
||||||
|
.filter(|(item, d)| *d < item.song.title.len() / 3)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if close_enough.len() == 0 {
|
||||||
if text.len() == 1 {
|
if text.len() == 1 {
|
||||||
Err(format!(
|
Err(format!(
|
||||||
"Could not find match for chart name '{}'",
|
"Could not find match for chart name '{}'",
|
||||||
|
@ -688,8 +704,13 @@ impl ImageCropper {
|
||||||
} else {
|
} else {
|
||||||
text = &text[..text.len() - 1];
|
text = &text[..text.len() - 1];
|
||||||
}
|
}
|
||||||
|
} else if close_enough.len() == 1 {
|
||||||
|
break close_enough[0].0;
|
||||||
} else {
|
} else {
|
||||||
break closest;
|
Err(format!(
|
||||||
|
"Name '{}' is too vague to choose a match",
|
||||||
|
raw_text
|
||||||
|
))?;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue