136 lines
4.2 KiB
Rust
136 lines
4.2 KiB
Rust
use std::io::prelude::*;
|
|
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
|
|
|
|
use itertools::Itertools;
|
|
use nom::branch::alt;
|
|
use nom::bytes::complete::{tag, take_until};
|
|
use nom::character::complete::{alphanumeric1, space0};
|
|
use nom::multi::many1;
|
|
use nom::sequence::delimited;
|
|
use nom::IResult;
|
|
|
|
fn tags(s: &str) -> IResult<&str, Vec<&str>> {
|
|
many1(alt((quoted_tag, word_tag)))(s)
|
|
}
|
|
|
|
fn word_tag(s: &str) -> IResult<&str, &str> {
|
|
let (rest, tag) = delimited(space0, alphanumeric1, space0)(s)?;
|
|
Ok((rest, tag))
|
|
}
|
|
|
|
fn quoted_tag(s: &str) -> IResult<&str, &str> {
|
|
let (rest, tag) = delimited(
|
|
space0,
|
|
delimited(tag("\""), take_until("\""), tag("\"")),
|
|
space0,
|
|
)(s)?;
|
|
Ok((rest, tag))
|
|
}
|
|
|
|
/// Lookup table that maps a tag (or an alias of it) to its canonical tag.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TagSuggestions {
    /// Key: a known tag or alias. Value: the canonical tag it resolves to.
    /// Canonical tags map to themselves.
    map: BTreeMap<String, String>,
}
|
|
|
|
impl TagSuggestions {
|
|
pub fn read_from_file<P: AsRef<Path>>(path: P) -> Result<Self, &'static str> {
|
|
let mut map = BTreeMap::new();
|
|
|
|
// TODO: better configuration of file
|
|
let file = File::open(path).expect("Failed to load tag terms...");
|
|
let buf_reader = BufReader::new(file);
|
|
for line in buf_reader.lines().map(|l| l.unwrap()) {
|
|
// TODO: no more expect, deduplication, error handling?
|
|
let (_, tag_list) = tags(&line).expect("invalid tag");
|
|
let first = tag_list.first().expect("at least one").to_string();
|
|
for tag in tag_list {
|
|
map.insert(tag.to_string(), first.to_string());
|
|
}
|
|
}
|
|
|
|
Ok(TagSuggestions { map })
|
|
}
|
|
|
|
pub fn get_canonical<'a>(&'a self, input: &'a String) -> &String {
|
|
self.map.get(input).unwrap_or(input)
|
|
}
|
|
|
|
pub fn get_suggestions<'a>(&'a self, input: &'a str) -> impl Iterator<Item = &String> {
|
|
input
|
|
.split([' ', std::path::MAIN_SEPARATOR, '_', '-', '.']) // todo: pascal case detection
|
|
.filter_map(|i| self.map.get(&i.to_lowercase()))
|
|
.unique()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Bare words and quoted phrases can be mixed on one line.
    #[test]
    fn parse() {
        assert_eq!(
            tags("test testing \"this is a test\" \"this is another test\" "),
            Ok((
                "",
                vec!["test", "testing", "this is a test", "this is another test"]
            ))
        );
    }

    /// Canonical lookup against a hand-built table (no file involved).
    #[test]
    fn load_suggestions() {
        let mut map = BTreeMap::new();
        map.insert("foo".to_string(), "foo".to_string());
        map.insert("bar".to_string(), "foo".to_string());
        map.insert("baz".to_string(), "foo".to_string());
        map.insert("apple".to_string(), "fruit".to_string());
        map.insert("fruit".to_string(), "fruit".to_string());
        let suggestions = TagSuggestions { map };

        assert_eq!(suggestions.get_canonical(&"baz".to_string()), "foo");
        assert_eq!(suggestions.get_canonical(&"foo".to_string()), "foo");
        assert_eq!(suggestions.get_canonical(&"apple".to_string()), "fruit");
    }

    /// Canonical lookup against the table loaded from `tag_terms.txt`;
    /// unknown tags are returned unchanged.
    #[test]
    fn load_file() {
        let suggestions = TagSuggestions::read_from_file("tag_terms.txt").unwrap();
        assert_eq!(suggestions.get_canonical(&"aux".to_string()), "aux");
        assert_eq!(
            suggestions.get_canonical(&"fromblespatz".to_string()),
            "fromblespatz"
        );

        assert_eq!(suggestions.get_canonical(&"auxiliary".to_string()), "aux");
    }

    /// `get_suggestions` takes a single string and tokenizes it itself, so the
    /// words are passed joined by separators it splits on.
    #[test]
    fn test_suggestions() {
        let suggestions = TagSuggestions::read_from_file("tag_terms.txt").unwrap();

        // Duplicate "warm" must be reported once; unknown words are dropped.
        let res: Vec<&String> = suggestions
            .get_suggestions("auxiliary frangle scratches warm warm SAMPLE001")
            .collect();
        assert_eq!(res, vec!["aux", "scratch", "warm"]);

        let res: Vec<&String> = suggestions
            .get_suggestions("planet of the breaks 1 vec1 cymbals open hh oh 001")
            .collect();
        assert_eq!(res, vec!["break", "cymbal", "hihat"]);

        // Mixed separators: '_', '-', '.' and ' ' all delimit words.
        let res: Vec<&String> = suggestions
            .get_suggestions("vec1_long-basses.warm synthesizer")
            .collect();
        assert_eq!(res, vec!["long", "bass", "warm", "synth"]);
    }
}
|