sample-amp/src/tag_terms.rs

136 lines
4.2 KiB
Rust

use std::io::prelude::*;
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
use itertools::Itertools;
use nom::branch::alt;
use nom::bytes::complete::{tag, take_until};
use nom::character::complete::{alphanumeric1, space0};
use nom::multi::many1;
use nom::sequence::delimited;
use nom::IResult;
/// Parses one or more tags from the start of `s`.
///
/// Each tag is either a double-quoted phrase or a bare word; the quoted form is
/// tried first. Returns the unparsed remainder together with the tags in the
/// order they appeared, and fails if not even one tag can be parsed.
fn tags(s: &str) -> IResult<&str, Vec<&str>> {
    let any_tag = alt((quoted_tag, word_tag));
    many1(any_tag)(s)
}
/// Parses a single bare-word tag: a run of one or more alphanumeric characters
/// (as recognised by nom's `alphanumeric1`), with optional surrounding
/// spaces/tabs consumed and discarded.
fn word_tag(s: &str) -> IResult<&str, &str> {
    delimited(space0, alphanumeric1, space0)(s)
}
/// Parses a single double-quoted tag and returns the text between the quotes.
///
/// Optional spaces/tabs before the opening quote and after the closing quote
/// are consumed and discarded; the quotes themselves are not part of the tag.
fn quoted_tag(s: &str) -> IResult<&str, &str> {
    // Everything up to (but excluding) the next double quote is the tag text.
    let quoted = delimited(tag("\""), take_until("\""), tag("\""));
    delimited(space0, quoted, space0)(s)
}
/// Lookup table that maps a tag (or any alias of it) to its canonical tag.
///
/// `Default` yields an empty table, in which no tag has a canonical form.
#[derive(Debug, Clone, Default)]
pub struct TagSuggestions {
    /// Key: a known tag spelling. Value: the canonical tag it stands for
    /// (a canonical tag maps to itself).
    map: BTreeMap<String, String>,
}
impl TagSuggestions {
pub fn read_from_file<P: AsRef<Path>>(path: P) -> Result<Self, &'static str> {
let mut map = BTreeMap::new();
// TODO: better configuration of file
let file = File::open(path).expect("Failed to load tag terms...");
let buf_reader = BufReader::new(file);
for line in buf_reader.lines().map(|l| l.unwrap()) {
// TODO: no more expect, deduplication, error handling?
let (_, tag_list) = tags(&line).expect("invalid tag");
let first = tag_list.first().expect("at least one").to_string();
for tag in tag_list {
map.insert(tag.to_string(), first.to_string());
}
}
Ok(TagSuggestions { map })
}
pub fn get_canonical<'a>(&'a self, input: &'a String) -> &String {
self.map.get(input).unwrap_or(input)
}
pub fn get_suggestions<'a>(&'a self, input: &'a str) -> impl Iterator<Item = &String> {
input
.split([' ', std::path::MAIN_SEPARATOR, '_', '-', '.']) // todo: pascal case detection
.filter_map(|i| self.map.get(&i.to_lowercase()))
.unique()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Bare words and quoted phrases can be mixed on one line; trailing
    /// whitespace is consumed.
    #[test]
    fn parse() {
        assert_eq!(
            tags("test testing \"this is a test\" \"this is another test\" "),
            Ok((
                "",
                vec!["test", "testing", "this is a test", "this is another test"]
            ))
        );
    }

    /// Canonical lookup against a hand-built table (no file involved).
    #[test]
    fn load_suggestions() {
        let mut map = BTreeMap::new();
        map.insert("foo".to_string(), "foo".to_string());
        map.insert("bar".to_string(), "foo".to_string());
        map.insert("baz".to_string(), "foo".to_string());
        map.insert("apple".to_string(), "fruit".to_string());
        map.insert("fruit".to_string(), "fruit".to_string());
        let suggestions = TagSuggestions { map };
        assert_eq!(suggestions.get_canonical(&"baz".to_string()), "foo");
        assert_eq!(suggestions.get_canonical(&"foo".to_string()), "foo");
        assert_eq!(suggestions.get_canonical(&"apple".to_string()), "fruit");
    }

    /// Canonical lookup against the `tag_terms.txt` fixture; unknown tags are
    /// returned unchanged.
    #[test]
    fn load_file() {
        let suggestions = TagSuggestions::read_from_file("tag_terms.txt").unwrap();
        assert_eq!(suggestions.get_canonical(&"aux".to_string()), "aux");
        assert_eq!(
            suggestions.get_canonical(&"fromblespatz".to_string()),
            "fromblespatz"
        );
        assert_eq!(suggestions.get_canonical(&"auxiliary".to_string()), "aux");
    }

    /// Suggestions from free-form text against the `tag_terms.txt` fixture.
    ///
    /// `get_suggestions` takes a single `&str` and does its own splitting, so
    /// the words are passed as one space-separated string.
    #[test]
    fn test_suggestions() {
        let suggestions = TagSuggestions::read_from_file("tag_terms.txt").unwrap();
        let res: Vec<&String> = suggestions
            .get_suggestions("auxiliary frangle scratches warm warm SAMPLE001")
            .collect();
        assert_eq!(res, vec!["aux", "scratch", "warm"]);
        let res: Vec<&String> = suggestions
            .get_suggestions("planet of the breaks 1 vec1 cymbals open hh oh 001")
            .collect();
        assert_eq!(res, vec!["break", "cymbal", "hihat"]);
        let res: Vec<&String> = suggestions
            .get_suggestions("vec1 long basses warm synthesizer")
            .collect();
        assert_eq!(res, vec!["long", "bass", "warm", "synth"]);
    }
}