better support for smaller splits.

2023-02-02 15:30:11 +00:00 · 2023-02-02 15:30:11 +00:00 · edfc826daa
commit edfc826daa
parent 2c1c6424d1
2 changed files with 72 additions and 11 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -5,5 +5,11 @@ edition = "2021"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

+[profile.release]
+strip = true  # Automatically strip symbols from the binary.
+opt-level = "z"  # Optimize for size.
+lto = true
+codegen-units = 1
+
 [dependencies]
 clap = { version = "3.2.20", features = ["derive"] }
--- a/src/main.rs
+++ b/src/main.rs
@ -25,47 +25,85 @@ fn main() -> io::Result<()> {
    let mut results: Vec<String> = Vec::new();
    let mut buffer = String::new();
    let lines = io::stdin().lines();
+    let mut first_word = false;
    for line in lines {
        // read line to variable
-        let inline = line.unwrap();
-        // check if line is bigger then 2000 characters and drop it
-        if inline.len() as u32 > args.count
+        let inline = line.unwrap().trim().to_string();
+        println!("line: {:?}",inline);
+        // check if line is bigger than 2000 characters and if so split it
+        if (inline.len()+1) as u32 > args.count
        {
            let res:Vec<&str> = inline.split(' ').collect();
-            for x in res
-            {
+            for x in res {
+                // check if the "word" is longer than the length it is going to be split into
                if x.len() as u32 > args.count
                {
-                    println!("{} was dropped because it was longer then {}",x,args.count);
-                    continue;
+                    // separate into length chunks
+                    let result = sub_strings(x, args.count as usize);
+                    for x in result {
+                        // if the buffer + word is longer than the split number
+                        if (buffer.len() + x.len()) as u32 > args.count
+                        {
+                            buffer = buffer.trim().to_string();
+                            buffer.push('\n');
+                            results.push(buffer);
+                            buffer = x.to_owned();
+                            println!("{:?}",buffer);
+                        } else { // if buffer+word is less then split number just add it
+                            if !first_word {
+                                buffer.push(' ');
+                            } else {
+                                first_word=false;
+                            }
+                            buffer.push_str(x);
+                            buffer.push(' ');
+                        }
+                    }
                }
+                // if the buffer + word is longer than the split number
                if (buffer.len() + x.len()) as u32 > args.count
                {
+                    buffer=buffer.trim().to_string();
                    buffer.push('\n');
                    results.push(buffer);
+                    first_word=false;
                    buffer = x.to_owned();
-                } else {
+                } else { // if buffer+word is less then split number just add it
+                    println!("{:?}",buffer);
+                    if !first_word {
+                        buffer.push(' ');
+                    } else {
+                        first_word=false;
+                    }
                    buffer.push_str(x);
-                    buffer.push(' ');
+                    println!("{:?}",buffer);
                }
            }
-
+            buffer.push('\n');
+            first_word=true;
+            continue;
        }
        // check if buffer + current line would be bigger then 2000 characters (discord without
        // nitros limit
-        if (buffer.len() + inline.len()) as u32 >= args.count
+        if (buffer.len() + inline.len()) as u32 > args.count
        {
+            buffer=buffer.trim().to_string();
+            buffer.push('\n');
            results.push(buffer);
            buffer = inline;
            buffer.push('\n');
+            first_word=true;
        }
        // else append current line to buffer
        else
        {
            buffer.push_str(inline.as_str());
            buffer.push('\n');
+            first_word=true;
        }
    }
+    buffer = buffer.trim().to_owned();
+    buffer.push('\n');
    results.push(buffer);
    let zeropad_length = format!("{}",results.len()).len();
    let mut counter = 0;
@ -83,3 +121,20 @@ fn main() -> io::Result<()> {
    }
    Ok(())
 }
+
+// by juggle-tux at https://users.rust-lang.org/t/solved-how-to-split-string-into-multiple-sub-strings-with-given-length/10542/8
+/// Splits `string` into consecutive pieces of at most `sub_len` characters.
+///
+/// Lengths are counted in `char`s rather than bytes, so every returned slice
+/// starts and ends on a UTF-8 character boundary. All pieces except possibly
+/// the last contain exactly `sub_len` characters, and concatenating them
+/// yields `string` again. An empty `string` produces an empty `Vec`.
+///
+/// A `sub_len` of zero cannot describe a valid piece size; instead of
+/// panicking on a division by zero, the input is returned unsplit as a
+/// single piece (or as no pieces when it is empty).
+fn sub_strings(string: &str, sub_len: usize) -> Vec<&str> {
+    if sub_len == 0 {
+        return if string.is_empty() { Vec::new() } else { vec![string] };
+    }
+
+    // Byte length is an upper bound on the character count, so this capacity
+    // is never smaller than the number of pieces pushed below.
+    let mut subs = Vec::with_capacity(string.len() / sub_len + 1);
+    let mut iter = string.chars();
+    let mut pos = 0;
+
+    while pos < string.len() {
+        // Byte width of the next (up to) `sub_len` characters.
+        let len: usize = iter.by_ref().take(sub_len).map(char::len_utf8).sum();
+        subs.push(&string[pos..pos + len]);
+        pos += len;
+    }
+    subs
+}