From f44330d3fbceaed77fcc727c426d59bd7cc274d8 Mon Sep 17 00:00:00 2001 From: Clemens Wehrmann Date: Tue, 16 Aug 2016 23:19:09 +0200 Subject: [PATCH] Add a script that writes out links inline for printing Closes #185 --- src/bin/link2print.rs | 396 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 396 insertions(+) create mode 100644 src/bin/link2print.rs diff --git a/src/bin/link2print.rs b/src/bin/link2print.rs new file mode 100644 index 0000000..b14c43a --- /dev/null +++ b/src/bin/link2print.rs @@ -0,0 +1,396 @@ +extern crate regex; + +use std::collections::HashMap; +use std::io; +use std::io::{Read, Write}; +use regex::{Regex, Captures}; + +fn main() { + + write_md(parse_links(parse_references(read_md()))); +} + +fn read_md() -> String { + let mut buffer = String::new(); + match io::stdin().read_to_string(&mut buffer) { + Ok(_) => buffer, + Err(error) => panic!(error), + } +} + +fn write_md(output: String) { + write!(io::stdout(), "{}", output).unwrap(); +} + +fn parse_references(buffer: String) -> (String, HashMap) { + let mut ref_map = HashMap::new(); + // TODO: Currently doesn't handle "title" in following line + let re = Regex::new(r###"(?m)\n?^ {0,3}\[([^]]+)\]:[[:blank:]]*(.*)$"###).unwrap(); + let output = re.replace_all(&buffer, |caps: &Captures| { + let key = caps.at(1).unwrap().to_owned().to_uppercase(); + let val = caps.at(2).unwrap().to_owned(); + if ref_map.insert(key, val).is_some() { + panic!("Did not expect markdown page to have duplicate reference"); + } + "".to_string() + }); + (output, ref_map) +} + +fn parse_links((buffer, ref_map): (String, HashMap)) -> String { + // TODO: check which punctuation is allowed by spec + let re = Regex::new(r###"(?:(?P
(?:```(?:[^`]|`[^`])*`?\n```\n)|(?:[^[]`[^`\n]+[\n]?[^`\n]*`))|(?:\[(?P[^]]+)\](?:(?:\([[:blank:]]*(?P[^")]*[^ ])(?:[[:blank:]]*"[^"]*")?\))|(?:\[(?P[^]]*)\]))?))"###).unwrap();
+    let output = re.replace_all(&buffer, |caps: &Captures| {
+        match caps.name("pre") {
+            Some(pre_section) => format!("{}", pre_section.to_owned()),
+            None => {
+                let name = caps.name("name").unwrap().to_owned();
+                let val = match caps.name("val") {
+                    // [name](link)
+                    Some(value) => value.to_owned(),
+                    None => {
+                        match caps.name("key") {
+                            Some(key) => {
+                                match key {
+                                    // [name][]
+                                    "" => format!("{}", ref_map.get(&name.to_uppercase()).unwrap()),
+                                    // [name][reference]
+                                    _ => format!("{}", ref_map.get(&key.to_uppercase()).unwrap()),
+                                }
+                            }
+                            // [name] as reference
+                            None => format!("{}", ref_map.get(&name.to_uppercase()).unwrap()),
+                        }
+                    }
+                };
+                format!("{} at *{}*", name, val)
+            }
+        }
+    });
+    output
+}
+
+#[cfg(test)]
+mod tests {
+    fn parse(source: String) -> String {
+        super::parse_links(super::parse_references(source))
+    }
+
+    #[test]
+    fn parses_inline_link() {
+        let source = r"This is a [link](http://google.com) that should be expanded".to_string();
+        let target = r"This is a link at *http://google.com* that should be expanded".to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn parses_multiline_links() {
+        let source = r"This is a [link](http://google.com) that
+should appear expanded. Another [location](/here/) and [another](http://gogogo)"
+            .to_string();
+        let target = r"This is a link at *http://google.com* that
+should appear expanded. Another location at */here/* and another at *http://gogogo*"
+            .to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn parses_reference() {
+        let source = r"This is a [link][theref].
+[theref]: http://example.com/foo
+more text"
+            .to_string();
+        let target = r"This is a link at *http://example.com/foo*.
+more text"
+            .to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn parses_implicit_link() {
+        let source = r"This is an [implicit][] link.
+[implicit]: /The Link/"
+            .to_string();
+        let target = r"This is an implicit at */The Link/* link.".to_string();
+        assert_eq!(parse(source), target);
+    }
+    #[test]
+    fn parses_refs_with_one_space_indentation() {
+        let source = r"This is a [link][ref]
+ [ref]: The link"
+            .to_string();
+        let target = r"This is a link at *The link*".to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn parses_refs_with_two_space_indentation() {
+        let source = r"This is a [link][ref]
+  [ref]: The link"
+            .to_string();
+        let target = r"This is a link at *The link*".to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn parses_refs_with_three_space_indentation() {
+        let source = r"This is a [link][ref]
+   [ref]: The link"
+            .to_string();
+        let target = r"This is a link at *The link*".to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    #[should_panic]
+    fn rejects_refs_with_four_space_indentation() {
+        let source = r"This is a [link][ref]
+    [ref]: The link"
+            .to_string();
+        let target = r"This is a link at *The link*".to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn ignores_optional_inline_title() {
+        let source = r###"This is a titled [link](http://example.com "My title")."###.to_string();
+        let target = r"This is a titled link at *http://example.com*.".to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn parses_title_with_puctuation() {
+        let source = r###"[link](http://example.com "It's Title")"###.to_string();
+        let target = r"link at *http://example.com*".to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn parses_name_with_punctuation() {
+        let source = r###"[I'm here](there)"###.to_string();
+        let target = r###"I'm here at *there*"###.to_string();
+        assert_eq!(parse(source), target);
+    }
+    #[test]
+    fn parses_name_with_utf8() {
+        let source = r###"[user’s forum](the user’s forum)"###.to_string();
+        let target = r###"user’s forum at *the user’s forum*"###.to_string();
+        assert_eq!(parse(source), target);
+    }
+
+
+    #[test]
+    fn parses_reference_with_punctuation() {
+        let source = r###"[link][the ref-ref]
+[the ref-ref]:http://example.com/ref-ref"###
+            .to_string();
+        let target = r###"link at *http://example.com/ref-ref*"###.to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn parses_reference_case_insensitively() {
+        let source = r"[link][Ref]
+[ref]: The reference"
+            .to_string();
+        let target = r"link at *The reference*".to_string();
+        assert_eq!(parse(source), target);
+    }
+    #[test]
+    fn parses_link_as_reference_when_reference_is_empty() {
+        let source = r"[link as reference][]
+[link as reference]: the actual reference"
+            .to_string();
+        let target = r"link as reference at *the actual reference*".to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn parses_link_without_reference_as_reference() {
+        let source = r"[link] is alone
+[link]: The contents"
+            .to_string();
+        let target = r"link at *The contents* is alone".to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    #[ignore]
+    fn parses_link_without_reference_as_reference_with_asterisks() {
+        let source = r"*[link]* is alone
+[link]: The contents"
+            .to_string();
+        let target = r"*link* at *The contents* is alone".to_string();
+        assert_eq!(parse(source), target);
+    }
+    #[test]
+    fn ignores_links_in_pre_sections() {
+        let source = r###"```toml
+[package]
+name = "hello_cargo"
+version = "0.1.0"
+authors = ["Your Name "]
+
+[dependencies]
+```
+"###
+            .to_string();
+        let target = source.clone();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn ignores_links_in_quoted_sections() {
+        let source = r###"do not change `[package]`."###.to_string();
+        let target = source.clone();
+        assert_eq!(parse(source), target);
+    }
+    #[test]
+    fn ignores_links_in_quoted_sections_containing_newlines() {
+        let source = r"do not change `this [package]
+is still here` [link](ref)"
+            .to_string();
+        let target = r"do not change `this [package]
+is still here` link at *ref*"
+            .to_string();
+        assert_eq!(parse(source), target);
+    }
+
+    #[test]
+    fn ignores_links_in_pre_sections_while_still_handling_links() {
+        let source = r###"```toml
+[package]
+name = "hello_cargo"
+version = "0.1.0"
+authors = ["Your Name "]
+
+[dependencies]
+```
+Another [link]
+more text
+[link]: http://gohere
+"###
+            .to_string();
+        let target = r###"```toml
+[package]
+name = "hello_cargo"
+version = "0.1.0"
+authors = ["Your Name "]
+
+[dependencies]
+```
+Another link at *http://gohere*
+more text
+"###
+            .to_string();
+        assert_eq!(parse(source), target);
+    }
+    #[test]
+    fn ignores_quotes_in_pre_sections() {
+        let source = r###"```bash
+$ cargo build
+   Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
+src/main.rs:23:21: 23:35 error: mismatched types [E0308]
+src/main.rs:23     match guess.cmp(&secret_number) {
+                                   ^~~~~~~~~~~~~~
+src/main.rs:23:21: 23:35 help: run `rustc --explain E0308` to see a detailed explanation
+src/main.rs:23:21: 23:35 note: expected type `&std::string::String`
+src/main.rs:23:21: 23:35 note:    found type `&_`
+error: aborting due to previous error
+Could not compile `guessing_game`.
+```
+"###
+            .to_string();
+        let target = source.clone();
+        assert_eq!(parse(source), target);
+    }
+    #[test]
+    fn ignores_short_quotes() {
+        let source = r"to `1` at index `[0]` i".to_string();
+        let target = source.clone();
+        assert_eq!(parse(source), target);
+    }
+    #[test]
+    fn ignores_pre_sections_with_final_quote() {
+        let source = r###"```bash
+$ cargo run
+   Compiling points v0.1.0 (file:///projects/points)
+error: the trait bound `Point: std::fmt::Display` is not satisfied [--explain E0277]
+ --> src/main.rs:8:29
+8 |>     println!("Point 1: {}", p1);
+  |>                             ^^
+:2:27: 2:58: note: in this expansion of format_args!
+:3:1: 3:54: note: in this expansion of print! (defined in )
+src/main.rs:8:5: 8:33: note: in this expansion of println! (defined in )
+note: `Point` cannot be formatted with the default formatter; try using `:?` instead if you are using a format string
+note: required by `std::fmt::Display::fmt`
+```
+`here` is another [link](the ref)
+"###.to_string();
+        let target = r###"```bash
+$ cargo run
+   Compiling points v0.1.0 (file:///projects/points)
+error: the trait bound `Point: std::fmt::Display` is not satisfied [--explain E0277]
+ --> src/main.rs:8:29
+8 |>     println!("Point 1: {}", p1);
+  |>                             ^^
+:2:27: 2:58: note: in this expansion of format_args!
+:3:1: 3:54: note: in this expansion of print! (defined in )
+src/main.rs:8:5: 8:33: note: in this expansion of println! (defined in )
+note: `Point` cannot be formatted with the default formatter; try using `:?` instead if you are using a format string
+note: required by `std::fmt::Display::fmt`
+```
+`here` is another link at *the ref*
+"###.to_string();
+        assert_eq!(parse(source), target);
+    }
+    #[test]
+    fn parses_adam_p_cheatsheet() {
+        let source = r###"[I'm an inline-style link](https://www.google.com)
+
+[I'm an inline-style link with title](https://www.google.com "Google's Homepage")
+
+[I'm a reference-style link][Arbitrary case-insensitive reference text]
+
+[I'm a relative reference to a repository file](../blob/master/LICENSE)
+
+[You can use numbers for reference-style link definitions][1]
+
+Or leave it empty and use the [link text itself][].
+
+URLs and URLs in angle brackets will automatically get turned into links.
+http://www.example.com or  and sometimes
+example.com (but not on Github, for example).
+
+Some text to show that the reference links can follow later.
+
+[arbitrary case-insensitive reference text]: https://www.mozilla.org
+[1]: http://slashdot.org
+[link text itself]: http://www.reddit.com"###
+            .to_string();
+
+        let target = r###"I'm an inline-style link at *https://www.google.com*
+
+I'm an inline-style link with title at *https://www.google.com*
+
+I'm a reference-style link at *https://www.mozilla.org*
+
+I'm a relative reference to a repository file at *../blob/master/LICENSE*
+
+You can use numbers for reference-style link definitions at *http://slashdot.org*
+
+Or leave it empty and use the link text itself at *http://www.reddit.com*.
+
+URLs and URLs in angle brackets will automatically get turned into links.
+http://www.example.com or  and sometimes
+example.com (but not on Github, for example).
+
+Some text to show that the reference links can follow later.
+"###
+            .to_string();
+        assert_eq!(parse(source), target);
+    }
+
+
+
+}