/* markdown: a C implementation of John Gruber's Markdown markup language. * * Copyright (C) 2007 David L Parsons. * The redistribution terms are provided in the COPYRIGHT file that must * be distributed with this source code. */ #include #include #include #include #include #include #include "config.h" #include "cstring.h" #include "markdown.h" #include "amalloc.h" typedef int (*stfu)(const void*,const void*); typedef void (*spanhandler)(MMIOT*,int); /* forward declarations */ static void text(MMIOT *f); static Paragraph *display(Paragraph*, MMIOT*); /* externals from markdown.c */ int __mkd_footsort(Footnote *, Footnote *); /* * push text into the generator input buffer */ static void push(char *bfr, int size, MMIOT *f) { while ( size-- > 0 ) EXPAND(f->in) = *bfr++; } /* look characters ahead of the cursor. */ static inline int peek(MMIOT *f, int i) { i += (f->isp-1); return (i >= 0) && (i < S(f->in)) ? T(f->in)[i] : EOF; } /* pull a byte from the input buffer */ static inline int pull(MMIOT *f) { return ( f->isp < S(f->in) ) ? T(f->in)[f->isp++] : EOF; } /* return a pointer to the current position in the input buffer. */ static inline char* cursor(MMIOT *f) { return T(f->in) + f->isp; } static inline int isthisspace(MMIOT *f, int i) { int c = peek(f, i); if ( c == EOF ) return 1; if ( c & 0x80 ) return 0; return isspace(c) || (c < ' '); } static inline int isthisalnum(MMIOT *f, int i) { int c = peek(f, i); return (c != EOF) && isalnum(c); } static inline int isthisnonword(MMIOT *f, int i) { return isthisspace(f, i) || ispunct(peek(f,i)); } /* return/set the current cursor position */ #define mmiotseek(f,x) (f->isp = x) #define mmiottell(f) (f->isp) /* move n characters forward ( or -n characters backward) in the input buffer. 
*/ static void shift(MMIOT *f, int i) { if (f->isp + i >= 0 ) f->isp += i; } /* Qchar() */ static void Qchar(int c, MMIOT *f) { block *cur; if ( S(f->Q) == 0 ) { cur = &EXPAND(f->Q); memset(cur, 0, sizeof *cur); cur->b_type = bTEXT; } else cur = &T(f->Q)[S(f->Q)-1]; EXPAND(cur->b_text) = c; } /* Qstring() */ static void Qstring(char *s, MMIOT *f) { while (*s) Qchar(*s++, f); } /* Qwrite() */ static void Qwrite(char *s, int size, MMIOT *f) { while (size-- > 0) Qchar(*s++, f); } /* Qprintf() */ static void Qprintf(MMIOT *f, char *fmt, ...) { char bfr[80]; va_list ptr; va_start(ptr,fmt); vsnprintf(bfr, sizeof bfr, fmt, ptr); va_end(ptr); Qstring(bfr, f); } /* Qem() */ static void Qem(MMIOT *f, char c, int count) { block *p = &EXPAND(f->Q); memset(p, 0, sizeof *p); p->b_type = (c == '*') ? bSTAR : bUNDER; p->b_char = c; p->b_count = count; memset(&EXPAND(f->Q), 0, sizeof(block)); } /* generate html from a markup fragment */ void ___mkd_reparse(char *bfr, int size, int flags, MMIOT *f, char *esc) { MMIOT sub; struct escaped e; ___mkd_initmmiot(&sub, f->footnotes); sub.flags = f->flags | flags; sub.cb = f->cb; sub.ref_prefix = f->ref_prefix; if ( esc ) { sub.esc = &e; e.up = f->esc; e.text = esc; } else sub.esc = f->esc; push(bfr, size, &sub); EXPAND(sub.in) = 0; S(sub.in)--; text(&sub); ___mkd_emblock(&sub); Qwrite(T(sub.out), S(sub.out), f); ___mkd_freemmiot(&sub, f->footnotes); } /* * check the escape list for special cases */ static int escaped(MMIOT *f, char c) { struct escaped *thing = f->esc; while ( thing ) { if ( strchr(thing->text, c) ) return 1; thing = thing->up; } return 0; } /* * write out a url, escaping problematic characters */ static void puturl(char *s, int size, MMIOT *f, int display) { unsigned char c; while ( size-- > 0 ) { c = *s++; if ( c == '\\' && size-- > 0 ) { c = *s++; if ( !( ispunct(c) || isspace(c) ) ) Qchar('\\', f); } if ( c == '&' ) Qstring("&", f); else if ( c == '<' ) Qstring("<", f); else if ( c == '"' ) Qstring("%22", f); else if ( 
isalnum(c) || ispunct(c) || (display && isspace(c)) ) Qchar(c, f); else if ( c == 003 ) /* untokenize ^C */ Qstring(" ", f); else Qprintf(f, "%%%02X", c); } } /* advance forward until the next character is not whitespace */ static int eatspace(MMIOT *f) { int c; for ( ; ((c=peek(f, 1)) != EOF) && isspace(c); pull(f) ) ; return c; } /* (match (a (nested (parenthetical (string.))))) */ static int parenthetical(int in, int out, MMIOT *f) { int size, indent, c; for ( indent=1,size=0; indent; size++ ) { if ( (c = pull(f)) == EOF ) return EOF; else if ( (c == '\\') && (peek(f,1) == out || peek(f,1) == in) ) { ++size; pull(f); } else if ( c == in ) ++indent; else if ( c == out ) --indent; } return size ? (size-1) : 0; } /* extract a []-delimited label from the input stream. */ static int linkylabel(MMIOT *f, Cstring *res) { char *ptr = cursor(f); int size; if ( (size = parenthetical('[',']',f)) != EOF ) { T(*res) = ptr; S(*res) = size; return 1; } return 0; } /* see if the quote-prefixed linky segment is actually a title. 
*/ static int linkytitle(MMIOT *f, char quote, Footnote *ref) { int whence = mmiottell(f); char *title = cursor(f); char *e; register int c; while ( (c = pull(f)) != EOF ) { e = cursor(f); if ( c == quote ) { if ( (c = eatspace(f)) == ')' ) { T(ref->title) = 1+title; S(ref->title) = (e-title)-2; return 1; } } } mmiotseek(f, whence); return 0; } /* extract a =HHHxWWW size from the input stream */ static int linkysize(MMIOT *f, Footnote *ref) { int height=0, width=0; int whence = mmiottell(f); int c; if ( isspace(peek(f,0)) ) { pull(f); /* eat '=' */ for ( c = pull(f); isdigit(c); c = pull(f)) width = (width * 10) + (c - '0'); if ( c == 'x' ) { for ( c = pull(f); isdigit(c); c = pull(f)) height = (height*10) + (c - '0'); if ( isspace(c) ) c = eatspace(f); if ( (c == ')') || ((c == '\'' || c == '"') && linkytitle(f, c, ref)) ) { ref->height = height; ref->width = width; return 1; } } } mmiotseek(f, whence); return 0; } /* extract a <...>-encased url from the input stream. * (markdown 1.0.2b8 compatibility; older versions * of markdown treated the < and > as syntactic * sugar that didn't have to be there. 1.0.2b8 * requires a closing >, and then falls into the * title or closing ) */ static int linkybroket(MMIOT *f, int image, Footnote *p) { int c; int good = 0; T(p->link) = cursor(f); for ( S(p->link)=0; (c = pull(f)) != '>'; ++S(p->link) ) { /* pull in all input until a '>' is found, or die trying. */ if ( c == EOF ) return 0; else if ( (c == '\\') && ispunct(peek(f,2)) ) { ++S(p->link); pull(f); } } c = eatspace(f); /* next nonspace needs to be a title, a size, or ) */ if ( ( c == '\'' || c == '"' ) && linkytitle(f,c,p) ) good=1; else if ( image && (c == '=') && linkysize(f,p) ) good=1; else good=( c == ')' ); if ( good ) { if ( peek(f, 1) == ')' ) pull(f); ___mkd_tidy(&p->link); } return good; } /* linkybroket */ /* extract a (-prefixed url from the input stream. 
* the label is either of the format ``, where I * extract until I find a >, or it is of the format * `text`, where I extract until I reach a ')', a quote, * or (if image) a '=' */ static int linkyurl(MMIOT *f, int image, Footnote *p) { int c; int mayneedtotrim=0; if ( (c = eatspace(f)) == EOF ) return 0; if ( c == '<' ) { pull(f); if ( !(f->flags & MKD_1_COMPAT) ) return linkybroket(f,image,p); mayneedtotrim=1; } T(p->link) = cursor(f); for ( S(p->link)=0; (c = peek(f,1)) != ')'; ++S(p->link) ) { if ( c == EOF ) return 0; else if ( (c == '"' || c == '\'') && linkytitle(f, c, p) ) break; else if ( image && (c == '=') && linkysize(f, p) ) break; else if ( (c == '\\') && ispunct(peek(f,2)) ) { ++S(p->link); pull(f); } pull(f); } if ( peek(f, 1) == ')' ) pull(f); ___mkd_tidy(&p->link); if ( mayneedtotrim && (T(p->link)[S(p->link)-1] == '>') ) --S(p->link); return 1; } /* prefixes for */ static struct _protocol { char *name; int nlen; } protocol[] = { #define _aprotocol(x) { x, (sizeof x)-1 } _aprotocol( "https:" ), _aprotocol( "http:" ), _aprotocol( "news:" ), _aprotocol( "ftp:" ), #undef _aprotocol }; #define NRPROTOCOLS (sizeof protocol / sizeof protocol[0]) static int isautoprefix(char *text, int size) { int i; struct _protocol *p; for (i=0, p=protocol; i < NRPROTOCOLS; i++, p++) if ( (size >= p->nlen) && strncasecmp(text, p->name, p->nlen) == 0 ) return 1; return 0; } /* * all the tag types that linkylinky can produce are * defined by this structure. */ typedef struct linkytype { char *pat; int szpat; char *link_pfx; /* tag prefix and link pointer (eg: "" */ char *text_sfx; /* text suffix (eg: "" */ int flags; /* reparse flags */ int kind; /* tag is url or something else? 
*/ #define IS_URL 0x01 } linkytype; static linkytype imaget = { 0, 0, "\"",", MKD_NOIMAGE|MKD_TAGTEXT, IS_URL }; static linkytype linkt = { 0, 0, "", "", MKD_NOLINKS, IS_URL }; /* * pseudo-protocols for [][]; * * id: generates tag * class: generates tag * raw: just dump the link without any processing */ static linkytype specials[] = { { "id:", 3, "", "", 0, 0 }, { "raw:", 4, 0, 0, 0, 0, 0, MKD_NOHTML, 0 }, { "lang:", 5, "", "", 0, 0 }, { "abbr:", 5, "", "", 0, 0 }, { "class:", 6, "", "", 0, 0 }, } ; #define NR(x) (sizeof x / sizeof x[0]) /* see if t contains one of our pseudo-protocols. */ static linkytype * pseudo(Cstring t) { int i; linkytype *r; for ( i=0, r=specials; i < NR(specials); i++,r++ ) { if ( (S(t) > r->szpat) && (strncasecmp(T(t), r->pat, r->szpat) == 0) ) return r; } return 0; } /* print out the start of an `img' or `a' tag, applying callbacks as needed. */ static void printlinkyref(MMIOT *f, linkytype *tag, char *link, int size) { char *edit; if ( f->flags & IS_LABEL ) return; Qstring(tag->link_pfx, f); if ( tag->kind & IS_URL ) { if ( f->cb && f->cb->e_url && (edit = (*f->cb->e_url)(link, size, f->cb->e_data)) ) { puturl(edit, strlen(edit), f, 0); if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data); } else puturl(link + tag->szpat, size - tag->szpat, f, 0); } else ___mkd_reparse(link + tag->szpat, size - tag->szpat, MKD_TAGTEXT, f, 0); Qstring(tag->link_sfx, f); if ( f->cb && f->cb->e_flags && (edit = (*f->cb->e_flags)(link, size, f->cb->e_data)) ) { Qchar(' ', f); Qstring(edit, f); if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data); } } /* printlinkyref */ /* helper function for php markdown extra footnotes; allow the user to * define a prefix tag instead of just `fn` */ static char * p_or_nothing(p) MMIOT *p; { return p->ref_prefix ? 
p->ref_prefix : "fn"; } /* php markdown extra/daring fireball style print footnotes */ static int extra_linky(MMIOT *f, Cstring text, Footnote *ref) { if ( ref->flags & REFERENCED ) return 0; if ( f->flags & IS_LABEL ) ___mkd_reparse(T(text), S(text), linkt.flags, f, 0); else { ref->flags |= REFERENCED; ref->refnumber = ++ f->reference; Qprintf(f, "%d", p_or_nothing(f), ref->refnumber, p_or_nothing(f), ref->refnumber, ref->refnumber); } return 1; } /* extra_linky */ /* print out a linky (or fail if it's Not Allowed) */ static int linkyformat(MMIOT *f, Cstring text, int image, Footnote *ref) { linkytype *tag; if ( image ) tag = &imaget; else if ( tag = pseudo(ref->link) ) { if ( f->flags & (MKD_NO_EXT|MKD_SAFELINK) ) return 0; } else if ( (f->flags & MKD_SAFELINK) && T(ref->link) && (T(ref->link)[0] != '/') && !isautoprefix(T(ref->link), S(ref->link)) ) /* if MKD_SAFELINK, only accept links that are local or * a well-known protocol */ return 0; else tag = &linkt; if ( f->flags & tag->flags ) return 0; if ( f->flags & IS_LABEL ) ___mkd_reparse(T(text), S(text), tag->flags, f, 0); else if ( tag->link_pfx ) { printlinkyref(f, tag, T(ref->link), S(ref->link)); if ( tag->WxH ) { if ( ref->height ) Qprintf(f," height=\"%d\"", ref->height); if ( ref->width ) Qprintf(f, " width=\"%d\"", ref->width); } if ( S(ref->title) ) { Qstring(" title=\"", f); ___mkd_reparse(T(ref->title), S(ref->title), MKD_TAGTEXT, f, 0); Qchar('"', f); } Qstring(tag->text_pfx, f); ___mkd_reparse(T(text), S(text), tag->flags, f, 0); Qstring(tag->text_sfx, f); } else Qwrite(T(ref->link) + tag->szpat, S(ref->link) - tag->szpat, f); return 1; } /* linkyformat */ /* * process embedded links and images */ static int linkylinky(int image, MMIOT *f) { int start = mmiottell(f); Cstring name; Footnote key, *ref; int status = 0; int extra_footnote = 0; CREATE(name); memset(&key, 0, sizeof key); if ( linkylabel(f, &name) ) { if ( peek(f,1) == '(' ) { pull(f); if ( linkyurl(f, image, &key) ) status = 
linkyformat(f, name, image, &key); } else { int goodlink, implicit_mark = mmiottell(f); if ( isspace(peek(f,1)) ) pull(f); if ( peek(f,1) == '[' ) { pull(f); /* consume leading '[' */ goodlink = linkylabel(f, &key.tag); } else { /* new markdown implicit name syntax doesn't * require a second [] */ mmiotseek(f, implicit_mark); goodlink = !(f->flags & MKD_1_COMPAT); if ( (f->flags & MKD_EXTRA_FOOTNOTE) && (!image) && S(name) && T(name)[0] == '^' ) extra_footnote = 1; } if ( goodlink ) { if ( !S(key.tag) ) { DELETE(key.tag); T(key.tag) = T(name); S(key.tag) = S(name); } if ( ref = bsearch(&key, T(*f->footnotes), S(*f->footnotes), sizeof key, (stfu)__mkd_footsort) ) { if ( extra_footnote ) status = extra_linky(f,name,ref); else status = linkyformat(f, name, image, ref); } } } } DELETE(name); ___mkd_freefootnote(&key); if ( status == 0 ) mmiotseek(f, start); return status; } /* write a character to output, doing text escapes ( & -> &, * > -> > < -> < ) */ static void cputc(int c, MMIOT *f) { switch (c) { case '&': Qstring("&", f); break; case '>': Qstring(">", f); break; case '<': Qstring("<", f); break; default : Qchar(c, f); break; } } /* * convert an email address to a string of nonsense */ static void mangle(char *s, int len, MMIOT *f) { while ( len-- > 0 ) { Qstring("&#", f); Qprintf(f, COINTOSS() ? "x%02x;" : "%02d;", *((unsigned char*)(s++)) ); } } /* nrticks() -- count up a row of tick marks */ static int nrticks(int offset, int tickchar, MMIOT *f) { int tick = 0; while ( peek(f, offset+tick) == tickchar ) tick++; return tick; } /* nrticks */ /* matchticks() -- match a certain # of ticks, and if that fails * match the largest subset of those ticks. * * if a subset was matched, return the # of ticks * that were matched. 
*/
/* matchticks() returns the number of bytes between the opening run of
 * `ticks` tick characters and the closing run it settles on, or 0 if
 * there is none.  *endticks receives the length of the closing run:
 * `ticks` for an exact match, fewer when only a shorter run was found.
 */
static int
matchticks(MMIOT *f, int tickchar, int ticks, int *endticks)
{
    int size, count, c;
    int subsize=0, subtick=0;

    *endticks = ticks;
    for (size = 0; (c=peek(f,size+ticks)) != EOF; size ++) {
        if ( (c == tickchar) && ( count = nrticks(size+ticks,tickchar,f)) ) {
            if ( count == ticks )
                return size;
            else if ( count ) {
                /* remember the longest run that is shorter than the
                 * opening run, then skip over this run
                 */
                if ( (count > subtick) && (count < ticks) ) {
                    subsize = size;
                    subtick = count;
                }
                size += count;
            }
        }
    }
    if ( subsize ) {
        *endticks = subtick;
        return subsize;
    }
    return 0;
} /* matchticks */


/* code() -- write a string out as code. The only characters that have
 *           special meaning in a code block are * `<' and `&' , which
 *           are /always/ expanded to &lt; and &amp;
 *
 * A backslash in front of a character on the active escape list is
 * dropped.  ^C is expanded to two spaces; the literal had been reduced
 * to a single space, contradicting the comment beside it.
 */
static void
code(MMIOT *f, char *s, int length)
{
    int i,c;

    for ( i=0; i < length; i++ )
        if ( (c = s[i]) == 003)	/* ^C: expand back to 2 spaces */
            Qstring("  ", f);
        else if ( c == '\\' && (i < length-1) && escaped(f, s[i+1]) )
            cputc(s[++i], f);
        else
            cputc(c, f);
} /* code */


/* delspan() -- write out a chunk of text wrapped in a pair of tags.
 *
 * NOTE(review): both tag strings are empty here; the markup they held
 * appears to have been lost in extraction.  Restore from upstream.
 */
static void
delspan(MMIOT *f, int size)
{
    Qstring("", f);
    ___mkd_reparse(cursor(f)-1, size, 0, f, 0);
    Qstring("", f);
}


/* codespan() -- write out a chunk of text as code, trimming one
 *               space off the front and/or back as appropriate.
*/ static void codespan(MMIOT *f, int size) { int i=0; if ( size > 1 && peek(f, size-1) == ' ' ) --size; if ( peek(f,i) == ' ' ) ++i, --size; Qstring("", f); code(f, cursor(f)+(i-1), size); Qstring("", f); } /* codespan */ /* before letting a tag through, validate against * MKD_NOLINKS and MKD_NOIMAGE */ static int forbidden_tag(MMIOT *f) { int c = toupper(peek(f, 1)); if ( f->flags & MKD_NOHTML ) return 1; if ( c == 'A' && (f->flags & MKD_NOLINKS) && !isthisalnum(f,2) ) return 1; if ( c == 'I' && (f->flags & MKD_NOIMAGE) && strncasecmp(cursor(f)+1, "MG", 2) == 0 && !isthisalnum(f,4) ) return 1; return 0; } /* Check a string to see if it looks like a mail address * "looks like a mail address" means alphanumeric + some * specials, then a `@`, then alphanumeric + some specials, * but with a `.` */ static int maybe_address(char *p, int size) { int ok = 0; for ( ;size && (isalnum(*p) || strchr("._-+*", *p)); ++p, --size) ; if ( ! (size && *p == '@') ) return 0; --size, ++p; if ( size && *p == '.' ) return 0; for ( ;size && (isalnum(*p) || strchr("._-+", *p)); ++p, --size ) if ( *p == '.' && size > 1 ) ok = 1; return size ? 0 : ok; } /* The size-length token at cursor(f) is either a mailto:, an * implicit mailto:, one of the approved url protocols, or just * plain old text. If it's a mailto: or an approved protocol, * linkify it, otherwise say "no" */ static int process_possible_link(MMIOT *f, int size) { int address= 0; int mailto = 0; char *text = cursor(f); if ( f->flags & MKD_NOLINKS ) return 0; if ( (size > 7) && strncasecmp(text, "mailto:", 7) == 0 ) { /* if it says it's a mailto, it's a mailto -- who am * I to second-guess the user? 
*/ address = 1; mailto = 7; /* 7 is the length of "mailto:"; we need this */ } else address = maybe_address(text, size); if ( address ) { Qstring("", f); mangle(text+mailto, size-mailto, f); Qstring("", f); return 1; } else if ( isautoprefix(text, size) ) { printlinkyref(f, &linkt, text, size); Qchar('>', f); puturl(text,size,f, 1); Qstring("", f); return 1; } return 0; } /* process_possible_link */ /* a < may be just a regular character, the start of an embedded html * tag, or the start of an . If it's an automatic * link, we also need to know if it's an email address because if it * is we need to mangle it in our futile attempt to cut down on the * spaminess of the rendered page. */ static int maybe_tag_or_link(MMIOT *f) { int c, size; int maybetag = 1; if ( f->flags & MKD_TAGTEXT ) return 0; for ( size=0; (c = peek(f, size+1)) != '>'; size++) { if ( c == EOF ) return 0; else if ( c == '\\' ) { maybetag=0; if ( peek(f, size+2) != EOF ) size++; } else if ( isspace(c) ) break; #if WITH_GITHUB_TAGS else if ( ! (c == '/' || c == '-' || c == '_' || isalnum(c) ) ) #else else if ( ! (c == '/' || isalnum(c) ) ) #endif maybetag=0; } if ( size ) { if ( maybetag || (size >= 3 && strncmp(cursor(f), "!--", 3) == 0) ) { /* It is not a html tag unless we find the closing '>' in * the same block. */ while ( (c = peek(f, size+1)) != '>' ) if ( c == EOF ) return 0; else size++; if ( forbidden_tag(f) ) return 0; Qchar('<', f); while ( ((c = peek(f, 1)) != EOF) && (c != '>') ) Qchar(pull(f), f); return 1; } else if ( !isspace(c) && process_possible_link(f, size) ) { shift(f, size+1); return 1; } } return 0; } /* autolinking means that all inline html is . A * autolink url is alphanumerics, slashes, periods, underscores, * the at sign, colon, and the % character. */ static int maybe_autolink(MMIOT *f) { register int c; int size; /* greedily scan forward for the end of a legitimate link. 
*/ for ( size=0; (c=peek(f, size+1)) != EOF; size++ ) if ( c == '\\' ) { if ( peek(f, size+2) != EOF ) ++size; } else if ( isspace(c) || strchr("'\"()[]{}<>`", c) ) break; if ( (size > 1) && process_possible_link(f, size) ) { shift(f, size); return 1; } return 0; } /* smartyquote code that's common for single and double quotes */ static int smartyquote(int *flags, char typeofquote, MMIOT *f) { int bit = (typeofquote == 's') ? 0x01 : 0x02; if ( bit & (*flags) ) { if ( isthisnonword(f,1) ) { Qprintf(f, "&r%cquo;", typeofquote); (*flags) &= ~bit; return 1; } } else if ( isthisnonword(f,-1) && peek(f,1) != EOF ) { Qprintf(f, "&l%cquo;", typeofquote); (*flags) |= bit; return 1; } return 0; } static int islike(MMIOT *f, char *s) { int len; int i; if ( s[0] == '|' ) { if ( !isthisnonword(f, -1) ) return 0; ++s; } if ( !(len = strlen(s)) ) return 0; if ( s[len-1] == '|' ) { if ( !isthisnonword(f,len-1) ) return 0; len--; } for (i=1; i < len; i++) if (tolower(peek(f,i)) != s[i]) return 0; return 1; } static struct smarties { char c0; char *pat; char *entity; int shift; } smarties[] = { { '\'', "'s|", "rsquo", 0 }, { '\'', "'t|", "rsquo", 0 }, { '\'', "'re|", "rsquo", 0 }, { '\'', "'ll|", "rsquo", 0 }, { '\'', "'ve|", "rsquo", 0 }, { '\'', "'m|", "rsquo", 0 }, { '\'', "'d|", "rsquo", 0 }, { '-', "---", "mdash", 2 }, { '-', "--", "ndash", 1 }, { '.', "...", "hellip", 2 }, { '.', ". . 
.", "hellip", 4 }, { '(', "(c)", "copy", 2 }, { '(', "(r)", "reg", 2 }, { '(', "(tm)", "trade", 3 }, { '3', "|3/4|", "frac34", 2 }, { '3', "|3/4ths|", "frac34", 2 }, { '1', "|1/2|", "frac12", 2 }, { '1', "|1/4|", "frac14", 2 }, { '1', "|1/4th|", "frac14", 2 }, { '&', "�", 0, 3 }, } ; #define NRSMART ( sizeof smarties / sizeof smarties[0] ) /* Smarty-pants-style chrome for quotes, -, ellipses, and (r)(c)(tm) */ static int smartypants(int c, int *flags, MMIOT *f) { int i; if ( f->flags & (MKD_NOPANTS|MKD_TAGTEXT|IS_LABEL) ) return 0; for ( i=0; i < NRSMART; i++) if ( (c == smarties[i].c0) && islike(f, smarties[i].pat) ) { if ( smarties[i].entity ) Qprintf(f, "&%s;", smarties[i].entity); shift(f, smarties[i].shift); return 1; } switch (c) { case '<' : return 0; case '\'': if ( smartyquote(flags, 's', f) ) return 1; break; case '"': if ( smartyquote(flags, 'd', f) ) return 1; break; case '`': if ( peek(f, 1) == '`' ) { int j = 2; while ( (c=peek(f,j)) != EOF ) { if ( c == '\\' ) j += 2; else if ( c == '`' ) break; else if ( c == '\'' && peek(f, j+1) == '\'' ) { Qstring("“", f); ___mkd_reparse(cursor(f)+1, j-2, 0, f, 0); Qstring("”", f); shift(f,j+1); return 1; } else ++j; } } break; } return 0; } /* smartypants */ /* process a body of text encased in some sort of tick marks. If it * works, generate the output and return 1, otherwise just return 0 and * let the caller figure it out. 
*/
/* NOTE(review): from here to the end of the file the HTML markup that
 * used to sit inside string literals is missing, and in places code was
 * swallowed along with it (see printheader and mkd_extra_footnotes).
 * The code below is kept exactly as found; only comments were added.
 * Restore the damaged literals from the upstream sources before
 * building.
 */
/* tickhandler(): counts the run of `tickchar` at the cursor (including
 * the one already pulled).  Gives up if !allow_space and the character
 * after the run is whitespace, if the run is shorter than `minticks`,
 * or if matchticks() finds no closing run.  Otherwise the span is
 * handed to `spanner` and the cursor is moved past the closing run.
 */
static int tickhandler(MMIOT *f, int tickchar, int minticks, int allow_space, spanhandler spanner) { int endticks, size; int tick = nrticks(0, tickchar, f); if ( !allow_space && isspace(peek(f,tick)) ) return 0; if ( (tick >= minticks) && (size = matchticks(f,tickchar,tick,&endticks)) ) { if ( endticks < tick ) { size += (tick - endticks); tick = endticks; } shift(f, tick); (*spanner)(f,size); shift(f, size+tick-1); return 1; } return 0; }
#define tag_text(f) (f->flags & MKD_TAGTEXT)
/* text(): the inline formatter.  Pulls the input buffer one character
 * at a time, hands special characters to the helpers above, and queues
 * everything else unchanged.  When the input is exhausted the input
 * buffer is reset to empty.
 */
static void text(MMIOT *f) { int c, j; int rep; int smartyflags = 0; while (1) { if ( (f->flags & MKD_AUTOLINK) && isalpha(peek(f,1)) && !tag_text(f) ) maybe_autolink(f); c = pull(f); if (c == EOF) break; if ( smartypants(c, &smartyflags, f) ) continue; switch (c) {
case 0: break;
/* ^C stands for a hard line break; NOTE(review): the second literal holds a raw newline where markup is presumably missing */
case 3: Qstring(tag_text(f) ? " " : "
", f); break;
/* NOTE(review): the tag_text branches of '>' and '"' emit the bare character; the second literal is not even balanced */
case '>': if ( tag_text(f) ) Qstring(">", f); else Qchar(c, f); break;
case '"': if ( tag_text(f) ) Qstring(""", f); else Qchar(c, f); break;
case '!': if ( peek(f,1) == '[' ) { pull(f); if ( tag_text(f) || !linkylinky(1, f) ) Qstring("![", f); } else Qchar(c, f); break;
case '[': if ( tag_text(f) || !linkylinky(0, f) ) Qchar(c, f); break;
/* A^B -> AB */
/* superscript: either ^(parenthesized text) or ^ followed by a run of alphanumerics; NOTE(review): both tag strings are empty */
case '^': if ( (f->flags & (MKD_NOSUPERSCRIPT|MKD_STRICT|MKD_TAGTEXT)) || (isthisnonword(f,-1) && peek(f,-1) != ')') || isthisspace(f,1) ) Qchar(c,f); else { char *sup = cursor(f); int len = 0; if ( peek(f,1) == '(' ) { int here = mmiottell(f); pull(f); if ( (len = parenthetical('(',')',f)) <= 0 ) { mmiotseek(f,here); Qchar(c, f); break; } sup++; } else { while ( isthisalnum(f,1+len) ) ++len; if ( !len ) { Qchar(c,f); break; } shift(f,len); } Qstring("",f); ___mkd_reparse(sup, len, 0, f, "()"); Qstring("", f); } break;
case '_': /* Underscores don't count if they're in the middle of a word */ if ( !(f->flags & (MKD_NORELAXED|MKD_STRICT)) && isthisalnum(f,-1) && isthisalnum(f,1) ) { Qchar(c, f); break; }
/* no break above: '_' deliberately falls through into the '*' case */
case '*': /* Underscores & stars don't count if they're out in the middle * of whitespace */ if ( isthisspace(f,-1) && isthisspace(f,1) ) { Qchar(c, f); break; } /* else fall into the regular old emphasis case */ if ( tag_text(f) ) Qchar(c, f); else { for (rep = 1; peek(f,1) == c; pull(f) ) ++rep; Qem(f,c,rep); } break;
case '~': if ( (f->flags & (MKD_NOSTRIKETHROUGH|MKD_TAGTEXT|MKD_STRICT)) || ! tickhandler(f,c,2,0, delspan) ) Qchar(c, f); break;
case '`': if ( tag_text(f) || !tickhandler(f,c,1,1,codespan) ) Qchar(c, f); break;
/* backslash escapes: the character after the backslash decides whether the backslash is dropped or kept (shift(f,-1) puts that character back) */
case '\\': switch ( c = pull(f) ) { case '&': Qstring("&", f); break; case '<': c = peek(f,1); if ( (c == EOF) || isspace(c) ) Qstring("<", f); else { /* Markdown.pl does not escape <[nonwhite] * sequences */ Qchar('\\', f); shift(f, -1); } break; case '^': if ( f->flags & (MKD_STRICT|MKD_NOSUPERSCRIPT) ) { Qchar('\\', f); shift(f,-1); break; } Qchar(c, f); break; case ':': case '|': if ( f->flags & MKD_NOTABLES ) { Qchar('\\', f); shift(f,-1); break; } Qchar(c, f); break; case EOF: Qchar('\\', f); break; default: if ( escaped(f,c) || strchr(">#.-+{}]![*_\\()`", c) ) Qchar(c, f); else { Qchar('\\', f); shift(f, -1); } break; } break;
case '<': if ( !maybe_tag_or_link(f) ) Qstring("<", f); break;
/* '&' is passed through untouched when it starts something shaped like an entity (letters/digits, optionally after '#', ending in ';') */
case '&': j = (peek(f,1) == '#' ) ? 2 : 1; while ( isthisalnum(f,j) ) ++j; if ( peek(f,j) != ';' ) Qstring("&", f); else Qchar(c, f); break;
default: Qchar(c, f); break; } }
/* truncate the input string after we've finished processing it */ S(f->in) = f->isp = 0; } /* text */
/* print a header block */
/* Pushes the header's text through text().  NOTE(review): the format
 * strings lost their markup, and in the #else branch the statements
 * between Qstring(" and text->text) are missing, so this function does
 * not compile as found.
 */
static void printheader(Paragraph *pp, MMIOT *f) {
#if WITH_ID_ANCHOR
Qprintf(f, "hnumber); if ( f->flags & MKD_TOC ) { Qstring(" id=\"", f); mkd_string_to_anchor(T(pp->text->text), S(pp->text->text), (mkd_sta_function_t)Qchar, f, 1); Qchar('"', f); } Qchar('>', f);
#else
if ( f->flags & MKD_TOC ) { Qstring("
text->text), S(pp->text->text), (mkd_sta_function_t)Qchar, f, 1); Qstring("\">\n", f); } Qprintf(f, "", pp->hnumber);
#endif
push(T(pp->text->text), S(pp->text->text), f); text(f); Qprintf(f, "", pp->hnumber); }
/* column alignments for tables, and the attribute text for each */
enum e_alignments { a_NONE, a_CENTER, a_LEFT, a_RIGHT }; static char* alignments[] = { "", " style=\"text-align:center;\"", " style=\"text-align:left;\"", " style=\"text-align:right;\"" }; typedef STRING(int) Istring;
/* splat(): write one table row.  The line is split on '|' (a backslash
 * protects the following character), each cell is reparsed with '|' on
 * the escape list and wrapped in a `block` tag carrying the column's
 * alignment.  With `force` set the last column absorbs the rest of the
 * line and missing columns are filled with empty cells.  Returns the
 * number of columns written.
 * NOTE(review): T(p->text)[S(p->text)-1] is read without checking that
 * the line is non-empty after tidying -- confirm callers never pass an
 * empty line.
 */
static int splat(Line *p, char *block, Istring align, int force, MMIOT *f) { int first, idx = p->dle, colno = 0; ___mkd_tidy(&p->text); if ( T(p->text)[S(p->text)-1] == '|' ) --S(p->text); Qstring("\n", f); while ( idx < S(p->text) ) { first = idx; if ( force && (colno >= S(align)-1) ) idx = S(p->text); else while ( (idx < S(p->text)) && (T(p->text)[idx] != '|') ) { if ( T(p->text)[idx] == '\\' ) ++idx; ++idx; } Qprintf(f, "<%s%s>", block, alignments[ (colno < S(align)) ? T(align)[colno] : a_NONE ]); ___mkd_reparse(T(p->text)+first, idx-first, 0, f, "|"); Qprintf(f, "\n", block); idx++; colno++; } if ( force ) while (colno < S(align) ) { Qprintf(f, "<%s>\n", block, block); ++colno; } Qstring("\n", f); return colno; }
/* printtable(): the paragraph's first line is the header row, the
 * second the dashes row, the rest are body rows.  Column alignment is
 * read from the colons in the dashes row (both ends = center, leading =
 * left, trailing = right); the alignment list is then trimmed or padded
 * to the number of header columns.  Always returns 1.
 */
static int printtable(Paragraph *pp, MMIOT *f) { /* header, dashes, then lines of content */ Line *hdr, *dash, *body; Istring align; int hcols,start; char *p; enum e_alignments it; hdr = pp->text; dash= hdr->next; body= dash->next; if ( T(hdr->text)[hdr->dle] == '|' ) { /* trim leading pipe off all lines */ Line *r; for ( r = pp->text; r; r = r->next ) r->dle ++; } /* figure out cell alignments */ CREATE(align); for (p=T(dash->text), start=dash->dle; start < S(dash->text); ) { char first, last; int end; last=first=0; for (end=start ; (end < S(dash->text)) && p[end] != '|'; ++ end ) { if ( p[end] == '\\' ) ++ end; else if ( !isspace(p[end]) ) { if ( !first) first = p[end]; last = p[end]; } } it = ( first == ':' ) ? (( last == ':') ? a_CENTER : a_LEFT) : (( last == ':') ? a_RIGHT : a_NONE ); EXPAND(align) = it; start = 1+end; } Qstring("\n", f); Qstring("\n", f); hcols = splat(hdr, "th", align, 0, f); Qstring("\n", f); if ( hcols < S(align) ) S(align) = hcols; else while ( hcols > S(align) ) EXPAND(align) = a_NONE; Qstring("\n", f); for ( ; body; body = body->next) splat(body, "td", align, 1, f); Qstring("\n", f); Qstring("
\n", f); DELETE(align); return 1; }
/* printblock(): push every non-empty line of the paragraph into the
 * input buffer, then run text() between Begin[pp->align] and
 * End[pp->align].  A line that is followed by another and ends in two
 * spaces has them replaced by ^C.  Always returns 1.
 * NOTE(review): the Begin/End literals now hold only raw newlines.
 */
static int printblock(Paragraph *pp, MMIOT *f) { Line *t = pp->text; static char *Begin[] = { "", "

", "

" }; static char *End[] = { "", "

","

" }; while (t) { if ( S(t->text) ) { if ( t->next && S(t->text) > 2 && T(t->text)[S(t->text)-2] == ' ' && T(t->text)[S(t->text)-1] == ' ' ) { push(T(t->text), S(t->text)-2, f); push("\003\n", 2, f); } else { ___mkd_tidy(&t->text); push(T(t->text), S(t->text), f); if ( t->next ) push("\n", 1, f); } } t = t->next; } Qstring(Begin[pp->align], f); text(f); Qstring(End[pp->align], f); return 1; }
/* printcode(): write each line through code().  Lines no longer than
 * their leading indent are counted as blanks and only written (as bare
 * newlines) once a later non-blank line follows, so trailing blank
 * lines are dropped.  `lang` is not used in the code visible here.
 */
static void printcode(Line *t, char *lang, MMIOT *f) { int blanks; Qstring("
", f);
    for ( blanks = 0; t ; t = t->next ) {
	if ( S(t->text) > t->dle ) {
	    while ( blanks ) {
		Qchar('\n', f);
		--blanks;
	    }
	    code(f, T(t->text), S(t->text));
	    Qchar('\n', f);
	}
	else blanks++;
    }
    Qstring("
", f); }
/* printhtml(): copy the lines to the output unprocessed; blank lines
 * are only written when a non-blank line follows them.
 */
static void printhtml(Line *t, MMIOT *f) { int blanks; for ( blanks=0; t ; t = t->next ) if ( S(t->text) ) { for ( ; blanks; --blanks ) Qchar('\n', f); Qwrite(T(t->text), S(t->text), f); Qchar('\n', f); } else blanks++; }
/* htmlify(): display every paragraph in the chain starting at p,
 * optionally wrapped in a `block` tag with `arguments`.
 * ___mkd_emblock() is called around each step.
 * NOTE(review): the closing Qprintf() has an empty format string.
 */
static void htmlify(Paragraph *p, char *block, char *arguments, MMIOT *f) { ___mkd_emblock(f); if ( block ) Qprintf(f, arguments ? "<%s %s>" : "<%s>", block, arguments); ___mkd_emblock(f); while (( p = display(p, f) )) { ___mkd_emblock(f); Qstring("\n\n", f); } if ( block ) Qprintf(f, "", block); ___mkd_emblock(f); }
/* definitionlist(): for each entry, reparse every line of its text as
 * a term, then htmlify its children as a "dd" block.
 */
static void definitionlist(Paragraph *p, MMIOT *f) { Line *tag; if ( p ) { Qstring("
\n", f); for ( ; p ; p = p->next) { for ( tag = p->text; tag; tag = tag->next ) { Qstring("
", f); ___mkd_reparse(T(tag->text), S(tag->text), 0, f, 0); Qstring("
\n", f); } htmlify(p->down, "dd", p->ident, f); Qchar('\n', f); } Qstring("
", f); } }
/* listdisplay(): write a ul (typ == UL) or ol list, adding type="a"
 * for AL, with each item htmlify'd as an "li" block.
 * NOTE(review): the closing Qprintf() format lost its markup.
 */
static void listdisplay(int typ, Paragraph *p, MMIOT* f) { if ( p ) { Qprintf(f, "<%cl", (typ==UL)?'u':'o'); if ( typ == AL ) Qprintf(f, " type=\"a\""); Qprintf(f, ">\n"); for ( ; p ; p = p->next ) { htmlify(p->down, "li", p->ident, f); Qchar('\n', f); } Qprintf(f, "\n", (typ==UL)?'u':'o'); } }
/* dump out a Paragraph in the desired manner */
/* Dispatches on p->typ and returns p->next, or 0 when p is null. */
static Paragraph* display(Paragraph *p, MMIOT *f) { if ( !p ) return 0; switch ( p->typ ) { case STYLE: case WHITESPACE: break; case HTML: printhtml(p->text, f); break; case CODE: printcode(p->text, p->lang, f); break; case QUOTE: htmlify(p->down, p->ident ? "div" : "blockquote", p->ident, f); break; case UL: case OL: case AL: listdisplay(p->typ, p->down, f); break; case DL: definitionlist(p->down, f); break; case HR: Qstring("
", f); break; case HDR: printheader(p, f); break; case TABLE: printtable(p, f); break; case SOURCE: htmlify(p->down, 0, 0, f); break; default: printblock(p, f); break; } return p->next; }
/* dump out a list of footnotes */
/* Writes the referenced footnotes to m->out in reference-number order;
 * does nothing when no footnote was referenced.
 * NOTE(review): every format string here lost its markup; several
 * calls pass arguments to formats that no longer contain conversions.
 */
static void mkd_extra_footnotes(MMIOT *m) { int j, i; Footnote *t; if ( m->reference == 0 ) return; Csprintf(&m->out, "\n
\n
\n
    \n"); for ( i=1; i <= m->reference; i++ ) { for ( j=0; j < S(*m->footnotes); j++ ) { t = &T(*m->footnotes)[j]; if ( (t->refnumber == i) && (t->flags & REFERENCED) ) { Csprintf(&m->out, "
  1. \n

    ", p_or_nothing(m), t->refnumber); Csreparse(&m->out, T(t->title), S(t->title), 0); Csprintf(&m->out, "", p_or_nothing(m), t->refnumber); Csprintf(&m->out, "

  2. \n"); } } } Csprintf(&m->out, "
\n
\n"); }
/* return a pointer to the compiled markdown * document. */
/* Generates the html on first use (p->html records that it was done),
 * makes sure the output buffer ends in a 0 byte, stores the buffer
 * address in *res and returns the output size as measured before that
 * terminator was added.  Returns EOF if p is null or not compiled.
 */
int mkd_document(Document *p, char **res) { int size; if ( p && p->compiled ) { if ( ! p->html ) { htmlify(p->code, 0, 0, p->ctx); if ( p->ctx->flags & MKD_EXTRA_FOOTNOTE ) mkd_extra_footnotes(p->ctx); p->html = 1; } size = S(p->ctx->out); if ( (size == 0) || T(p->ctx->out)[size-1] ) EXPAND(p->ctx->out) = 0; *res = T(p->ctx->out); return size; } return EOF; }