TEXT   228
begin blank 0 tagged 1 informal
Guest on 5th December 2024 05:27:03 AM


  # HTML paragraph filter.
  #
  # Copies every input line to the output unchanged, adding markup around it:
  #   * each run of "informal" lines (lines that are neither blank nor start
  #     with a tag) inside the page body is wrapped in
  #     <p class="informal"> ... </p>;
  #   * a spacer paragraph is printed before a <div class="zed"> or
  #     <div class="mixed"> line whenever the previous paragraph was also
  #     one of those "formal" divs.
  #
  # Nothing is wrapped until a line starting with <body> has been seen.

  BEGIN {
    blank = 0; tagged = 1; informal = 2; formal = 3   # mnemonics
    curr    = blank      # classification of the current line
    para    = informal   # kind of the most recent paragraph
    body    = 0          # becomes 1 once the <body> line has been read
    in_para = 0          # 1 while a <p class="informal"> we printed is open
  }

  {
    prev_para = para

    # Entering the page body (leading spaces and tabs allowed).
    if (!body && $0 ~ /^[ \t]*<body>/) body = 1

    # Classify the line; later tests override earlier ones.
    curr = informal
    if ($0 ~ /^[ \t]*$/) curr = blank    # empty, or only spaces and tabs
    if ($0 ~ /^[ \t]*</) curr = tagged   # starts with a tag

    # In-line text formatting tags at the start of a line do not end an
    # informal paragraph.
    if ($0 ~ /^[ \t]*<(b>|em>|tt>|var|font)/) curr = informal

    # Close or open the informal paragraph. Tracking in_para (rather than
    # comparing with the previous line's class) guarantees that </p> is only
    # printed for a <p> that was actually opened, even when untagged text
    # appears before <body>.
    if (in_para && curr != informal) {
      print "</p>"
      in_para = 0
    }
    if (!in_para && curr == informal && body) {
      print "<p class=\"informal\">"
      in_para = 1
    }

    # para is tracked only to decide when a spacer is needed between formal
    # paragraphs. It is deliberately not changed on blank lines, so the
    # previous value is retained through them.
    if (curr == informal) para = informal
    if ($0 ~ /^[ \t]*<h/) para = tagged  # header tag like <h3>; note the
                                         # pattern matches any tag starting <h

    # New formal paragraph: separate it from a directly preceding formal one.
    if ($0 ~ /<div class="(zed|mixed)">/) {
      para = formal
      if (prev_para == formal) print "<p class=\"space\"></p>\n"
    }

    print $0
  }

  END {
    # Input ended inside an informal paragraph (no blank or tagged line after).
    if (in_para) print "</p>"
  }

Raw Paste

Login or Register to edit or fork this paste. It's free.