Read the introduction to get an idea of what this script is about.

image: abstract doheem

This script presumes:

  • The arts & ego site structure
  • macOS
  • some MacPorts applications

It uses a number of other programs to check my code:

  • I use the free small team edition of perforce for version control. I’ve been using it since before git got established, and have found no reason to change.
  • Apache is used to compile the SHTML pages into HTML. This is a cheat; I have an old Mac Mini with Apache running, which hosts the code I’ve typed. Site.sh simply grabs the Apache output of changed pages, using curl.
  • TIDY checks the HTML for syntax errors. I have to kludge it slightly because it doesn’t know about HTML5’s <data> tag, but, beyond that, I find it very useful.
  • Hunspell checks spelling. I only run it over parts of the site, because it’s not the fastest tortoise on the block. Furthermore, it uses open source dictionaries, which seem to be slightly lacking in vocabulary, so I have to correct its output. All the same, given my tpyign & seplllnig, it is useful and necessary.
  • SWLC checks my links. I wrote this myself because the publicly available link checking software I’ve found is ridiculously slow, too impractical to use. SWLC is an order of magnitude faster than the best of the alternatives.
  • I need to find a product to check the metadata encoded in the site. I’ve found sites that are useful for checking the occasional page, but nothing performant enough to check an entire website.

Site.sh can also upload the compiled website to my public-facing server. That runs OpenBSD’s native webserver, httpd, to present arts & ego as a static website to the big bad world.



#!/bin/sh

# check and upload to websites
# use -h for list of options

freshen_project () {
# Sync one Perforce workspace directory and leave a change list behind.
# 1. project
# 2. file to receive list of changes
# 3. tmp file directory (no longer needed; kept so callers need not change)
# 4. actually, set file list to all files
# Returns non-zero, without syncing, when the project directory cannot be entered.

  # Both the sync and the find act on the current directory, so never
  # carry on from the wrong place.
  cd "$1" || return 1
  "${PERFORCE}" -p x.x.x.x:x sync ... > "$2"

  if [[ $4 -gt 0 ]] ; then
    # Full run requested: the list becomes every regular file in the project.
    find . -name \* -type f -print > "$2"
  elif [[ -e "$2" ]] ; then

    # The list is deleted when the sync output lacks the up-to-date marker.
    # NOTE(review): this keeps the list only when p4 reports nothing to do,
    # which looks inverted -- confirm the intended sense before changing it.
    if ! grep "file(s) up-to-date" "$2" > /dev/null ; then
      rm -f "$2"
    fi

  fi

  cd ~
}

freshen () {
# Log in to Perforce and sync the selected site projects plus two depot projects.
# 1. live file list
# 2. corrupt file list
# 3. if 1, work on live
# 4. if 1, work on corrupt
# 5. tmp file directory
# 6. actually, set file list to all files
# Reads globals: ETC, PERFORCE, TEST, CORRUPTSITE, LIVESITE, DEPOT.

  # The p4 password is fed to "p4 login" from this file; without it, give up.
  if [[ ! -e "${ETC}/shush.txt" ]] ; then
    echo "--- Cannot freshen from p4 depot, password unknown."
    return
  fi

  echo "Syncing p4 projects..."

  "${PERFORCE}" -p x.x.x.x:x login < "${ETC}/shush.txt"

  # TEST syncs both sites; otherwise corrupt takes precedence over live.
  if [[ ${TEST} -gt 0 ]] ; then
    freshen_project "${CORRUPTSITE}" "$2" "$5" "$6"
    freshen_project "${LIVESITE}" "$1" "$5" "$6"
  elif [[ $4 -gt 0 ]] ; then
    freshen_project "${CORRUPTSITE}" "$2" "$5" "$6"
  elif [[ $3 -gt 0 ]] ; then
    freshen_project "${LIVESITE}" "$1" "$5" "$6"
  fi

  # These two projects are always synced; their change list is thrown away.
  freshen_project "${DEPOT}/yyyy/yyyy" "$5/tmp.lst" "$5" "$6"
  freshen_project "${DEPOT}/zzzz/zzzz" "$5/tmp.lst" "$5" "$6"
  rm -f "$5/tmp.lst"
  cd ~
}

run_dictionary () {
# Append hunspell's list of misspelt words for every matching file to a report.
# 1. subproject
# 2. file types (a find -name pattern such as *.shtml)
# 3. output file

  # A pattern that arrives as \*.ext (a literal backslash, which is what
  # "\*.ext" in double quotes produces) would make find look for files
  # literally named "*.ext". Drop one leading backslash so both spellings work.
  local pattern=${2#\\}

  find "$1" -name "${pattern}" -exec "${HUNSPELL}" -d en_GB-large -l -H {} \; >> "$3"
}

run_dictionary_on_shtml () {
# Spell-check the SHTML pages and include files of a subproject.
# 1. subproject
# 2. output file

  # Single-quoted globs: inside double quotes "\*" keeps its backslash, and
  # find would then match only files literally named "*.shtml".
  run_dictionary "$1" '*.shtml' "$2"
  run_dictionary "$1" '*.inc' "$2"
}

run_dictionary_on_sundry () {
# Spell-check the non-SHTML page types of a subproject.
# 1. subproject
# 2. output file

  local pattern

  # Single-quoted globs: inside double quotes "\*" keeps its backslash, and
  # find would then match only files literally named "*.html" and so on.
  for pattern in '*.html' '*.htm' '*.asp' '*.rss' '*.php' ; do
    run_dictionary "$1" "${pattern}" "$2"
  done
}

run_dictionary_filtered () {
# Spell-check a subproject's SHTML pages, skipping set-*, slide-* and anything
# under concrete/category, then check three named files explicitly.
# 1. subproject
# 2. output file

  find "$1" \( -name '*.shtml' -and -not -name 'set-*' -and -not -path '*concrete/category*' -and -not -name 'slide-*' \) -exec "${HUNSPELL}" -d en_GB-large -l -H {} \; >> "$2"

  # NOTE(review): these paths are relative to the current directory, not to $1
  # -- confirm that is intended.
  "${HUNSPELL}" -d en_GB-large -l -H reflets/intro.shtml >> "$2"
  "${HUNSPELL}" -d en_GB-large -l -H concrete/category/index.shtml >> "$2"
  "${HUNSPELL}" -d en_GB-large -l -H concrete/category/index.inc >> "$2"
}

check_spelling () {
# Run the spelling checks, filter known-good words out of the report, and
# print whatever remains. The report file is deleted when nothing is left.
# 1. output file
# 2. live changes
# 3. corrupt changes
# 4. if 1, force checking even if no changes
# 5. if 1, work on live
# 6. if 1, work on corrupt
# Reads globals: TEST, LIVESITE, LIVESUB, CORRUPTSITE, DEPOT.

  echo "Spelling checks..."
  rm -f "$1"

  # TEST checks both sites; otherwise live takes precedence over corrupt, and
  # a site is only checked when forced or when its change list exists.
  if [[ ${TEST} -gt 0 ]] ; then
    run_dictionary_filtered "${LIVESITE}${LIVESUB}" "$1"
    run_dictionary_on_sundry "${LIVESITE}${LIVESUB}" "$1"
    run_dictionary_on_shtml "${CORRUPTSITE}" "$1"
  elif [[ $5 -gt 0 ]] ; then
    if [[ $4 -gt 0 || -e "$2" ]] ; then
      run_dictionary_filtered "${LIVESITE}${LIVESUB}" "$1"
      run_dictionary_on_sundry "${LIVESITE}${LIVESUB}" "$1"
    fi
  elif [[ $6 -gt 0 ]] ; then
    if [[ $4 -gt 0 || -e "$3" ]] ; then
      run_dictionary_on_shtml "${CORRUPTSITE}" "$1"
    fi
  fi

  if [[ -e "$1" ]] ; then
    # Apply the depot's sed script to the report, then drop lines that
    # mention the home directory path. (BSD sed: -i needs the "" suffix.)
    sed -f "${DEPOT}/xx/xx/xx" -i "" "$1"
    sed -e "/Users\/user/d" -i "" "$1"

    if [[ ! -s "$1" ]] ; then
      rm "$1"
    else
      echo "*** Some spelling issues were found."
      cat "$1"
    fi
  fi
}

run_http_check () {
# Append to the report a header naming the subproject, then every line in its
# text files that contains "http: (an opening quote followed by http:).
# 1. subproject
# 2. output file
# Returns non-zero, after writing only the header, if the subproject directory
# cannot be entered.

  local origin
  origin=$(pwd)

  echo "**** $1" >> "$2"

  # Without this guard a missing directory would make grep scan wherever the
  # script happens to be.
  cd "$1" || return 1
  grep -rI \"http: * >> "$2"
  cd "${origin}"
}

check_http () {
# Start a fresh report of plain-HTTP links for the selected site or sites.
# 1. output file
# 2. live changes
# 3. corrupt changes
# 4. if 1, force checking even if no changes
# 5. if 1, work on live
# 6. if 1, work on corrupt
# Reads globals: TEST, LIVESITE, LIVESUB, CORRUPTSITE.

  echo "HTTP checks..."
  rm -f "$1"

  # TEST checks both sites; otherwise live takes precedence over corrupt, and
  # a site is only checked when forced or when its change list exists.
  if [[ ${TEST} -gt 0 ]] ; then
    run_http_check "${LIVESITE}${LIVESUB}" "$1"
    run_http_check "${CORRUPTSITE}" "$1"
  elif [[ $5 -gt 0 ]] ; then
    if [[ $4 -gt 0 || -e "$2" ]] ; then
      run_http_check "${LIVESITE}${LIVESUB}" "$1"
    fi
  elif [[ $6 -gt 0 ]] ; then
    if [[ $4 -gt 0 || -e "$3" ]] ; then
      run_http_check "${CORRUPTSITE}" "$1"
    fi
  fi
}

check_site_links () {
# Run the link checker over a compiled site; print its report if it fails.
# 1. site to check
# 2. error report file
# 3. site domain
# Reads globals: SWLC, EXTERNAL (adds the -e -m -r switches when > 0).

  local status

  if [[ ${EXTERNAL} -gt 0 ]] ; then
    "${SWLC}" -x shtml -x html -x htm -x asp -e -m -r -s "$3" -i index.shtml "$1" > "$2" 2>&1
    status=$?
  else
    "${SWLC}" -x shtml -x html -x htm -x asp -s "$3" -i index.shtml "$1" > "$2" 2>&1
    status=$?
  fi

  # A non-zero exit from the link checker is treated as "broken links found".
  if [[ ${status} -ne 0 ]] ; then
    echo "*** Some broken links were found at $1 ($3)."
    cat "$2"
  fi
}

check_links () {
# Clear both link reports, then link-check the selected compiled site or sites.
# 1. live changes (not used here)
# 2. corrupt changes (not used here)
# 3. live errors
# 4. corrupt errors
# 5. if 1, work on live
# 6. if 1, work on corrupt
# Reads global: TEST (checks both sites when > 0).

  echo "Link checks..."
  rm -f "$3" "$4"

  if [[ ${TEST} -gt 0 ]] ; then
    check_site_links ~/www/lll "$3" example.org
    check_site_links ~/www/ccc "$4" example.com
  elif [[ $5 -gt 0 ]] ; then
    check_site_links ~/www/lll "$3" example.org
  elif [[ $6 -gt 0 ]] ; then
    check_site_links ~/www/ccc "$4" example.com
  fi
}

check_characters () {
# List every text file in the corrupt site and the live (sub)site that
# contains a non-printable character. The report is removed when empty.
# 1. report file
# Reads globals: CORRUPTSITE, LIVESITE, LIVESUB.

  # The braces belong after the dollar sign: "{$VAR}" expands to the path
  # wrapped in literal braces, which never exists, so nothing was scanned.
  grep -rIl "[^[:print:]]" "${CORRUPTSITE}" > "$1"
  grep -rIl "[^[:print:]]" "${LIVESITE}${LIVESUB}" >> "$1"

  if [[ ! -s "$1" ]] ; then
    rm -f "$1"
  fi
}

check_HTML_inner () {
# Feed one page to tidy on standard input; tidy writes its findings to the
# error file.
# 1. file
# 2. error file
# 3. source home directory

  local page="$3/$1"
  local report="$2"

  # --new-blocklevel-tags teaches tidy about the <data> element.
  cat "${page}" | ${TIDY} -q --new-blocklevel-tags data -f "${report}"
}

check_HTML_outer () {
# Strip the leading path component (the "./" that find prints) from the file
# name and hand everything to check_HTML_inner.
# 1. file
# 2. error file
# 3. source home directory

  # ${1#*/} removes everything up to and including the first slash. Doing it
  # in the shell keeps each argument intact (spaces included) and avoids
  # spawning echo and cut for every page.
  check_HTML_inner "${1#*/}" "$2" "$3"
}

check_SSI () {
# Fetch a page from the local web server and record its name in the error
# file if the response contains the server-side-include failure message.
# 1. file
# 2. error file

  if /usr/bin/curl "http://127.0.0.1/$1" | fgrep "error occurred while processing" > /dev/null ; then
    echo "$1" >> "$2"
  fi
}

check_HTML () {
# Validate every file under the current directory whose name matches the
# pattern: run tidy on it, print any remaining complaints to stdout, and
# probe the local web server for SSI errors.
# 1. file pattern
# Scratch file: ~/sitecode.tmp (removed at the end). SSI failures accumulate
# in ~/ssi.err.

  # NOTE(review): the backtick list is word-split, so file names containing
  # whitespace will be broken into pieces.
  for FILE in `find . -name "$1" -print` ; do

    # Skip link-checker pages, includes, *_pre pages, and files whose names
    # match *google*.html, *y_key_*.html or *SlideShow*html.
    if [[ ( "$FILE" != *_swlc.shtml ) &&  ( "$FILE" != *.inc ) &&  ( "$FILE" != *_pre.shtml ) && ( "$FILE" != *google*.html ) && ( "$FILE" != *y_key_*.html ) && ( "$FILE" != *SlideShow*html ) ]] ; then

      check_HTML_outer "$FILE" ~/sitecode.tmp "." > /dev/null 2> /dev/null
      # Delete tidy complaints that are not wanted: data is approved in HTML5,
      # and summary is NOT defined in HTML3.2; the empty-span trimming
      # warning is dropped as well. (BSD sed: -i needs the "" suffix.)
      sed -i "" -e "/Error: \<data\> is not recognized!/d" -e "/Warning: \<data\> is not approved by W3C/d" -e "/Warning: \<table\> lacks \"summary\" attribute/d" -e "/Warning: trimming empty \<span\>/d" ~/sitecode.tmp

      # Anything left over is a real finding: print it under the file name.
      if [[ -s ~/sitecode.tmp ]] ; then
        /bin/echo "**** $FILE"
        cat ~/sitecode.tmp
      fi

      check_SSI "$FILE" ~/ssi.err > /dev/null 2> /dev/null

      # Do not leave an empty SSI report lying around.
      if [[ ! -s ~/ssi.err ]] ; then
        rm -f ~/ssi.err
      fi

    fi

  done

  rm -rf ~/sitecode.tmp
}

check_sitecode () {
# Validate all page types in a compiled web directory, collecting the
# findings in one report, then return to the directory we started in.
# 1. web directory
# 2. error report
# Returns non-zero, without checking anything, if the web directory cannot
# be entered.

  local origin

  echo "Checking HTML ..."

  origin=$(pwd)
  cd "$1" || return 1
  check_HTML \*.shtml > "$2" 2>&1
  check_HTML \*.html >> "$2" 2>&1
  check_HTML \*.htm >> "$2" 2>&1
  check_HTML \*.asp >> "$2" 2>&1
  # Go back to the saved directory (the saved and restored names used to
  # differ, which sent the script to the home directory instead).
  cd "${origin}"
}

check () {
# Run the spelling, link and character checks; stop the whole script if a
# link or spelling report was produced.
# 1. live changes
# 2. corrupt changes
# 3. live link errors
# 4. corrupt link errors
# 5. spelling errors
# 6. if 1, check spelling even if there are no changes
# 7. if 1, work on live
# 8. if 1, work on corrupt
# 9. format errors

  echo "Checking..."

  check_spelling "$5" "$1" "$2" "$6" "$7" "$8"
  check_links "$1" "$2" "$3" "$4" "$7" "$8"
  check_characters "$9"

  # Only the link and spelling reports block an upload; the character report
  # ($9) is not consulted here.
  if [[ -e "$3" || -e "$4" || -e "$5" ]] ; then
    echo "*** Not updating websites."
    exit
  fi
}

upload_using_rsync () {
# Push files to the destination over ssh with rsync, skipping dot files.
# 1. source SHTML
# 2. destination
# 3. if 1, upload all, otherwise only newer

  # Start from the "only newer" flag set (includes -u) and drop the -u when
  # everything is to be uploaded.
  FLAGS=-rumpt4

  if [[ $3 -gt 0 ]] ; then
    FLAGS=-rmpt4
  fi

  # $1 stays unquoted on purpose: the caller passes a glob (dir/*) that has
  # to expand here.
  rsync $FLAGS --inplace --delete-after --chmod=Fu+w --exclude='\.*' -e ssh $1 $2
}

upload_to_httpd2 () {
# Copy a compiled site to the web server as root, then normalise permissions
# there: directories to 755, files to 644.
# 1. live source directory
# 2. destination directory
# 3. if 1, copy all
# 4. server

  local account="root@$4"

  echo "Uploading to live server..."
  upload_using_rsync "$1/*" "${account}:$2" $3

  # Only entries whose mode differs are touched.
  ssh "${account}" "find $2 -type d ! -perm 755 -exec chmod 755 {} \;"
  ssh "${account}" "find $2 -type f ! -perm 644 -exec chmod 644 {} \;"
}

_include_newer () {
# Succeed when any listed include file is newer than the reference file
# (or exists while the reference does not).
# 1. directory the include names are relative to
# 2. reference file, normally a compiled page
# 3... include file names

  local dir=$1
  local ref=$2
  local inc
  shift 2

  for inc in "$@" ; do
    if [[ "${dir}/${inc}" -nt "${ref}" ]] ; then
      return 0
    fi
  done

  return 1
}

_touch_shtml () {
# Touch every *.shtml file below each listed subdirectory.
# 1. source directory
# 2... subdirectories of the source directory

  local root=$1
  local sub
  shift

  for sub in "$@" ; do
    find "${root}/${sub}" -name \*.shtml -exec touch {} \;
  done
}

check_include () {
# Touch the SHTML pages that depend on any include file newer than the
# compiled site, so that a later "only newer" compile picks them up.
# 1. source directory
# 2. target directory
# Reads global: COMP (optional extra subdirectory to recompile).

  local home_page="$2/index.shtml"
  local plumbing="$1/plumbing"
  local music=0
  local poem=0
  local photo=0
  local entry
  local lang
  local year

  echo "Check include..."

  # Site-wide plumbing: a change here invalidates every page, so touch the
  # lot and skip the finer-grained checks.
  if _include_newer "${plumbing}" "${home_page}" header.inc under.inc html.inc html-end.inc html-top-start.inc html-top-end.inc var.inc ; then
    echo "Will compile all."
    find "$1" -name \*.shtml -exec touch {} \;
    return
  fi

  # English plumbing is compared against the site's top-level index page.
  if _include_newer "$1/en/plumbing" "${home_page}" html.inc html-end.inc html-top-start.inc html-top-end.inc var.inc ; then
    echo "Will compile English."
    _touch_shtml "$1" and blog concrete data event foto goodies music photographs podcast poetry prose reflets schema en/foto en/photographs
  fi

  # The other languages each live in their own subtree with its own index.
  for entry in de:German fr:French lb:Lux nl:Dutch ru:Russian ; do
    lang=${entry%%:*}
    if _include_newer "$1/${lang}/plumbing" "$2/${lang}/index.shtml" html.inc html-end.inc html-top-start.inc html-top-end.inc var.inc ; then
      echo "Will compile ${entry#*:}."
      _touch_shtml "$1" "${lang}"
    fi
  done

  if _include_newer "${plumbing}" "${home_page}" music-section.inc album-schema.inc track-schema.inc track-section.inc ; then
    music=1
  fi

  # The audio object is shared by the music and poetry pages.
  if _include_newer "${plumbing}" "${home_page}" audio_object.inc ; then
    music=1
    poem=1
  fi

  if [[ ${music} -eq 1 ]] ; then
    echo "Will compile music."
    _touch_shtml "$1" de/music fr/music lb/music nl/music music
  fi

  if _include_newer "$1" "${home_page}" plumbing/poem-section.inc plumbing/poem-schema.inc plumbing/poem_object.inc plumbing/set_object.inc poetry/poetry/indices.inc ; then
    poem=1
  fi

  if [[ ${poem} -eq 1 ]] ; then
    echo "Will compile poetry."
    _touch_shtml "$1" fr/poetry ru/poetry poetry
  fi

  if _include_newer "${plumbing}" "${home_page}" product-section.inc ; then
    echo "Will compile goodies."
    _touch_shtml "$1" goodies de/goodies fr/goodies lb/goodies nl/goodies
  fi

  if _include_newer "${plumbing}" "${home_page}" cast-section.inc vidcast-section.inc video_object.inc ; then
    echo "Will compile podcasts."
    _touch_shtml "$1" podcast
  fi

  if _include_newer "$1" "${home_page}" plumbing/blog-schema.inc plumbing/blog-section.inc plumbing/crumb-section.inc blog/blog/indices.inc ; then
    echo "Will compile blogs."
    _touch_shtml "$1" blog prose
  fi

  # Anything that forces the foto pages also forces the photographs pages;
  # two further includes affect the photographs pages alone.
  if _include_newer "$1" "${home_page}" plumbing/banner-section.inc plumbing/banshot-section.inc plumbing/collection-schema.inc plumbing/img_object.inc plumbing/photo-section.inc plumbing/shot-section.inc plumbing/thumb-section.inc foto/foto/indices.inc ; then
    echo "Will compile foto."
    _touch_shtml "$1" foto de/foto en/foto fr/foto lb/foto nl/foto
    photo=1
  elif _include_newer "${plumbing}" "${home_page}" chewed-section.inc dad-section.inc ; then
    photo=1
  fi

  if [[ ${photo} -eq 1 ]] ; then
    echo "Will compile photo."
    _touch_shtml "$1" photographs de/photographs en/photographs fr/photographs lb/photographs nl/photographs
  fi

  if _include_newer "${plumbing}" "${home_page}" sacon-section.inc sashot-section.inc ; then
    echo "Will compile reflets."
    _touch_shtml "$1" reflets de/reflets fr/reflets lb/reflets nl/reflets
  fi

  if _include_newer "${plumbing}" "${home_page}" conshot-section.inc conthumb-section.inc ; then
    echo "Will compile concrete."
    _touch_shtml "$1" concrete de/concrete fr/concrete lb/concrete nl/concrete
  fi

  if _include_newer "$1/dia" "${home_page}" dia.inc dp.inc ; then
    echo "Will compile diapositive."
    _touch_shtml "$1" fr/dia dia
  fi

  # Archived yearly sites: each is compared against its own index page.
  for year in 2010 2011 2012 2013 2014 ; do
    if _include_newer "$1/past/ae/${year}" "$2/past/ae/${year}/index.shtml" static.inc header.inc gallery.inc ; then
      echo "Will compile ${year}."
      _touch_shtml "$1" "past/ae/${year}"
    fi
  done

  # 2015 is compared against a page deeper in its tree.
  if _include_newer "$1/past/ae/2015" "$2/past/ae/2015/and/and/index.shtml" static.inc header.inc gallery.inc ; then
    echo "Will compile 2015."
    _touch_shtml "$1" past/ae/2015
  fi

  # 2016 has a different set of includes.
  if _include_newer "$1/past/ae/2016" "$2/past/ae/2016/index.shtml" html-end.inc header.inc choice.inc ; then
    echo "Will compile 2016."
    _touch_shtml "$1" past/ae/2016
  fi

  # Explicit request (-b PATH): recompile one subdirectory regardless.
  if [[ "$COMP" != "" ]] ; then
    if [[ ! -d "$1/$COMP" ]] ; then
      echo "Cannot find $COMP"
    else
      echo "Will compile $COMP."
      find "$1/$COMP" -name \*.shtml -exec touch {} \;
    fi
  fi

}

compile_shtml () {
# Build the static site: copy ordinary files into the target directory and
# fetch each SHTML page from the web server with curl, storing the result in
# the target directory. Two throw-away helper scripts are generated in the
# tmp directory and run once per file through xargs.
# 1. source file list
# 2. source directory
# 3. target directory
# 4. if 1, apply to everything, not just changes
# 5. tmp directory
# 6. if 1, only work on SHTML
# 7. curl web server source

  echo "Compiling SHTML..."

  # The list file is rebuilt here from every regular file in the source
  # directory, replacing whatever it held before.
  cd "$2"
  find . -name \* -type f -print > "$1"

  if [[ ! -s "$1" ]] ; then
    echo "Nothing to do."
    cd ~
    return
  fi

  # Strip the leading "./" that find prints, and clear out stale helpers.
  sed -i "" -e "s/^\.\///" "$1"
  rm -f "$5/compile_shtml.sh" "$5/copy_site.sh"
  mkdir -p "$3"

  # Split the list in two: $1 keeps only the .shtml pages (minus *_inc.shtml
  # and *_swlc.shtml), x.tmp keeps everything that is not .shtml.
  cp -f "$1" "$5/x.tmp"
  sed -i "" -e "/_inc\.shtml$/d" "$1"
  sed -i "" -e "/_swlc\.shtml$/d" "$1"
  sed -i "" -e "/\.shtml$/!d" "$1"
  sed -i "" -e "/\.shtml$/d" "$5/x.tmp"

  # SHTML-only mode: drop the copy list so no plain files are copied.
  if [[ $6 -gt 0 ]] ; then
    rm -f "$5/x.tmp"
  fi

  if [[ -s "$5/x.tmp" ]] ; then

    # Generate copy_site.sh. $3 is expanded now; the escaped \$1 and \$TARGET
    # are left for the generated script to expand when it runs.
    echo "#!/bin/sh" > "$5/copy_site.sh"
    echo "# 1. file to copy" >> "$5/copy_site.sh"
    echo "TARGET=\"$3/\$1\"" >> "$5/copy_site.sh"

    # Unless everything is being rebuilt, wrap the copy in a freshness test
    # so that only files newer than their target are copied.
    if [[ $4 -eq 0 ]] ; then
      echo "if [[ ! -s \"\$TARGET\" || \$1 -nt \"\$TARGET\" ]] ; then" >> "$5/copy_site.sh"
      echo "  if [[ \$1 -nt \"\$TARGET\" ]] ; then" >> "$5/copy_site.sh"
    fi

    # Replace an existing target (made writable first), or create the
    # directory for a new one, then copy preserving attributes.
    echo "    if [[ -e \"\$TARGET\" ]] ; then" >> "$5/copy_site.sh"
    echo "      chmod 600 \"\$TARGET\"" >> "$5/copy_site.sh"
    echo "      rm -f \"\$TARGET\"" >> "$5/copy_site.sh"
    echo "    else" >> "$5/copy_site.sh"
    echo "      mkdir -p \`dirname \"\$TARGET\"\`" >> "$5/copy_site.sh"
    echo "    fi" >> "$5/copy_site.sh"
    echo "    cp -fp \"\$1\" \"\$TARGET\"" >> "$5/copy_site.sh"

    if [[ $4 -eq 0 ]] ; then
      echo "  fi" >> "$5/copy_site.sh"
      echo "fi" >> "$5/copy_site.sh"
    fi

    # Run the helper once per list entry. NOTE(review): xargs splits on
    # whitespace, so file names containing spaces will not survive.
    chmod 700 "$5/copy_site.sh"
    xargs -n 1 "$5/copy_site.sh" < "$5/x.tmp"
    rm -f "$5/copy_site.sh"
  fi

  if [[ -s "$1" ]] ; then

    # Generate compile_shtml.sh, built the same way as the copy helper but
    # fetching the page from the web server instead of copying the source.
    echo "#!/bin/sh" > "$5/compile_shtml.sh"
    echo "# 1. file to compile" >> "$5/compile_shtml.sh"
    echo "cd $2" >> "$5/compile_shtml.sh"
    echo "TARGET=\"$3/\$1\"" >> "$5/compile_shtml.sh"

    if [[ $4 -eq 0 ]] ; then
      echo "if [[ ! -s \"\$TARGET\" || \$1 -nt \"\$TARGET\" ]] ; then" >> "$5/compile_shtml.sh"
      echo "  if [[ \$1 -nt \"\$TARGET\" ]] ; then" >> "$5/compile_shtml.sh"
    fi

    echo "    if [[ -e \"\$TARGET\" ]] ; then" >> "$5/compile_shtml.sh"
    echo "      chmod 600 \"\$TARGET\"" >> "$5/compile_shtml.sh"
    echo "      rm -f \"\$TARGET\"" >> "$5/compile_shtml.sh"
    echo "    fi" >> "$5/compile_shtml.sh"
    # The page is requested as <server source>/<relative path>; curl creates
    # any missing target directories itself.
    echo "    curl --create-dirs -o \"\$TARGET\" \"$7/\$1\"" >> "$5/compile_shtml.sh"

    if [[ $4 -eq 0 ]] ; then
      echo "  fi" >> "$5/compile_shtml.sh"
      echo "fi" >> "$5/compile_shtml.sh"
    fi

    chmod 700 "$5/compile_shtml.sh"
    xargs -n 1 "$5/compile_shtml.sh" < "$1"
    rm -f "$5/compile_shtml.sh"
  fi

  # Record every file in the target directory that contains the SSI failure
  # message.
  grep -rIl "an error occurred while processing this directive" "$3" > ~/ssi.err
  cd ~
}

enact () {
# Carry out the work selected on the command line: sync, spell-check,
# compile, link-check, validate, and upload, in that order.
# 1. depot (not used here)
# 2. tmp directory
# 3. pure HTML directory
# 4. if 1, apply to everything, not just changes
# 5. if 1, work on live
# 6. if 1, work on corrupt
# 7. if 1, upload to canal
# Reads globals: SPELLING, COMPILE, LINK, TEST, CODE, HTTP, RISKY, LIVE,
# CORRUPT, ONLY, HTML, LIVESITE, CORRUPTSITE, LIVESUB, LIVESRC, CORRUPTSRC,
# LIVESTATIC, CORRUPTSTATIC, CANALHOST.

  # Keep one generation of old reports. The names are relative to the
  # current directory; the new reports below are written to ~.
  # NOTE(review): assumes the script is sitting in the home directory here.
  mv -f control.err control.err.old 2> /dev/null
  mv -f corrupt.err corrupt.err.old 2> /dev/null
  mv -f code.err code.err.old 2> /dev/null
  mv -f live.err live.err.old 2> /dev/null
  mv -f ssi.err ssi.err.old 2> /dev/null
  mv -f yuk.err yuk.err.old 2> /dev/null
  mv -f http.out http.out.old 2> /dev/null

  freshen "$2/live.lst" "$2/corrupt.lst" "$5" "$6" "$2" "$4"

  if [[ ${SPELLING} -ne 0 ]] ; then
    check_spelling ~/yuk.err "$2/live.lst" "$2/corrupt.lst" 1 "$5" "$6"
  fi

  if [[ ${COMPILE} -gt 0 ]] ; then
    if [[ $5 -gt 0 ]] ; then

      if [[ $4 -gt 0 ]] ; then
        # Full rebuild: empty the compiled live site first. The :? guard
        # aborts rather than letting an empty $3 turn this into /live/*.
        rm -rf "${3:?}"/live/*
      else
        check_include "${LIVESITE}" "$3/live"
      fi
      compile_shtml "$2/live.lst" "${LIVESITE}" "$3/live" "$4" "$2" "$ONLY" "$LIVESRC"
    elif [[ $6 -gt 0 ]] ; then
      compile_shtml "$2/corrupt.lst" "${CORRUPTSITE}" "$3/corrupt" "$4" "$2" "$ONLY" "$CORRUPTSRC"
    fi
  fi

  if [[ ${LINK} -gt 0 ]] ; then
    if [[ ${LIVE} -eq 1 ]] ; then
      check_site_links ~/www/live ~/live.err example.org
    elif [[ ${CORRUPT} -eq 1 ]] ; then
      rm -f ~/corrupt.err
      check_site_links ~/www/corrupt ~/corrupt.err example.com
    else
      echo "Specify live or corrupt."
    fi
  fi

  if [[ ${TEST} -gt 0 ]] ; then
    # check exits the script if it finds link or spelling problems.
    check "$2/live.lst" "$2/corrupt.lst" ~/live.err ~/corrupt.err ~/yuk.err "$4" "$5" "$6" ~/control.err
    check_sitecode "${HTML}/corrupt" ~/corrupt_code.err
    check_sitecode "${HTML}/live${LIVESUB}" ~/live_code.err
  fi

  if [[ ${CODE} -gt 0 ]] ; then
    if [[ ${LIVE} -eq 1 ]] ; then
      check_sitecode "${HTML}/live${LIVESUB}" ~/code.err
    elif [[ ${CORRUPT} -eq 1 ]] ; then
      check_sitecode "${HTML}/corrupt" ~/code.err
    else
      echo "Specify live or corrupt."
    fi
  fi

  if [[ ${HTTP} -gt 0 ]] ; then
    check_http ~/http.out "$2/live.lst" "$2/corrupt.lst" 1 "$5" "$6"
  fi

  if [[ $5 -gt 0 ]] ; then

    if [[ $7 -gt 0 ]] ; then
      upload_to_httpd2 "$3/live" "${LIVESTATIC}" "$4" "${CANALHOST}"
      if [[ ${RISKY} -eq 0 ]] ; then
        # wait until links can be checked before checking them
        check_links "$2/live.lst" "$2/corrupt.lst" ~/live.err ~/corrupt.err "$5" "$6"
      fi
    fi

  elif [[ $6 -gt 0 ]] ; then

    if [[ $7 -gt 0 ]] ; then
      upload_to_httpd2 "$3/corrupt" "${CORRUPTSTATIC}" "$4" "${CANALHOST}"
      if [[ ${RISKY} -eq 0 ]] ; then
        # wait until links can be checked before checking them
        check_links "$2/live.lst" "$2/corrupt.lst" ~/live.err ~/corrupt.err "$5" "$6"
      fi
    fi

  fi
}

aid () {
# Print the usage summary. DEPOT is expanded into the two default paths.

  cat <<EOF
site.sh takes a number of switches
  -a compile
  -A when compiling, don't copy non SHTML files
  -b PATH compile live directory PATH
  -c work on corrupt site
  -C PATH work on corrupt site which is at PATH; default ${DEPOT}/web/corrupt
  -d PATH sets the p4 depot to PATH; default is ~/depot
  -e PATH sets the config directory to PATH; default is ~/etc
  -f work on everything, not just changes
  -h outputs this text
  -H check HTML
  -i upload to canal, static SHTML
  -I upload to canal without compiling
  -k just do an internal link test; specify live or corrupt
  -K just do a full link test; specify live or corrupt
  -l work on live site
  -L PATH work on live site which is at PATH; default ${DEPOT}/web/live
  -n test everything, do not upload
  -r take risky approach by bypassing checks
  -s SUB check only this live site subdirectory
  -S just do a spelling test
  -t PATH sets the tmp file directory to PATH; default is ~/tmp
  -T report HTTP (e.g. not HTTPS) links
  -u PATH upload to target path PATH on destination (live only)
  -w PATH sets the plain HTML site directory to PATH; default is ~/www
  -x when checking links, check external links too
One of -c or -l must be specified.
EOF

}

verify () {
# Parse the command-line switches (listed by aid), check that the result is
# usable, create the working directories and hand over to enact.
# Arguments: the script's own command-line arguments.
# Sets the option globals (COMPILE, LIVE, CORRUPT, DEPOT, ...) as it goes.

  # No arguments at all: show the help and stop.
  if [[ "$1" == "" ]] ; then

    aid
    exit

  fi

  if [[ ! -e /usr/bin/curl ]] ; then
    echo "Please install curl."
    exit
  fi

  # Each switch is a separate word; only its first letter after the dash is
  # examined. Switches that take a value consume the following word.
  while [[ "${1:0:1}" == "-" ]] ; do

    case "${1:1:1}" in

     "a" )
       COMPILE=1
       echo "compile"
       ;;

     "A" )
       ONLY=1
       echo "don't copy non-compiled files"
       ;;

     "b" )
       COMPILE=1
       COMP=$2
       echo "recompile $2"
       shift
       ;;

     "c" )
       CORRUPT=1
       echo "work with corrupt"
       ;;

     "C" )
       CORRUPT=1
       CORRUPTSITE=$2
       echo "work with corrupt at $2"
       shift
       ;;

     "d" )
       # Moving the depot also moves both site sources beneath it.
       DEPOT=$2
       LIVESITE=$2/web/live
       CORRUPTSITE=$2/web/corrupt
       echo "depot is $2"
       shift
       ;;

     "e" )
       ETC=$2
       echo "config is $2"
       shift
       ;;

     "f" )
       FULL=1
       echo "check everything, not just changes"
       ;;

     "h" )
       aid
       exit
       ;;

     "H" )
       CODE=1
       echo "validate HTML"
       ;;

     "i" )
       CANAL=1
       COMPILE=1
       echo "upload to canal, compile to HTML first"
       ;;

     "I" )
       CANAL=1
       echo "upload to canal without compilation"
       ;;

     "k" )
       # Raise the open-file limit before link checking.
       ulimit -n 8192
       LINK=1
       echo "internal link test"
       ;;

     "K" )
       ulimit -n 8192
       LINK=1
       EXTERNAL=1
       echo "full link test"
       ;;

     "l" )
       LIVE=1
       echo "work with live"
       ;;

     "L" )
       LIVE=1
       LIVESITE=$2
       echo "work with live at $2"
       shift
       ;;

     "n" )
       LIVE=1
       CORRUPT=1
       RISKY=0
       TEST=1
       HTTP=1
       SPELLING=1
       echo "test only, do not upload"
       ;;

     "r" )
       RISKY=1
       echo "risky"
       ;;

     "s" )
       LIVESUB="/$2"
       echo "checking only live site subdirectory $LIVESUB"
       shift
       ;;

     "S" )
       SPELLING=1
       echo "spelling test"
       ;;

     "t" )
       TMP=$2
       echo "tmp directory is $2"
       shift
       ;;

     "T" )
       HTTP=1
       echo "report HTTP (not HTTPS) links"
       ;;

     "u" )
       LIVE=1
       LIVESTATIC=$2
       echo "upload live to $2"
       shift
       ;;

     "w" )
       HTML=$2
       echo "plain www directory is $2"
       shift
       ;;

     "x" )
       # check_site_links decides on EXTERNAL, so the switch must set it;
       # LINKSWITCH on its own is never read by the link checks.
       EXTERNAL=1
       LINKSWITCH="--check-extern"
       echo "check external links"
       ;;

     * )
       aid
       exit
       ;;

    esac

    shift

  done

  if [[ ! -d "$DEPOT" ]] ; then

    echo "$DEPOT is not a depot."
    exit

  fi

  if [[ ${LIVE} -eq 0 && ${CORRUPT} -eq 0 ]] ; then

    echo "Please use either -l, -c, or both."
    exit

  fi

  mkdir -p "${TMP}"
  mkdir -p "${HTML}"
  enact "${DEPOT}" "${TMP}" "${HTML}" "${FULL}" "${LIVE}" "${CORRUPT}" "${CANAL}"
  echo "Done."

}

# Remember where we were started, then work from the home directory: the
# report files are written to ~.
KVD=`pwd`
cd ~

# program locations
HUNSPELL=/opt/local/bin/hunspell
TIDY=/opt/local/bin/tidy

# freshen reads the p4 password from ${ETC}/shush.txt
PERFORCE=/usr/local/bin/p4

#eat your own catfood
SWLC=/usr/local/bin/swlc

# parameters (defaults; verify overrides them from the command line)

DEPOT=~/depot
TMP=~/tmp
HTML=~/www
ETC=~/etc
FULL=0
CORRUPT=0
LIVE=0
RISKY=0
BLOG=0
ACTIVE=0
TEST=0
LINKSWITCH=
SPELLING=0
LINK=0
CODE=0
COMPILE=0
ONLY=0
HTTP=0
EXTERNAL=0

CANAL=0
SELF=0

LIVESTATIC="/lll"
CORRUPTSTATIC="/ccc"
LIVESITE=~/depot/lll
CORRUPTSITE=~/depot/ccc
CANALHOST="webhost.example.com"
LIVESRC=http://127.0.0.1/
CORRUPTSRC=http://127.0.0.1:999/
LIVESUB="/"
COMP=""

# Quoted so that arguments containing spaces (paths) reach verify intact.
verify "$@"



See site: [ introduction, site.sh, utilities ].


I have replaced this script with the static site checker.