diff --git a/network/httrack-twiki b/network/httrack-twiki new file mode 100644 index 0000000..26a66d1 --- /dev/null +++ b/network/httrack-twiki @@ -0,0 +1,227 @@ +#!/bin/sh +# +# 2011 Nico Schottelius (nico-nsbin at schottelius.org) +# +# This file is part of nsbin. +# +# nsbin is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# nsbin is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with nsbin. If not, see http://www.gnu.org/licenses/. +# +# +# Some ideas and a working version to mirror a twiki via httrack +# +# + +# Use https +# Use /twiki/ + +httrack -%i -w https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/WebHome \ + -%P -N0 -s0 -p7 -a -K0 -%k -%s -%u \ + -F "Mozilla/4.5 (compatible; HTTrack 3.0x; Linux 2.6.42)" -%F '' \ + -B -%x0 \ + '+*.png' '+*.gif' '+*.jpg' '+*.css' '+*.js' '+*.pdf' '+fifthelement.inf.ethz.ch/*.pdf' \ + '+fifthelement.inf.ethz.ch/bin/view*/KutterFondsWiki/*' \ + '-fifthelement.inf.ethz.ch/bin/view*/KutterFondsWiki/*?raw=*' \ + '+fifthelement.inf.ethz.ch/bin/view*/KutterFondsWiki/*?raw=on' \ + '-fifthelement.inf.ethz.ch/bin/view*/KutterFondsWiki/*?skin=*' \ + '+fifthelement.inf.ethz.ch/bin/view*/KutterFondsWiki/*?skin=koalaprint' \ + '+fifthelement.inf.ethz.ch/pub/KutterFondsWiki/*' + +# baseurl: https://fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome + + +-------------------------------------------------------------------------------- + +httrack --preserve + + +-------------------------------------------------------------------------------- +[13:22] kr:7% httrack --preserve 
+ +Welcome to HTTrack Website Copier (Offline Browser) 3.43-9+libhtsjava.so.2 +Copyright (C) Xavier Roche and other contributors +To see the option list, enter a blank line or try httrack --help + +Enter project name :kutterfonds + +Base path (return=/home/users/nico/websites/) :/home/users/nico/kutter/7/ + +Enter URLs (separated by commas or blank spaces) :https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/WebHome https://fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ + +Action: +(enter) 1 Mirror Web Site(s) + 2 Mirror Web Site(s) with Wizard + 3 Just Get Files Indicated + 4 Mirror ALL links in URLs (Multiple Mirror) + 5 Test Links In URLs (Bookmark Test) + 0 Quit +: +Enter URLs (separated by commas or blank spaces) :https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/WebHome https://fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ + +Action: +(enter) 1 Mirror Web Site(s) + 2 Mirror Web Site(s) with Wizard + 3 Just Get Files Indicated + 4 Mirror ALL links in URLs (Multiple Mirror) + 5 Test Links In URLs (Bookmark Test) + 0 Quit +: 2 + +Proxy (return=none) : + +You can define wildcards, like: -*.gif +www.*.com/*.zip -*img_*.zip +Wildcards (return=none) :+*.pdf + +You can define additional options, such as recurse level (-r), separed by blank spaces +To see the option list, type help +Additional options (return=none) :--preserve + +---> Wizard command line: httrack https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/WebHome https://fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ -W -O "/home/users/nico/kutter/7/kutterfonds" -%v --preserve +*.pdf + +Ready to launch the mirror? 
(Y/n) : + +-------------------------------------------------------------------------------- +[13:45] kr:7% httrack --preserve https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/ "fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/*" + -> nur index + +[13:45] kr:7% rm -rf *; httrack --preserve https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/Webhome "fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/*" + -> nur index + +[13:46] kr:7% rm -rf *; httrack --preserve https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/Webhome + -> nur index + +NUR INDEX: weil von viewauth auf view verlinkt wird! + +Neuer Versuch mit https://fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome + +[13:50] kr:7% rm -rf *; httrack --preserve https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome + -> falsche links durch --preserve + -%p preserve html files as is (identical to -K4 -%F "" ) (--preserve) + +NEUER VERSUCH: -%F "" und zweitem URL zum spiegeln + + [13:53] kr:7% rm -rf *; httrack -%F "" https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ + + + +[14:00] kr:7% rm -rf *; httrack -%F "" https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ + + -> sieht gut aus! + -> link zu viewauth ist kaputt, hinzufügen! 
+ +Sieht so aus, als ob ohne --preserve sich der dokumententyp verändert + +Versuch: --preserve und danach -K0 + +[14:12] kr:7% rm -rf *; httrack --preserve -K0 https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/ https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ + + +[14:18] kr:7% permissions.public . && rsync -av --delete ./ wwwkf@free.inf.ethz.ch:/home/wwwkf/www/htdocs/oldwiki/2 + + +-------------------------------------------------------------------------------- +[14:18] kr:7% rm -rf *; httrack --preserve -K0 https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/ https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ "+*.gif" "+*.css" + +-------------------------------------------------------------------------------- +Err, nun ohne preserve! + +[14:41] kr:7% rm -rf *; httrack https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/ https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ "+*.gif" "+*.css" + + +Ausprobieren, ob locale Schuld ist... 
NICHT ZUTREFFEND - neuer Webserver liefert utf8 +[14:11] kr:7% locale -a +C +de_CH.utf8 +de_DE.utf8 +en_US +en_US.iso88591 +en_US.utf8 +POSIX +[14:12] kr:7% locale +LANG=de_CH.UTF-8 +LC_CTYPE="de_CH.UTF-8" +LC_NUMERIC="de_CH.UTF-8" +LC_TIME="de_CH.UTF-8" +LC_COLLATE="de_CH.UTF-8" +LC_MONETARY="de_CH.UTF-8" +LC_MESSAGES="de_CH.UTF-8" +LC_PAPER="de_CH.UTF-8" +LC_NAME="de_CH.UTF-8" +LC_ADDRESS="de_CH.UTF-8" +LC_TELEPHONE="de_CH.UTF-8" +LC_MEASUREMENT="de_CH.UTF-8" +LC_IDENTIFICATION="de_CH.UTF-8" +LC_ALL=de_CH.UTF-8 +[14:12] kr:7% man locale +[14:12] kr:7% export LC_ALL=en_US.iso88591 + +-------------------------------------------------------------------------------- +[14:46] kr:7% rm -rf *; httrack https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/ https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ "+*.gif" "+*.css" +[14:46] kr:7% permissions.public . && rsync -av --delete ./ wwwkf@free.inf.ethz.ch:/home/wwwkf/www/htdocs/oldwiki/2 + +-------------------------------------------------------------------------------- +Einen URL höher, damit auch der viewauth reinkommt..err..komische idee + +[14:48] kr:7% rm -rf *; httrack https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/ https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ "+*.gif" "+*.css" + + +--> rss feed ist noch von fifthelement +--> javascript ist noch von fifthelement + + https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/scripts/ hinzufügen und +*.js + + +-------------------------------------------------------------------------------- +--can-go-up-and-down hinzu: geht zu weit, nimmt alle wikis mit!!! 
+ + +-------------------------------------------------------------------------------- +Final version (named "run"): + +set -e + +rm -rf * +httrack \ + https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/ \ + https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki \ + https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/images \ + https://fifthelement.inf.ethz.ch/css \ + https://fifthelement.inf.ethz.ch/images \ + "+fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/*" \ + ; + + +permissions.public . + +rsync -av --delete ./ wwwkf@free.inf.ethz.ch:/home/wwwkf/www/htdocs/oldwiki/2 + +exit 0 + https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki \ + #https://fifthelement.inf.ethz.ch/twiki/bin/rss \ + + +# breaks: because http auth forgotten on view../ +# https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki "+fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/*" + +# breaks: uses bin/* and retrieves other wikis as well +# https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki + +# works, but is missing pdfs, javascript, etc. +# https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/ \ +# https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki \ + + +# whatever... + ... /css hat nicht die css bei viewauth geändert + "+fifthelement.inf.ethz.ch/twiki/pub/images/" \ + "+fifthelement.inf.ethz.ch/images/" \ + "+fifthelement.inf.ethz.ch/css/" \