801822d60e
Signed-off-by: Nico Schottelius <nico@kr.ethz.ch>
227 lines
10 KiB
Bash
227 lines
10 KiB
Bash
#!/bin/sh
|
|
#
|
|
# 2011 Nico Schottelius (nico-nsbin at schottelius.org)
|
|
#
|
|
# This file is part of nsbin.
|
|
#
|
|
# nsbin is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# nsbin is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with nsbin. If not, see <http://www.gnu.org/licenses/>.
|
|
#
|
|
#
|
|
# Some ideas and a working version to mirror a twiki via httrack
|
|
#
|
|
#
|
|
|
|
# Use https
|
|
# Use /twiki/
|
|
|
|
# Mirror the KutterFondsWiki web with a single httrack invocation, starting
# at the authenticated (viewauth) WebHome page.
#
# The arguments beginning with '+' or '-' are httrack URL filters
# ('+' includes, '-' excludes). Two pairs work together:
#   - every "?raw=" variant is excluded, then "?raw=on" is included again
#   - every "?skin=" variant is excluded, then "?skin=koalaprint" is
#     included again
#
# The whole command must stay one logical line: every line except the last
# ends in a backslash and nothing may be placed between the continued lines.
#
# NOTE(review): the start URL contains /twiki/, but the host-qualified
# filters below (bin/view*, pub/) do not - confirm they match the URLs that
# are actually meant to be mirrored.
httrack -%i -w https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/WebHome \
  -%P -N0 -s0 -p7 -a -K0 -%k -%s -%u \
  -F "Mozilla/4.5 (compatible; HTTrack 3.0x; Linux 2.6.42)" -%F '<!-- Mirrored by your friendly Sysadmin (Nico) from %s%s by HTTrack Website Copier/3.x [XR&CO'"'"'2004], %s -->' \
  -B -%x0 \
  '+*.png' '+*.gif' '+*.jpg' '+*.css' '+*.js' '+*.pdf' '+fifthelement.inf.ethz.ch/*.pdf' \
  '+fifthelement.inf.ethz.ch/bin/view*/KutterFondsWiki/*' \
  '-fifthelement.inf.ethz.ch/bin/view*/KutterFondsWiki/*?raw=*' \
  '+fifthelement.inf.ethz.ch/bin/view*/KutterFondsWiki/*?raw=on' \
  '-fifthelement.inf.ethz.ch/bin/view*/KutterFondsWiki/*?skin=*' \
  '+fifthelement.inf.ethz.ch/bin/view*/KutterFondsWiki/*?skin=koalaprint' \
  '+fifthelement.inf.ethz.ch/pub/KutterFondsWiki/*'
|
|
|
|
# baseurl: https://fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
httrack --preserve
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
[13:22] kr:7% httrack --preserve
|
|
|
|
Welcome to HTTrack Website Copier (Offline Browser) 3.43-9+libhtsjava.so.2
|
|
Copyright (C) Xavier Roche and other contributors
|
|
To see the option list, enter a blank line or try httrack --help
|
|
|
|
Enter project name :kutterfonds
|
|
|
|
Base path (return=/home/users/nico/websites/) :/home/users/nico/kutter/7/
|
|
|
|
Enter URLs (separated by commas or blank spaces) :https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/WebHome https://fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/
|
|
|
|
Action:
|
|
(enter) 1 Mirror Web Site(s)
|
|
2 Mirror Web Site(s) with Wizard
|
|
3 Just Get Files Indicated
|
|
4 Mirror ALL links in URLs (Multiple Mirror)
|
|
5 Test Links In URLs (Bookmark Test)
|
|
0 Quit
|
|
:
|
|
Enter URLs (separated by commas or blank spaces) :https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/WebHome https://fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/
|
|
|
|
Action:
|
|
(enter) 1 Mirror Web Site(s)
|
|
2 Mirror Web Site(s) with Wizard
|
|
3 Just Get Files Indicated
|
|
4 Mirror ALL links in URLs (Multiple Mirror)
|
|
5 Test Links In URLs (Bookmark Test)
|
|
0 Quit
|
|
: 2
|
|
|
|
Proxy (return=none) :
|
|
|
|
You can define wildcards, like: -*.gif +www.*.com/*.zip -*img_*.zip
|
|
Wildcards (return=none) :+*.pdf
|
|
|
|
You can define additional options, such as recurse level (-r<number>), separed by blank spaces
|
|
To see the option list, type help
|
|
Additional options (return=none) :--preserve
|
|
|
|
---> Wizard command line: httrack https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/WebHome https://fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ -W -O "/home/users/nico/kutter/7/kutterfonds" -%v --preserve +*.pdf
|
|
|
|
Ready to launch the mirror? (Y/n) :
|
|
|
|
--------------------------------------------------------------------------------
|
|
[13:45] kr:7% httrack --preserve https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/ "fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/*"
|
|
-> nur index
|
|
|
|
[13:45] kr:7% rm -rf *; httrack --preserve https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/Webhome "fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/*"
|
|
-> nur index
|
|
|
|
[13:46] kr:7% rm -rf *; httrack --preserve https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/bin/viewauth/KutterFondsWiki/Webhome
|
|
-> nur index
|
|
|
|
NUR INDEX: weil von viewauth auf view verlinkt wird!
|
|
|
|
Neuer Versuch mit https://fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome
|
|
|
|
[13:50] kr:7% rm -rf *; httrack --preserve https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome
|
|
-> falsche links durch --preserve
|
|
-%p preserve html files as is (identical to -K4 -%F "" ) (--preserve)
|
|
|
|
NEUER VERSUCH: -%F "" und zweitem URL zum spiegeln
|
|
|
|
[13:53] kr:7% rm -rf *; httrack -%F "" https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/
|
|
|
|
|
|
|
|
[14:00] kr:7% rm -rf *; httrack -%F "" https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/
|
|
|
|
-> sieht gut aus!
|
|
-> link zu viewauth ist kaputt, hinzufügen!
|
|
|
|
Sieht so aus, als ob ohne --preserve sich der dokumententyp verändert
|
|
|
|
Versuch: --preserve und danach -K0
|
|
|
|
[14:12] kr:7% rm -rf *; httrack --preserve -K0 https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/ https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/
|
|
|
|
|
|
[14:18] kr:7% permissions.public . && rsync -av --delete ./ wwwkf@free.inf.ethz.ch:/home/wwwkf/www/htdocs/oldwiki/2
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
[14:18] kr:7% rm -rf *; httrack --preserve -K0 https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/ https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ "+*.gif" "+*.css"
|
|
|
|
--------------------------------------------------------------------------------
|
|
Err, nun ohne preserve!
|
|
|
|
[14:41] kr:7% rm -rf *; httrack https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/ https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ "+*.gif" "+*.css"
|
|
|
|
|
|
Ausprobieren, ob locale Schuld ist... NICHT ZUTREFFEND - neuer Webserver liefert utf8
|
|
[14:11] kr:7% locale -a
|
|
C
|
|
de_CH.utf8
|
|
de_DE.utf8
|
|
en_US
|
|
en_US.iso88591
|
|
en_US.utf8
|
|
POSIX
|
|
[14:12] kr:7% locale
|
|
LANG=de_CH.UTF-8
|
|
LC_CTYPE="de_CH.UTF-8"
|
|
LC_NUMERIC="de_CH.UTF-8"
|
|
LC_TIME="de_CH.UTF-8"
|
|
LC_COLLATE="de_CH.UTF-8"
|
|
LC_MONETARY="de_CH.UTF-8"
|
|
LC_MESSAGES="de_CH.UTF-8"
|
|
LC_PAPER="de_CH.UTF-8"
|
|
LC_NAME="de_CH.UTF-8"
|
|
LC_ADDRESS="de_CH.UTF-8"
|
|
LC_TELEPHONE="de_CH.UTF-8"
|
|
LC_MEASUREMENT="de_CH.UTF-8"
|
|
LC_IDENTIFICATION="de_CH.UTF-8"
|
|
LC_ALL=de_CH.UTF-8
|
|
[14:12] kr:7% man locale
|
|
[14:12] kr:7% export LC_ALL=en_US.iso88591
|
|
|
|
--------------------------------------------------------------------------------
|
|
[14:46] kr:7% rm -rf *; httrack https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/WebHome https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/ https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ "+*.gif" "+*.css"
|
|
[14:46] kr:7% permissions.public . && rsync -av --delete ./ wwwkf@free.inf.ethz.ch:/home/wwwkf/www/htdocs/oldwiki/2
|
|
|
|
--------------------------------------------------------------------------------
|
|
Einen URL höher, damit auch der viewauth reinkommt..err..komische idee
|
|
|
|
[14:48] kr:7% rm -rf *; httrack https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki/ https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/ "+*.gif" "+*.css"
|
|
|
|
|
|
--> rss feed ist noch von fifthelement
|
|
--> javascript ist noch von fifthelement
|
|
|
|
https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/scripts/ hinzufügen und +*.js
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
--can-go-up-and-down hinzu: geht zu weit, nimmt alle wikis mit!!!
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
Final version (named "run"):
|
|
|
|
# Re-create the wiki mirror in the current directory and publish it.
# Must be started from inside the (disposable) mirror directory, because the
# first step deletes the directory's contents.

# Abort as soon as any command fails, so a broken mirror is never uploaded.
set -e

# Remove every non-hidden entry of the current directory left over from the
# previous run. The ./ prefix keeps names that begin with '-' from being
# parsed as rm options.
rm -rf ./*

# Mirror the wiki: the view and viewauth trees, the shared images and css,
# and (through the '+' filter) everything below the wiki's pub/ directory.
# The command is one logical line; nothing may be placed between the
# backslash-continued lines.
httrack \
  https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/ \
  https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki \
  https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/images \
  https://fifthelement.inf.ethz.ch/css \
  https://fifthelement.inf.ethz.ch/images \
  "+fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki/*"

# NOTE(review): permissions.public is not defined in this file - presumably
# it makes the mirrored tree publicly readable before upload; confirm.
permissions.public .

# Upload the mirror; --delete also removes files on the target that no
# longer exist locally.
rsync -av --delete ./ wwwkf@free.inf.ethz.ch:/home/wwwkf/www/htdocs/oldwiki/2

# Stop here: everything after this line in the file is leftover notes.
exit 0
|
|
https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/pub/KutterFondsWiki \
|
|
#https://fifthelement.inf.ethz.ch/twiki/bin/rss \
|
|
|
|
|
|
# breaks: because http auth forgotten on view../
|
|
# https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki "+fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/*"
|
|
|
|
# breaks: uses bin/* and retrieves other wikis as well
|
|
# https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki
|
|
|
|
# works, but is missing pdfs, javascript, etc.
|
|
# https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/view/KutterFondsWiki/ \
|
|
# https://USERNAME:PASSWORD@fifthelement.inf.ethz.ch/twiki/bin/viewauth/KutterFondsWiki \
|
|
|
|
|
|
# whatever... + ... /css hat nicht die css bei viewauth geändert
|
|
"+fifthelement.inf.ethz.ch/twiki/pub/images/" \
|
|
"+fifthelement.inf.ethz.ch/images/" \
|
|
"+fifthelement.inf.ethz.ch/css/" \
|