TightURL

TightURL Git Source Tree

Root/tighturl.php

1<?php
2/**
3 * TightURL :: A blind redirection service
4 *
5 * Copyright (c) 2004-2008, Ron Guerin <ron@vnetworx.net>
6 * portions Copyright (c) 2002,2003 Free Software Foundation
7 *
8 * This file implements a blind redirection service named TightURL.
9 * TightURL is Free Software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * TightURL is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
17 * See the GNU General Public License for more details.
18 *
19 * If you are not able to view the LICENSE, which should
20 * always be possible within a valid and working TightURL release,
21 * please write to the Free Software Foundation, Inc.,
22 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * to get a copy of the GNU General Public License or to report a
24 * possible license violation.
25 *
26 * @package TightURL
27 * @author Ron Guerin <ron@vnetworx.net>
28 * @license http://www.fsf.org/licenses/gpl.html GNU Public License
29 * @copyright Copyright &copy; 2004-2009 Ron Guerin
30 * @filesource
31 * @link http://tighturl.com TightURL
32 * @version 0.1.4
33 *
34 */
35
// Release identification and the oldest PHP version this release supports.
define("VERSION", "0.1.4");
define("REQUIRED_PHP_VERSION", "4.3.0");

// System defaults, DO NOT EDIT THIS FILE
// Edit tighturl.config.inc.php instead!

// Only has an effect if this file is included from inside a function;
// at file scope these names are already global.
global $copyright, $conn, $db, $os, $svcname;

// Database connection defaults
$dbhost  = "localhost";
$dbuser  = "dbuser";
$dbpass  = "dbpass";
$dbname  = "tighturl";
$dbtable = "urls";

// FALSE = full URL path or mod_rewrite, TRUE = 404-Method compressed URLs
$FOFMethod = false;
$os = "";

// URIBL variables: DNS zones to query, and the matching informational sites
$uribl    = array("multi.surbl.org", "black.uribl.com");
$uriblurl = array("www.surbl.org", "www.uribl.com");

// Bad Behavior variables
$BB2       = true;
$BBstats   = true;
$BBstrict  = false;
$BBverbose = true;
$BBLogging = true;
$bb2_settings_defaults = "";

// Require submitted URLs to exist?
$mustexist = true;

// Text strings and style variables
$svcname         = "URLSquisher";
$verbtext        = "Squish";
$pasttext        = "Squished";
$tagline         = "Squish long URLs to make short ones";
$headcolor       = "#006600";
$tablecolor      = "#00CC99";
$copystart       = date("Y");
$copyrightholder = "SquishURL Enterprises";

// Reserved URLs: IDs that are never handed out for stored URLs
$ReservedURL = array(
    "x", "rest", "xmlrpc", "soap", "xml", "atom", "rss", "blog",
    "faq", "help", "about", "api", "code", "source", "docs",
    "git", "cvs", "arch", "url", "admin", "setup", "svn", "project",
    "abuse", "cgi-sys", "exploited"
);
82
83// You REALLY don't want to edit below here unless you know what you're doing.
84
85// *************************************************************************
86
// Refuse to run on a PHP older than the minimum this release declares.
if (version_compare(phpversion(), REQUIRED_PHP_VERSION) < 0) {
    die_HTML($svcname, "Error: TightURL ".VERSION." needs PHP >= ".REQUIRED_PHP_VERSION." (you are using ".phpversion().")");
}

// The installer must not be left next to a live installation.
if (file_exists("tighturl-install.php")) {
    die_HTML($svcname, "Error: You must remove tighturl-install.php before using $svcname.");
}

// Classify the host OS as "win" or "nix". Only the first three letters are
// tested: PHP_OS is "Darwin" on macOS, and a plain substring search for
// "win" would misreport it as Windows.
$os = (strtoupper(substr(PHP_OS, 0, 3)) === "WIN") ? "win" : "nix";
94
// Regex fragment matching everything after the scheme of an acceptable URL:
// optional user[:password]@, then a host (dotted-quad IPv4, IPv6, IPv6 with
// embedded IPv4, "localhost", or a domain name under a known TLD), an
// optional port, and an optional path/query. It is meant to be wrapped as
// "/^" . $validschemes . $validurlpattern . "$/"; the whole host is one
// capture group.
$validurlpattern = "\:\/\/([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)"
    . "*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])"
    . "\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"
    . "\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"
    . "\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])"
    . "|((([0-9A-F]{1,4}(((:[0-9A-F]{1,4}){5}::[0-9A-F]{1,4})|((:[0-9A-F]{1,4}){4}"
    . "::[0-9A-F]{1,4}(:[0-9A-F]{1,4}){0,1})|((:[0-9A-F]{1,4}){3}::[0-9A-F]{1,4}"
    . "(:[0-9A-F]{1,4}){0,2})|((:[0-9A-F]{1,4}){2}::[0-9A-F]{1,4}(:[0-9A-F]{1,4})"
    . "{0,3})|(:[0-9A-F]{1,4}::[0-9A-F]{1,4}(:[0-9A-F]{1,4}){0,4})|(::[0-9A-F]{1,4}"
    . "(:[0-9A-F]{1,4}){0,5})|(:[0-9A-F]{1,4}){7}))|(::[0-9A-F]{1,4}(:[0-9A-F]{1,4}"
    . "){0,6}))|::)|((([0-9A-F]{1,4}(((:[0-9A-F]{1,4}){3}::([0-9A-F]{1,4}){1})"
    . "|((:[0-9A-F]{1,4}){2}::[0-9A-F]{1,4}(:[0-9A-F]{1,4}){0,1})|((:[0-9A-F]{1,4})"
    . "{1}::[0-9A-F]{1,4}(:[0-9A-F]{1,4}){0,2})|(::[0-9A-F]{1,4}(:[0-9A-F]{1,4}"
    . "){0,3})|((:[0-9A-F]{1,4}){0,5})))|([:]{2}[0-9A-F]{1,4}(:[0-9A-F]{1,4}){0,4}))"
    . ":|::)((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{0,2})\.){3}(25[0-5]|2[0-4][0-9]|"
    . "[0-1]?[0-9]{0,2})"
    . "|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org"
    . "|mobi|biz|arpa|info|name|pro|aero|coop|museum"
    . "|[a-zA-Z]{2}))(\:[0-9]+)*(\/.($|[a-zA-Z0-9\.\:\,\?\'\(\)\\\*\+&%\$;|#\=~_\-\s@]*))*\/*";

// Regex fragment matching "://a.b.c.d" where the host is a dotted-quad IPv4
// address; each octet is its own capture group. The trailing lookahead
// requires the address to be followed by a port, path, query, fragment or
// the end of the string. Without it "http://1.2.3.4.example.com/" would be
// taken for the IP 1.2.3.4 and the real domain would never be checked.
$validipv4pattern = ":\/\/(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\."
    . "(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])"
    . "\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])"
    . "(?=[:\/?#]|\$)\/*";

// File extensions that are refused as link targets ("pif" was listed twice).
$forbid = "\.(cmd|bat|exe|scr|pif|vbs|js|msi|cdr)";
120
121// ****** !All overridable configuration variables must go above this line! ******
122
// Switches that have no hard-coded default above are enabled unless
// something has already defined them.
if (! isset($antiabuse)) {
    $antiabuse = true;
}
if (! isset($netchecks)) {
    $netchecks = true;
}
if (! isset($mustexist)) {
    $mustexist = true;
}

// Status values as acted on elsewhere in this file (lookup_ID and
// PolicyBan_ID): 3 = blacklist page, 4 = policy ban, 5 = complaints page;
// any other value redirects normally.

// Optional pattern files first, then the site configuration, each loaded
// only when present. The site configuration comes last so it can override
// anything set above or by the pattern files.
if (file_exists("tighturl.urlpattern.inc.php")) {
    include("tighturl.urlpattern.inc.php");
}
if (file_exists("tighturl.tltpattern.inc.php")) {
    include("tighturl.tltpattern.inc.php");
}
if (file_exists("tighturl.redirpattern.inc.php")) {
    include("tighturl.redirpattern.inc.php");
}
if (file_exists("tighturl.ptcpattern.inc.php")) {
    include("tighturl.ptcpattern.inc.php");
}
if (file_exists("tighturl.blpattern.inc.php")) {
    include("tighturl.blpattern.inc.php");
}
if (file_exists("tighturl.config.inc.php")) {
    include("tighturl.config.inc.php");
}
135
// Figure out our copyright string: a single year, or "start-current".
$thisyear = date("Y");
$copyright = $copystart;
if ($copystart != $thisyear) $copyright .= "-" . $thisyear;

// Build the comma-separated HTML list of URIBLs being consulted. A zone
// without a matching entry in $uriblurl (e.g. a config that overrides only
// $uribl) is shown as plain text instead of reading an undefined index.
$uribls = "";
for ($i = 0; $i < count($uribl); $i++) {
    if ($i > 0) $uribls .= ", ";
    if (isset($uriblurl[$i])) {
        $uribls .= "<a href='http://" . $uriblurl[$i] . "'>" . $uribl[$i] . "</a>";
    }
    else {
        $uribls .= $uribl[$i];
    }
}

// Without the 404-Method, IDs are passed as a query parameter. In
// 404-Method mode $parm must still be defined (as empty) because the
// templates substitute it unconditionally.
if (! $FOFMethod) $parm = "?i=";
elseif (! isset($parm)) $parm = "";

// Figure out correct self: the path prefix generated links are built on,
// always ending in "/".
$requesturi = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : "";
if (strncmp($_SERVER['PHP_SELF'], $requesturi, strlen($_SERVER['PHP_SELF'])) != 0) {
    // The request did not name the script itself, so use the script's directory.
    if (preg_match("|(.*)/.*$|", $_SERVER['PHP_SELF'], $matches)) $self = $matches[1];
    elseif (! isset($self)) $self = "";
}
else {
    $self = $_SERVER['PHP_SELF']; // We need the script name
}
if (! preg_match("|.*/$|", $self)) $self .= "/";
157
// Open the MySQL connection and select the database; either failure ends
// the request with a plain error page.
$conn = @mysql_connect($dbhost, $dbuser, $dbpass);
if (! $conn) {
    die_HTML($svcname, "Error: Cannot connect to database.");
}
$db = mysql_select_db($dbname, $conn);
if (! $db) {
    die_HTML($svcname, "Error: Cannot select database. ". mysql_error());
}

// Bad Behavior defaults to on; set $BB2 to FALSE in the config to turn it off.
if (! isset($BB2)) {
    $BB2 = true;
}

// Load Bad Behavior (highly recommended) when it is enabled and installed.
// If it is not installed the flag is cleared so later code does not call
// into it. TODO: warn user if $BB2 but not BB2
if (! $BB2 || ! file_exists("bad-behavior/bad-behavior-tighturl.php")) {
    $BB2 = FALSE;
}
else {
    require_once("bad-behavior/bad-behavior-tighturl.php");
}
168
// Figure out what kind of request this is and service it.

// Save request: "save=y" plus a non-empty "url". The URL is taken from the
// raw query string rather than $_REQUEST so that an unencoded submission
// containing "&" is not cut short. Capture starts at the FIRST "url="
// parameter; a greedy ".*url=" would start at the last "url=" and so
// truncate any submitted URL that itself contains "url=".
$savematch = array();
if ((isset($_REQUEST['save']) && $_REQUEST['save'] == 'y')
    && (isset($_REQUEST['url']) && is_string($_REQUEST['url'])
        && ! empty($_REQUEST['url']) && trim($_REQUEST['url']) != "")
    && isset($_SERVER['QUERY_STRING'])
    && (preg_match("/(?:^|&)url=(.*)$/", $_SERVER['QUERY_STRING'], $savematch) != 0)) {
    $url = trim($savematch[1]);
    // Drop a trailing "&tighturlaction..." parameter added by the submit form.
    if (preg_match("/^(.*)&tighturlaction.*$/", $savematch[1], $matches)) $url = $matches[1];
    save_URL(urldecode($url));
}
// Lookup by explicit ID parameter.
elseif (isset($_REQUEST['i']) && !empty($_REQUEST['i'])) {
    lookup_ID($_REQUEST['i']);
}
// Policy ban: only reachable when $pbi names the (secret) request parameter.
elseif (isset($pbi) && !empty($pbi) && isset($_REQUEST[$pbi]) && !empty($_REQUEST[$pbi])) {
    PolicyBan_ID($_REQUEST[$pbi]);
}
// 404-Method: the ID is the first path segment of the request URI.
elseif ($FOFMethod && preg_match("/^\/+([a-zA-Z0-9]+)\/*(.*)\/*$/", $_SERVER['REQUEST_URI'], $matches)) {
    lookup_ID($matches[1]);
}
elseif ($FOFMethod && $_SERVER['REQUEST_URI'] != "/") {
    display_HTML("", "", "Error: Couldn't find a valid " . $svcname . " URI.");
}
// Anything else gets the main page.
else {
    display_HTML("", "main");
}
exit;
196
197// *************************************************************************
198
199
/**
 * Escape a string so it can be placed inside a quoted SQL string literal.
 *
 * Thin wrapper around mysql_real_escape_string(), called without an
 * explicit link argument.
 *
 * @param string $string Raw value.
 * @return string Escaped value as returned by mysql_real_escape_string().
 */
function sanitize_sql_string($string) {
    $escaped = mysql_real_escape_string($string);
    return $escaped;
}
206
/**
 * Counts the number of times a substring is contained in a given string.
 *
 * Occurrences may overlap: the search resumes one character after the
 * start of each hit, so countSubstrs("aaa", "aa") is 2.
 *
 * An empty needle yields 0. Without that guard an empty needle matches at
 * every position (including in the empty string) on PHP 8, and the search
 * would never terminate.
 *
 * @param string $haystack String to search in.
 * @param string $needle   Substring to look for.
 * @return int Number of (possibly overlapping) occurrences.
 */
function countSubstrs($haystack, $needle) {
    if ((string) $needle === "") return 0;

    $count  = 0;
    $offset = 0;
    while (($p = strpos($haystack, $needle, $offset)) !== false) {
        $count++;
        $offset = $p + 1; // never exceeds strlen($haystack) for a non-empty needle
    }
    return $count;
}
213
/**
 * Checks to see if a given URI is on a URI blacklist.
 * Currently this means SURBL (http://www.surbl.org) and URIBL (http://www.uribl.com)
 *
 * For an IPv4 host the quads are reversed and queried; for a host name the
 * name is reduced to its base domain (one level below the TLD, or two when
 * the host matches $tltlds) and queried. A zone counts as a hit when the
 * lookup resolves to an address starting with "127.".
 *
 * Known limitations (unchanged): redirections are not resolved, IPv6 is not
 * looked up, only one URI is checked, HTML-entity-encoded URIs are not
 * decoded, and URIBL's additional third-level listings are not handled.
 *
 * @param string $uri URI to check.
 * @return string|false Comma-separated names of the URIBL zones listing the
 *                      URI ("" when none do), or FALSE when the anti-abuse
 *                      or network checks are off, the URI is empty, or no
 *                      host can be extracted from it.
 */
function URI_on_URIBL($uri) {
    global $uribl, $validschemes, $validurlpattern, $validipv4pattern,
           $antiabuse, $netchecks, $tltlds;

    // Everything gets a pass if antiabuse or network tests are off.
    if ((! $antiabuse) || (! $netchecks)) return(false);

    if (! $uri) return(false);

    // Test for IPv4 address, reverse the quads if found
    if (preg_match("/^".$validschemes.$validipv4pattern."/", $uri, $matches)) {
        $domain = $matches[5] . "." . $matches[4] . "." . $matches[3] . "." . $matches[2];
    }
    else {
        // The host is capture group 4 once $validschemes (one group) is
        // prefixed. If the URI does not match there is nothing to query.
        if (! preg_match("/^".$validschemes.$validurlpattern."$/", $uri, $matches)
            || ! isset($matches[4]) || $matches[4] == "") {
            return(false);
        }
        $domain = $matches[4];

        // Keep one extra level for hosts under a listed two-level TLD.
        // An unset or empty $tltlds must not match, or every host would be
        // treated as three-level and the base domain never queried.
        if (isset($tltlds) && $tltlds != "" && preg_match("/".$tltlds."$/", $domain)) {
            $levels = 2;
        }
        else {
            $levels = 1;
        }

        // Reduce the host to its last ($levels + 1) labels.
        $labels = explode(".", $domain);
        if (count($labels) > $levels + 1) {
            $domain = implode(".", array_slice($labels, -($levels + 1)));
        }
    }

    // Query URI blacklists to see if domain/IP appears as target in known spam
    // or something involved in a malware/phishing attack.
    $uribls = "";
    for ($i = 0; $i < count($uribl); $i++) {
        $fqdn = $domain . "." . $uribl[$i];
        $recexists = gethostbyname($fqdn); // returns the unmodified name on failure
        if (($recexists != $fqdn) && preg_match("<^127\.>", $recexists)) {
            if ($uribls != "") $uribls .= ", ";
            $uribls .= $uribl[$i];
        }
    }
    return ($uribls); // change to return an array of indexes into the URIBL array
}
280
/**
 * Checks to see if a given decimal ID corresponds to a Reserved URL.
 *
 * The ID is converted to Base-36 and compared, case-insensitively on the
 * reserved side, with every entry of $ReservedURL. The comparison is
 * strict, so two numeric-looking strings are never compared as numbers.
 *
 * @param int|string $decimal Decimal ID.
 * @return bool TRUE if the ID is listed as a Reserved URL, FALSE otherwise
 *              (including for an empty or zero ID).
 */
function on_Reserve($decimal) {
    global $ReservedURL;

    if (! $decimal || ! is_array($ReservedURL)) return FALSE;

    $sexatrigesimal = base_convert((string) $decimal, 10, 36);
    foreach ($ReservedURL as $reserved) {
        if ($sexatrigesimal === strtolower($reserved)) return TRUE;
    }
    return FALSE;
}
299
/**
 * Save the given URL in the database if unique and display its TightURL,
 * or display the existing TightURL for a URL that is already stored.
 * The ID in the TightURL is a sexatrigesimal (Base-36) number.
 *
 * Before anything is stored the URL is rejected (with an error page) when
 * it points back into this service, does not match the valid-URL pattern,
 * ends in a forbidden extension, matches the optional $bl / $redir / $ptc
 * patterns, would not be shorter as a TightURL, or is listed on a URIBL.
 *
 * Every exit path produces a page via display_HTML() or die_HTML().
 *
 * @param string $url URL submitted by the client (already URL-decoded).
 * @return void
 */
function save_URL($url) {
    global $dbtable, $svcname, $FOFMethod, $validschemes, $validurlpattern, $pasttext, $self, $redir, $bl,
           $antiabuse, $netchecks, $ptc, $forbid, $mustexist;

    // This client is always answered with a fixed link; nothing is stored.
    $useragent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : "";
    if ($useragent == "Python-urllib/2.5") {
        display_HTML("", "save", "", $url, "http://tighturl.com/haibot");
        exit;
    }

    // The host and path are quoted so they match literally. Unquoted, "."
    // matches any character and a "|" in the Host header breaks the pattern.
    $host     = $_SERVER['HTTP_HOST'];
    $selfref  = "|^(http://" . preg_quote($host . $self, "|") . ")|i";
    $aliasref = "|^(http://" . preg_quote("www." . $host . $self, "|") . ")|i";

    if (preg_match($selfref, $url) || preg_match($aliasref, $url)) {
        display_HTML("", "", "Error: A " . $svcname . " URL cannot point to another URL within " . htmlspecialchars($host . $self, ENT_QUOTES) . " .", $url, "", $url);
        return;
    }
    if (! preg_match("/^".$validschemes.$validurlpattern."$/", $url)) {
        display_HTML("", "", "Error: That URL (".htmlspecialchars(strip_tags($url)).") is not valid.", $url, "", $url);
        return;
    }
    if (($forbid != "") && (preg_match("!.*".$forbid."$!i", $url))) {
        display_HTML("", "", "Error: Executable URIs are not accepted here due to phishing/malware abuse.", $url, "", $url);
        return;
    }
    // NOTE: the "submitted URL must exist" check via Resolve_URL() is
    // disabled in this release; $mustexist is currently not consulted here.

    // The three optional patterns come from include files that may be
    // absent. An unset or empty pattern must be skipped, otherwise the
    // resulting ".*.*" matches, and rejects, every URL.
    if (isset($bl) && $bl != "" && preg_match("/.*".$bl.".*/i", $url)) {
        display_HTML("", "", "Error: URL rejected for violating our terms of use.", $url, "", $url);
        return;
    }
    if (isset($redir) && $redir != "" && preg_match("<.*".$redir.".*>i", $url)) {
        display_HTML("", "", "Error: ".$svcname." is not a URL obfuscation service, and does not accept redirection links.", $url, "", $url);
        return;
    }
    if (isset($ptc) && $ptc != "" && preg_match("/.*".$ptc.".*/i", $url)) {
        display_HTML("", "", "Error: ".$svcname." does not accept PTC (Pay To Click) links due to spamming abuse.", $url, "", $url);
        return;
    }

    $safeurl = sanitize_sql_string($url);
    $remote  = sanitize_sql_string($_SERVER['REMOTE_ADDR']);

    // Estimate the TightURL from the next likely ID to see whether it
    // would actually be shorter than what was submitted.
    $result = mysql_query("SELECT MAX(id) FROM $dbtable");
    if (! $result) {
        display_HTML("", "", "Error: $svcname system error.", $url, "", $url);
        return; // do not carry on and emit a second page
    }
    $lastid = mysql_result($result, 0) + 1;
    $guessurl = "http://" . $host . $self;
    if (! $FOFMethod) $guessurl .= "?i="; // We need the parameter tag
    $guessurl .= base_convert($lastid, 10, 36);
    if (strlen($guessurl) >= strlen($url)) {
        display_HTML("", "", "Fail: That URL cannot be shortened by $svcname. Sorry!", $url, "", $url);
        return;
    }

    // Refuse URLs listed on a URI blacklist.
    if ($antiabuse && $netchecks) {$lists = URI_on_URIBL($url);} else {$lists = false;}
    if ($lists) {
        display_HTML("HTTP/1.0 403 Forbidden", "", "Error: Submitted URL (" . htmlspecialchars(strip_tags($url), ENT_QUOTES) . ") is listed in " . $lists . ". You may not create a " . $svcname . " link for it.");
        return;
    }

    // Look for an existing record, with and without a trailing slash.
    $testurl = rtrim($safeurl, "/");
    $rows = 0;
    $res = mysql_query("SELECT * FROM $dbtable WHERE url = '$testurl/';");
    $srows = $res ? mysql_num_rows($res) : 0;
    if ($srows == 0) {
        $res = mysql_query("SELECT * FROM $dbtable WHERE url = '$testurl';");
        $rows = $res ? mysql_num_rows($res) : 0;
    }

    if ($rows == 0 && $srows == 0) {
        // Insert, retrying while the generated ID collides with a Reserved URL.
        do {
            $req  = "INSERT INTO $dbtable (id, url, adddate, addip) ";
            $req .= "VALUES ('', '$safeurl', NOW(), '$remote');";
            if (mysql_query($req)) {
                $decimal = mysql_insert_id();
            }
            else {
                die_HTML($svcname, "Error: Database failure.");
            }
            $reserved_id = on_Reserve($decimal);
            if ($reserved_id) {
                // Delete this record so it doesn't override the reserved ID.
                $req = "DELETE FROM $dbtable WHERE id = '$decimal';";
                $res = mysql_query($req) or die_HTML($svcname, "Error: Database failure.");
            }
        } while ($reserved_id);
    }
    else {
        // Return existing ID for this duplicate request
        $decimal = mysql_result($res, 0, "id");
    }

    $address = "http://" . $host . $self;
    if (! $FOFMethod) $address .= "?i="; // We need the parameter tag
    $address .= base_convert($decimal, 10, 36); // Append the Base-36 ID to the URL
    display_HTML("", "save", "", $url, $address);
}
404
/**
 * Mark a stored TightURL as banned for a policy violation (status 4).
 *
 * The outcome is always reported with a plain page and the script ends:
 * 200 when the record was updated, 404 when no record with a non-empty URL
 * exists for the ID, 500 on a database failure.
 *
 * @param string $sexatrigesimal Base-36 ID taken from the request.
 * @return void This function does not return; every path ends in die_HTML().
 */
function PolicyBan_ID($sexatrigesimal) {
    global $dbtable, $svcname;

    // First, convert unsafe user input sexatrigesimal to decimal, which will be safe.
    $decimal = base_convert ($sexatrigesimal, 36, 10);

    $req = "SELECT * FROM $dbtable WHERE id = '$decimal';";
    $res = mysql_query($req) or die_HTML($svcname, "Error: Query failed");

    $rows = mysql_num_rows($res);
    if (($rows != 0) && (mysql_result($res, 0, "url") != "")) {
        $req = "update $dbtable set status='4' where id='$decimal';";
        if (! mysql_query($req)) {
            // Do not claim success when the ban was not recorded.
            die_HTML($svcname, "Error: Database failure.");
        }
        // base_convert() skips characters that are not Base-36 digits, so the
        // raw input can still contain markup; escape it before echoing it.
        die_HTML($svcname, "ID: " . htmlspecialchars($sexatrigesimal, ENT_QUOTES) . " banned for policy violation.", "HTTP/1.0 200 OK");
    }

    // Unknown ID: answer explicitly instead of returning an empty page.
    die_HTML($svcname, "Error: That " . $svcname . " ID is not in our database.", "HTTP/1.0 404 Not Found");
}
424
/**
 * Resolve a TightURL ID and answer the request for it.
 *
 * The ID arrives as a sexatrigesimal (Base-36) number and is converted to
 * decimal for the database lookup. Depending on what is found:
 *  - a stored URL with status 5, 4 or 3 gets the "complaints", "policy" or
 *    "blacklist" template with a 403;
 *  - any other stored URL has its hit counter updated and is answered with
 *    a 301 redirect;
 *  - an ID that is not stored and not reserved gets a 404 page;
 *  - a reserved ID is dispatched to the matching API stub, or shown using
 *    the template of the same name.
 *
 * @param string $sexatrigesimal Base-36 ID taken from the request.
 * @return void
 */
function lookup_ID($sexatrigesimal) {
    global $dbtable, $svcname;

    // First, convert unsafe user input sexatrigesimal to decimal, which will be safe.
    $decimal = base_convert ($sexatrigesimal, 36, 10);

    $res = mysql_query("SELECT * FROM $dbtable WHERE id = '$decimal';") or die_HTML($svcname, "Error: Query failed");

    $stored = (mysql_num_rows($res) != 0) && (mysql_result($res, 0, "url") != "");

    if ($stored) {
        // Banned records get an explanatory page instead of a redirect.
        $status = mysql_result($res, 0, "status");
        if ($status == 5) {
            display_HTML("HTTP/1.0 403 Forbidden", "complaints");
            return;
        }
        if ($status == 4) {
            display_HTML("HTTP/1.0 403 Forbidden", "policy");
            return;
        }
        if ($status == 3) {
            display_HTML("HTTP/1.0 403 Forbidden", "blacklist");
            return;
        }

        $url = stripslashes(mysql_result($res, 0, "url"));

        // Record the hit, then redirect.
        mysql_query("update $dbtable set lasthit=NOW(), hits=hits+1 where id='$decimal';");
        header("HTTP/1.0 301 Moved Permanently");
        header("Location: $url");
        return;
    }

    if (! on_Reserve($decimal)) { // Not found, Not on reserve
        display_HTML("HTTP/1.0 404 Not Found", "", "Error: That " . $svcname . " ID is not in our database.");
        return;
    }

    // It's a(n implied) Reserved URL: either an API endpoint or a template.
    $sexatrigesimal = strtolower($sexatrigesimal);
    if ($sexatrigesimal == "rest") {
        api_REST();
    }
    elseif ($sexatrigesimal == "xmlrpc") {
        api_XMLRPC();
    }
    elseif ($sexatrigesimal == "soap") {
        api_SOAP();
    }
    else {
        display_HTML("", $sexatrigesimal);
    }
}
488
/**
 * Placeholder for the REST API: answers 501 Not Implemented and ends the script.
 */
function api_REST() {
    global $svcname; // needed for the page title; it was previously undefined here

    die_HTML($svcname, "Error: REST API not implemented yet.", "HTTP/1.0 501 Not Implemented");
}
492
/**
 * Placeholder for the XML-RPC API: answers 501 Not Implemented and ends the script.
 */
function api_XMLRPC() {
    global $svcname; // needed for the page title; it was previously undefined here

    die_HTML($svcname, "Error: XML-RPC API not implemented yet.", "HTTP/1.0 501 Not Implemented");
}
496
/**
 * Placeholder for the SOAP API: answers 501 Not Implemented and ends the script.
 */
function api_SOAP() {
    global $svcname; // needed for the page title; it was previously undefined here

    die_HTML($svcname, "Error: SOAP API not implemented yet.", "HTTP/1.0 501 Not Implemented");
}
500
/**
 * Display HTML page using template and template variables.
 *
 * Reads the main system template file (tighturl.tmpl). If $template is
 * given, tighturl.$template.tmpl is read and substituted for $HTML in the
 * main template; a missing subtemplate ends the script with an error page.
 * Any remaining $HTML is then replaced by $content, and the other template
 * variables are replaced from the arguments and from global settings.
 *
 * Substitution is literal (str_replace), so characters such as "$1" or
 * "\1" inside a URL or message are output unchanged. Values that originate
 * from the request ($url, $tighturl, $input, the Host header) are
 * HTML-escaped; $content is treated as trusted HTML supplied by the caller.
 *
 * @param string $code     HTTP status line, e.g. "HTTP/1.0 404 Not Found";
 *                         an empty string means "HTTP/1.0 200 OK".
 * @param string $template Subtemplate name, or "" for none.
 * @param string $content  HTML to substitute for $HTML. A message starting
 *                         with "Error:" is rendered big and red.
 * @param string $url      URL submitted to TightURL.
 * @param string $tighturl TightURL generated for $url.
 * @param string $input    Submitted URL passed back when it was rejected.
 * @return void
 *
 * Template variables are words in all capital letters that start with a
 * $ symbol. Longer names are always replaced before shorter names they
 * contain.
 * - $HTML : subtemplate contents, then $content
 * - $PARM : Parameter tag when not using 404-Method
 * - $URL : URL submitted to TightURL
 * - $URLLEN : Length of the submitted URL
 * - $TIGHTURL : TightURL generated for the submitted URL
 * - $TIGHTURLLEN : Length of generated TightURL
 * - $TIGHTURLVER : TightURL version
 * - $DIFF : Difference in length between submitted and TightURLs
 * - $INPUT : Bad input URL being passed back to output form
 * - $SVCNAME : Name of the TightURL service
 * - $HEADCOLOR : Color of the H1 Header tag
 * - $TABLECOLOR : Color of the table containing URL input field
 * - $TAGLINE : Tagline of the TightURL service
 * - $CPASTTEXT : Capitalized past-tense word for tightening URLs
 * - $PASTTEXT : Non-Capitalized past-tense word for tightening URLs
 * - $CVERBTEXT : Capitalized action word for tightening URLs
 * - $VERBTEXT : Non-Capitalized action word for tightening URLs
 * - $COPYRIGHT : Copyright duration string
 * - $COPYRIGHTHOLDER : Name of copyright holder
 * - $URIBLS : HTML string of URIBLs TightURL is checking
 * - $HOST : Hostname TightURL is running on
 * - $SELF : Name TightURL is invoked as
 * - $BBSTATS : Bad Behavior statistics (empty when Bad Behavior is off)
 * - $__ : a literal "$", for showing template variable names as text
 */
function display_HTML ($code, $template, $content="", $url="", $tighturl="", $input="") {
    global $svcname, $verbtext, $pasttext, $tagline, $uribls, $parm,
           $headcolor, $tablecolor, $copyright, $copyrightholder, $self, $BB2;

    // Comparison, not assignment: "=" here used to blank every status line
    // passed in, so 403/404 pages never carried their status.
    if ($code == "") $code = "HTTP/1.0 200 OK";
    $template = rtrim($template, "/");

    if (! file_exists("tighturl.tmpl")) {
        die_HTML($svcname, "Error: <big><font color='red'>Error: TightURL Redirection service (" . $svcname . ") site template not found.</font></big>");
        return;
    }

    $html = file_get_contents("tighturl.tmpl");
    if ($template != "") {
        if (! file_exists("tighturl." . $template . ".tmpl")) {
            die_HTML($svcname, "Error: Template file tighturl." . $template . ".tmpl cannot be found.");
            return;
        }
        $html = str_replace('$HTML', file_get_contents("tighturl." . $template . ".tmpl"), $html);
    }

    if (substr($content, 0, 6) == "Error:") {
        $content = str_replace("Error:", "<big><font color='red'>Error:", $content) . "</font></big>";
    }
    if ($content) $content .= "<br />\n";

    $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : "";

    // Always replace longer similar tokens before shorter ones. Things won't work the
    // way you expect if you replace $URL first, and then replace $URLMORELETTERS.
    $html = str_replace('$HTML', $content, $html);
    $html = str_replace('$PARM', (string) $parm, $html);
    $html = str_replace('$URLLEN', (string) strlen($url), $html);
    $html = str_replace('$URL', htmlspecialchars(strip_tags($url), ENT_QUOTES), $html);
    $html = str_replace('$INPUT', htmlspecialchars($input, ENT_QUOTES), $html);
    $html = str_replace('$TIGHTURLVER', VERSION, $html);
    $html = str_replace('$TIGHTURLLEN', (string) strlen($tighturl), $html);
    $html = str_replace('$TIGHTURL', htmlspecialchars($tighturl, ENT_QUOTES), $html);
    $html = str_replace('$DIFF', (string) (strlen($url) - strlen($tighturl)), $html);
    $html = str_replace('$SVCNAME', $svcname, $html);
    $html = str_replace('$HEADCOLOR', $headcolor, $html);
    $html = str_replace('$TABLECOLOR', $tablecolor, $html);
    $html = str_replace('$TAGLINE', $tagline, $html);
    $html = str_replace('$CPASTTEXT', $pasttext, $html);
    $html = str_replace('$PASTTEXT', strtolower($pasttext), $html);
    $html = str_replace('$VERBTEXT', strtolower($verbtext), $html);
    $html = str_replace('$CVERBTEXT', $verbtext, $html);
    $html = str_replace('$COPYRIGHTHOLDER', $copyrightholder, $html);
    $html = str_replace('$COPYRIGHT', $copyright, $html);
    $html = str_replace('$URIBLS', (string) $uribls, $html);
    $html = str_replace('$HOST', htmlspecialchars($host, ENT_QUOTES), $html);
    $html = str_replace('$SELF', (string) $self, $html);
    $html = str_replace('$__', '$', $html); // Template Variables shown as text instead of substituted

    // The page title must be plain text even if a substituted value carried markup.
    if (preg_match("|<title>(.*)</title>|is", $html, $matches)) {
        $html = str_replace($matches[0], "<title>" . strip_tags($matches[1]) . "</title>", $html);
    }

    if ($BB2) {
        $html = str_replace('$BBSTATS', bb2_insert_stats(), $html);
        // Bad Behavior's head markup goes first inside <head>.
        $bb2code = bb2_insert_head();
        if (preg_match("|<head>(.*)</head>|is", $html, $matches)) {
            $html = str_replace($matches[0], "<head>\n" . $bb2code . $matches[1] . "</head>", $html);
        }
    }
    else {
        $html = str_replace('$BBSTATS', "", $html);
    }

    header($code);
    echo $html;
}
620
/**
 * Send a minimal, template-free HTML error page and end the script.
 *
 * Intended for failures where the site template cannot be used; use
 * display_HTML to report a problem inside the TightURL site template.
 *
 * @param string $svcname Service name, used as the page title.
 * @param string $errmsg  Message placed in the body; it is output as given.
 * @param string $code    HTTP status line to send.
 * @return void Never returns; the script terminates.
 */
function die_HTML($svcname, $errmsg, $code="HTTP/1.0 500 Internal Server Error") {
    header($code);

    $page  = "<html>\n <head>\n <title>" . $svcname . "</title>\n </head>\n <body>\n";
    $page .= " " . $errmsg . "<br />\n";
    $page .= " </body>\n</html>";

    echo $page;
    exit;
}
633
/**
 * Verifies the existence and accessibility of a resource in a given URL.
 *
 * Issues an HTTP/1.0 GET and inspects the response headers only. HEAD is
 * not used because some large sites answer it incorrectly (404 or 405 for
 * resources that exist).
 *
 * Redirects (300, 301, 302, 303, 307, 308) are followed through their
 * Location header, recursively, at most $chainlen times. Absolute,
 * scheme-relative ("//host/path") and absolute-path ("/path") targets are
 * understood. A 404, 408 or 410 means the resource does not exist; any
 * other status is accepted as a final destination.
 *
 * BUG: Presently only does HTTP (port 80 unless the URL names a port).
 * HTML and JavaScript redirects in the body are not detected.
 *
 * @param string $url           URL to verify.
 * @param array  $resolvedchain By reference. Every URL visited is appended;
 *                              initialised to array("") when unset.
 * @param int    $chainlen      Number of redirects that may still be followed.
 * @return string|false The final URL, or FALSE if the resource does not
 *                      exist, cannot be reached, or the redirect chain is
 *                      too long. $url is returned unchecked when
 *                      $netchecks is off.
 */
function Resolve_URL ($url, &$resolvedchain, $chainlen=12) {
    global $netchecks;

    if (! isset($resolvedchain)) $resolvedchain = array("");

    if (! $netchecks) {
        // If network checks off, accept submitted URL as resolved.
        return($url);
    }

    $parsed = parse_url($url);
    if (! is_array($parsed) || empty($parsed['host'])) return(false); // nothing to connect to

    // $pre is scheme://[user[:pass]@]host[:port], used to complete relative redirects.
    $scheme = isset($parsed['scheme']) ? $parsed['scheme'] : '';
    $pre  = ($scheme != '') ? $scheme . '://' : '';
    $pre .= isset($parsed['user']) ? $parsed['user'].(isset($parsed['pass']) ? ':'.$parsed['pass'] : '').'@' : '';
    $pre .= $parsed['host'];
    $pre .= isset($parsed['port']) ? ':'.$parsed['port'] : '';

    // $post is the request target. The fragment is deliberately left out:
    // it is client-side only and is never sent to a server.
    if (isset($parsed['path']))
        $post = (substr($parsed['path'], 0, 1) == '/') ? $parsed['path'] : ('/'.$parsed['path']);
    else
        $post = "/";
    $post .= isset($parsed['query']) ? '?'.$parsed['query'] : '';

    $resolved = false;

    // Change this to support all protocols TightURL supports, not just HTTP
    $port = (isset($parsed['port']) && $parsed['port'] != 0) ? $parsed['port'] : 80;
    $ip = gethostbyname($parsed['host']);
    $connection = @fsockopen($ip, $port, $errno, $errstr, 5);
    if (! $connection) return(false);

    stream_set_timeout($connection, 5);
    // HTTP/1.0 plus Connection: Close so the server ends the stream for us.
    fwrite($connection, "GET ".$post." HTTP/1.0\r\nHost: ".$parsed['host']."\r\nConnection: Close\r\n\r\n");

    while (!feof($connection)) {
        $line_read = fgets($connection);
        // A read failure or the blank line (just a line terminator) that
        // ends the headers means there is nothing more to inspect.
        if ($line_read === false || rtrim($line_read, "\r\n") === "") break;

        if (preg_match("/^HTTP\/\S+\s+(\d{3})/", $line_read, $matches)) { // status line
            switch ((int) $matches[1]) {
                case 300:
                case 301:
                case 302:
                case 303:
                case 307:
                case 308:
                    break;      // redirect: keep reading until the Location header
                case 404:       // Not found
                case 408:       // Request timeout
                case 410:       // Gone (and not coming back)
                    break 2;    // does not exist
                default:
                    // 2xx, 304, the remaining 4xx and all 5xx: something
                    // answers at this URL, so it is a final destination.
                    $resolved = $url;
                    $resolvedchain[] = $url;
                    break 2;
            }
        }

        // Anchored and case-insensitive so that "Content-Location:" is not
        // mistaken for a redirect and "location:" is still recognised.
        if (preg_match("/^Location:\s*(.*?)\s*$/i", $line_read, $matches)) {
            fclose($connection);
            $connection = false;
            $resolvedchain[] = $url;

            $target = $matches[1];
            if ($target == "") break;
            if (substr($target, 0, 2) == "//") {
                $target = (($scheme != '') ? $scheme : 'http') . ':' . $target;
            }
            elseif (substr($target, 0, 1) == "/") {
                $target = $pre . $target;
            }
            if ($chainlen > 0) $resolved = Resolve_URL($target, $resolvedchain, $chainlen - 1);
            break;
        }
    }
    // Parse body here?
    if ($connection) fclose($connection);

    return($resolved);
}
757?>

Archive Download this file

Branches