<?php
/*
 * POST handler: treats each line of the submitted textarea as a URL, fetches
 * it with cURL and prints one table row (URL, title, description, keywords)
 * per page that returned a non-empty body. Lines that do not start with
 * http:// or https:// and pages that could not be fetched are skipped.
 *
 * NOTE(security): the URLs are user-supplied and fetched from this server
 * (server-side request forgery risk). Restrict who can reach this page or
 * filter internal hosts before exposing it publicly.
 */
if ($_POST)
{
    // Accept only a plain string; "textarea[]=..." would arrive as an array.
    $input = '';
    if (isset($_POST['textarea']) && is_string($_POST['textarea']))
    {
        $input = $_POST['textarea'];
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Bound every request so a single unresponsive host cannot stall the page.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    echo "\n<table border='1'>\n<thead>\n<tr>\n"
        . "<th>URL</th>\n<th>Title</th>\n<th>Description</th>\n<th>Keywords</th>\n"
        . "</tr>\n</thead>\n<tbody>";

    // Split on any newline convention: the client's line endings do not have
    // to match the server's PHP_EOL.
    foreach (preg_split('/\r\n|\r|\n/', $input) as $url)
    {
        $url = trim($url);
        if (!preg_match("/^https?:\/\//i", $url))
        {
            continue;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        $html = curl_exec($ch);
        if (!$html)
        {
            // Transfer failed or the body was empty: nothing to report.
            continue;
        }

        $data = parse_page($html);

        // secure the data for printing
        $url = htmlentities($url, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");
        foreach ($data as $key => $value)
        {
            // - (string) cast: missing fields are null.
            // - ENT_SUBSTITUTE: keep text from non-UTF-8 pages instead of
            //   getting an empty string back.
            // - double_encode = false: the values are raw HTML source, so
            //   entities such as &amp; are already encoded.
            $data[$key] = htmlentities((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8", false);
        }

        echo "<tr>";
        echo "<td>{$url}</td>";
        echo "<td>{$data['title']}</td>";
        echo "<td>{$data['description']}</td>";
        echo "<td> {$data['keywords']}</td>";
        echo "</tr>";
    }

    curl_close($ch);

    echo "\n</tbody>\n</table>";
}
/**
 * Extract the title, meta description and meta keywords from an HTML document.
 *
 * The values are returned exactly as they appear in the markup (no entity
 * decoding), so callers must still escape them before printing.
 *
 * @param string $html Raw HTML source of the page.
 *
 * @return array Map with the keys "title", "description" and "keywords";
 *               a value is null when the page does not provide it.
 */
function parse_page($html)
{
    /* get page's title (the opening tag may carry attributes) */
    $title = null;
    if (preg_match('/<title\b[^>]*>(.*?)<\/title\s*>/si', $html, $matches) === 1)
    {
        $text = trim($matches[1]);
        if ($text !== '')
        {
            $title = $text;
        }
    }

    return array(
        "title" => $title,
        "description" => parse_page_meta($html, "description"),
        "keywords" => parse_page_meta($html, "keywords"),
    );
}

/**
 * Return the content attribute of the first <meta> tag whose name attribute
 * equals $name (case-insensitive), or null when there is no such tag.
 *
 * Attributes may appear in any order, may be double-quoted, single-quoted or
 * unquoted, and other attributes may surround them.
 *
 * @param string $html Raw HTML source of the page.
 * @param string $name Meta name to look for, e.g. "keywords".
 *
 * @return string|null
 */
function parse_page_meta($html, $name)
{
    // Whole <meta ...> tags. Quoted values are consumed as a unit so a ">"
    // inside them does not end the tag; the optional self-closing slash is
    // left out of the captured attribute text.
    $tagPattern = '/<meta\s((?:[^>"\']|"[^"]*"|\'[^\']*\')*?)\s*\/?>/i';
    if (!preg_match_all($tagPattern, $html, $tags))
    {
        return null;
    }

    // attribute = "value" | 'value' | bare-value. The branch-reset group (?|...)
    // puts the value in group 2 whichever quoting style matched.
    $attrPattern = '/([^\s"\'<>\/=]+)\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/';

    foreach ($tags[1] as $attributeText)
    {
        $attrs = array();
        if (preg_match_all($attrPattern, $attributeText, $pairs, PREG_SET_ORDER))
        {
            foreach ($pairs as $pair)
            {
                $key = strtolower($pair[1]);
                // On duplicated attributes the first occurrence wins.
                if (!isset($attrs[$key]))
                {
                    $attrs[$key] = $pair[2];
                }
            }
        }

        if (isset($attrs['name'], $attrs['content']) && strcasecmp($attrs['name'], $name) === 0)
        {
            return $attrs['content'];
        }
    }

    return null;
}
?>
<form method="post" action="?">
<textarea name="textarea" cols="45" rows="5"><?php
// Echo the previous submission back, escaped. The explicit guard replaces "@":
// it covers the first page load (no POST data) and a non-string "textarea[]"
// submission, which error suppression cannot rescue on PHP 8.
echo (isset($_POST['textarea']) && is_string($_POST['textarea']))
    ? htmlentities($_POST['textarea'], ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8")
    : '';
?></textarea><br />
<input type="submit" name="button" id="button" value="Submit" />
</form>