import System.Data;
import System.Windows.Forms;

// Parser: splits an HTML document into content pieces and collects them in a
// DataSet. Each piece is the inner content of one occurrence of the configured
// tag (sTag) and becomes one row of the "Content" table with the columns
// Body, Date, Title and URL.
public class Parser {
    ///////////////////////////////////
    ///////////////////////////////////
    // START CONFIGURABLE PARAMETERS //
    ///////////////////////////////////
    ///////////////////////////////////
    // SET THIS TO 1 (ONE) TO REMOVE HTML TAGS, OR 0 (ZERO) TO LEAVE THEM IN
    public static var iStripTags = 1;
    // THIS IS A WORD THAT, IF FOUND, MEANS THE CONTENT PIECE WON'T BE ADDED
    // SET THIS TO "" OR SOMETHING LIKE "ABCDEFG01010101" TO DISABLE
    public static var sDontShow = "Google Home";
    // HTML IS SPLIT INTO PIECES DEPENDING ON THIS TAG
    // THIS MEANS PULL PIECES OF CONTENT BETWEEN <p> TAGS BY DEFAULT
    public static var sTag = "p";
    // URL OF YOUR CHOICE, CAN BE USED AS {URL} IN CONTENT TEMPLATE
    public static var sContentURL = "http://your.domain.com/";
    // TITLE OF YOUR CHOICE, OR "{KEYWORD}"
    // CAN BE USED IN CONTENT TEMPLATE AS {TITLE}
    public static var sContentTitle = "{KEYWORD}";
    ////////////////////////////////////
    ////////////////////////////////////
    // END OF CONFIGURABLE PARAMETERS //
    ////////////////////////////////////
    ////////////////////////////////////
    // DO NOT MODIFY BELOW THIS LINE! //
    ////////////////////////////////////

    // Shared result set; Parse() returns this same instance on every call.
    public static var ds : DataSet = new DataSet();

    // Creates the "Content" table (Body, Date, Title, URL) inside ds.
    // Safe to call repeatedly: adding a second table with the same name would
    // make DataTableCollection.Add throw, so an existing table is left alone.
    public static function SetupDataSet() {
        if (ds.Tables.Contains("Content"))
            return;
        var dt : DataTable = new DataTable("Content");
        dt.Columns.Add("Body");
        dt.Columns.Add("Date");
        dt.Columns.Add("Title");
        dt.Columns.Add("URL");
        ds.Tables.Add(dt);
    }

    // Appends one row to the "Content" table.
    //   sBody - text stored in the Body column.
    // Date is today's date as year-month-day (not zero padded); Title and URL
    // are copied from sContentTitle and sContentURL. If the table has not been
    // created yet, a message box is shown and nothing is added.
    public static function AddRow(sBody) {
        if (!ds.Tables.Contains("Content")) {
            MessageBox.Show("Table not initialized.", "JScript Error");
            return;
        }
        var d = new Date();
        // getMonth() is zero based (January == 0), so add 1 for a calendar month.
        var gd = d.getFullYear() + "-" + (d.getMonth() + 1) + "-" + d.getDate();
        var dr : DataRow = ds.Tables["Content"].NewRow();
        dr["Body"] = sBody;
        dr["Date"] = gd;
        dr["Title"] = sContentTitle;
        dr["URL"] = sContentURL;
        ds.Tables["Content"].Rows.Add(dr);
    }

    // Builds the regular-expression source used to pull the content of a tag.
    //   sTagName - tag name without angle brackets, e.g. "p" or "div".
    // Returns the pattern as a string. For ordinary tags capture group 1 is the
    // tag name, group 2 the remainder of the opening tag and group 3 the content.
    //
    // NOTE(review): the patterns returned for "!", "!doctype" and "br" look as
    // if their markup was lost at some point (two are empty, one is a bare line
    // break). They are kept exactly as found and have no content group, so
    // GetTagByName adds no rows for them - confirm against the original script.
    public static function GetExpressionForTagContents (sTagName)
    {
        var sOpenTag = "<\\s*(" + sTagName + ")(>|\\s+[^>]*>)";
        if (sTagName == "!" || sTagName == "!doctype")
            return("");
        if (sTagName == "br")
            return("\n");
        if (sTagName == "p") {
            // </p> is optional in HTML, so the content runs up to the next
            // opening or closing <p> tag (or the end of input). The lookahead is
            // tested before every character; [\s\S] lets content span lines.
            return(sOpenTag + "((?:(?!<\\s*/?\\s*\\1(?:>|\\s))[\\s\\S])*)");
        }
        // Any other tag: shortest content up to the matching closing tag.
        return(sOpenTag + "([\\s\\S]*?)<\\s*/\\s*\\1\\s*>");
    }

    // Finds every occurrence of sTagName in sSource and adds one row per piece.
    //   sTagName - tag name handed to GetExpressionForTagContents.
    //   sSource  - HTML text to scan; null or undefined adds nothing.
    // Character entities such as &nbsp; are replaced by a space first. A piece
    // is skipped when it contains sDontShow (case-insensitive); an empty
    // sDontShow disables that filter. Tags inside a piece are removed when
    // iStripTags is 1.
    public static function GetTagByName (sTagName, sSource)
    {
        if (sSource == null)
            return;
        var sPatternTag = GetExpressionForTagContents(sTagName);
        // An empty pattern matches the empty string without ever advancing,
        // which would loop forever; there is nothing to extract in that case.
        if (sPatternTag == null || sPatternTag.length == 0)
            return;

        var sText = ("" + sSource).replace(/&[a-z0-9]+;/ig, " ");
        var sBlocked = ("" + sDontShow).toLowerCase();
        var re = new RegExp(sPatternTag, "mig");

        for (var p = re.exec(sText); p; p = re.exec(sText)) {
            var sPiece = p[3];
            // Patterns without a content group leave p[3] unset.
            if (sPiece == null)
                continue;
            if (sBlocked.length > 0 && sPiece.toLowerCase().indexOf(sBlocked) != -1)
                continue;
            AddRow(iStripTags == 1 ? StripHTMLTags(sPiece) : sPiece);
        }
    }

    // Replaces every HTML tag (anything from "<" to the next ">") in s with a
    // single space. null and empty input are returned unchanged.
    public static function StripHTMLTags(s) {
        if (s == null) return(s);
        var sText = "" + s;
        if (sText.length == 0) return(s);
        return sText.replace(/<\/?[^>]*>/g, " ");
    }

    // Entry point: extracts all sTag pieces from sInput.
    //   sInput - HTML text to parse.
    // Returns the shared DataSet ds. Rows from earlier Parse calls are kept, so
    // repeated calls accumulate rows in the same "Content" table.
    public static function Parse(sInput) : DataSet {
        SetupDataSet();
        GetTagByName(sTag, sInput);
        return(ds);
    }
}