// MiniCrawler: a skeletal interactive Web crawler.
//
// Starting from the URI given on the command line, it downloads the page,
// shows each absolute http/https link found in it one at a time, and lets
// the user follow the link, look for the next one, or stop.
class MiniCrawler {
    // Keys accepted at the prompt; they are compared case-insensitively.
    const string FollowKey = "L";
    const string MoreKey = "M";
    const string QuitKey = "B";

    // Find the next link in htmlstr, searching from index startloc.
    //
    // Only attributes of the form href="http... are recognised (the match
    // is case-insensitive, so HREF= and https: links are found too).
    //
    // Returns the text between the attribute's quotes and sets startloc to
    // the index of the closing quote, so that the next call resumes after
    // this link. Returns null, leaving startloc untouched, when htmlstr is
    // null or empty, when startloc is outside the string, when no further
    // link exists, or when the attribute has no closing quote.
    static string FindLink(string htmlstr, ref int startloc) {
        if (string.IsNullOrEmpty(htmlstr) ||
            startloc < 0 || startloc >= htmlstr.Length) {
            return null;
        }

        int i = htmlstr.IndexOf("href=\"http", startloc,
                                StringComparison.OrdinalIgnoreCase);
        if (i == -1) {
            return null;
        }

        // The match itself contains the opening quote, so this cannot fail.
        int start = htmlstr.IndexOf('"', i) + 1;
        int end = htmlstr.IndexOf('"', start);
        if (end == -1) {
            // Truncated markup: without this check Substring() would be
            // called with a negative length and throw.
            return null;
        }

        startloc = end;
        return htmlstr.Substring(start, end - start);
    }

    // Entry point. Expects exactly one argument: the URI to start from.
    static void Main(string[] args) {
        if (args.Length != 1) {
            Console.WriteLine("Usage: MiniCrawler <uri>");
            return;
        }

        // URI of the next page to fetch; null once the user stops following links.
        string uristr = args[0];

        try {
            do {
                Console.WriteLine("Linking to " + uristr);

                // Create a request for the current URI.
                HttpWebRequest req = (HttpWebRequest)
                    WebRequest.Create(uristr);

                uristr = null; // this URI is used up; a followed link sets it again

                // Read the whole page. The using statements release the
                // response and the reader even if reading throws.
                string str;
                using (HttpWebResponse resp = (HttpWebResponse) req.GetResponse())
                using (StreamReader rdr = new StreamReader(resp.GetResponseStream())) {
                    str = rdr.ReadToEnd();
                }

                int curloc = 0;

                // Offer the links of this page one by one.
                while (true) {
                    string link = FindLink(str, ref curloc);
                    if (link == null) {
                        Console.WriteLine("No link found.");
                        break;
                    }

                    Console.WriteLine("Link found: " + link);
                    Console.Write("L = follow link, M = search for more, B = quit: ");
                    string answer = Console.ReadLine();

                    if (string.Equals(answer, FollowKey,
                                      StringComparison.OrdinalIgnoreCase)) {
                        uristr = link; // the outer loop fetches this page next
                        break;
                    } else if (string.Equals(answer, QuitKey,
                                             StringComparison.OrdinalIgnoreCase)) {
                        break;
                    } else if (string.Equals(answer, MoreKey,
                                             StringComparison.OrdinalIgnoreCase)) {
                        Console.WriteLine("Searching again.");
                    }
                    // Any other answer also moves on to the next link.
                }
            } while (uristr != null);
        } catch (WebException exc) {
            Console.WriteLine("Network error: " + exc.Message +
                              "\nStatus code: " + exc.Status);
        } catch (ProtocolViolationException exc) {
            Console.WriteLine("Protocol error: " + exc.Message);
        } catch (UriFormatException exc) {
            Console.WriteLine("Invalid URI: " + exc.Message);
        } catch (NotSupportedException exc) {
            Console.WriteLine("Unknown protocol: " + exc.Message);
        } catch (IOException exc) {
            Console.WriteLine("I/O error: " + exc.Message);
        } finally {
            Console.WriteLine("Terminating MiniCrawler.");
        }
    }
}
, www.McGraw-Hill.com. , .
|
|
http://mcgraw-hill.com
: http://sti.mcgraw-hill.com:9000/cgi-bin/query?mss=search&pg=aq , , ? .
: http://investor.mcgraw-hill.com/phoenix.zhtml?c=96562&p=irol-irhome , , ?
http://investor.mcgraw-hill.com/phoenix.zhtml?c=96562&p=irol-irhome
: http://www.mcgraw-hill.com/index.html
, , ?
http://www.mcgraw-hill.com/index.html
: http://sti.mcgraw-hill.com:9000/cgi-bin/query?mss=search&pg=aq , , ? MiniCrawler.
MiniCrawler. URI . Main () URI uristr. URI , uristr , , URI . . , GetResponseStream () StreamReader. ReadToEnd (), .
. FindLink (), MiniCrawler. , . FindLink () htmlstr startloc . , startloc ref. FindLink () , href="http, . , URI uri, startloc . startloc ref, Main (), , . uri. , , , .
, FindLink (), , Main (), . : , <>, , <>, , <>. <>, . . , .
MiniCrawler, , , . . , , , , , . , , . , URI URL , , Stack. , .
|
|
WebClient
WebClient. , WebClient WebRequest WebResponse , . WebClient , , .