crawler4j compile error with class CrawlConfig - VariableDeclaratorId Expected -
The code does not compile. I changed to JRE 1.7. The compiler does not highlight the class in Eclipse, but CrawlConfig appears to fail in the compiler. The class should run from the command line on Linux.
Any ideas?
Compiler error — Description / Resource / Path / Location / Type: Syntax error on token "crawlStorageFolder", VariableDeclaratorId expected after this token — ZeoCrawler.java /zeowebcrawler/src/main/java/com/example, line 95, Java Problem
import edu.uci.ics.crawler4j.crawler.crawlconfig; import edu.uci.ics.crawler4j.crawler.crawlcontroller; import edu.uci.ics.crawler4j.crawler.page; import edu.uci.ics.crawler4j.crawler.webcrawler; import edu.uci.ics.crawler4j.fetcher.pagefetcher; import edu.uci.ics.crawler4j.parser.htmlparsedata; import edu.uci.ics.crawler4j.robotstxt.robotstxtconfig; import edu.uci.ics.crawler4j.robotstxt.robotstxtserver; import edu.uci.ics.crawler4j.url.weburl; public class controller { string crawlstoragefolder = "/data/crawl/root"; int numberofcrawlers = 7; crawlconfig config = new crawlconfig(); config.setcrawlstoragefolder(crawlstoragefolder); pagefetcher pagefetcher = new pagefetcher(config); robotstxtconfig robotstxtconfig = new robotstxtconfig(); robotstxtserver robotstxtserver = new robotstxtserver(robotstxtconfig, pagefetcher); crawlcontroller controller = new crawlcontroller(config, pagefetcher, robotstxtserver); controller.addseed("http://www.senym.com"); controller.addseed("http://www.merrows.co.uk"); controller.addseed("http://www.zeoic.com"); controller.start(mycrawler.class, numberofcrawlers); } public urlconnection connecturl(string strurl) { urlconnection conn =null; try { url inputurl = new url(strurl); conn = inputurl.openconnection(); int test = 0; }catch(malformedurlexception e) { system.out.println("please input valid url"); }catch(ioexception ioe) { system.out.println("can not connect url"); } return conn; } public static void updatelongurl() { // system.out.println("short url: "+ shorturl); // urlconn = connecturl(shorturl); // urlconn.getheaderfields(); // system.out.println("original url: "+ urlconn.geturl()); /* connecturl - function take valid url , return url object representing url address. 
*/ } public class mycrawler extends webcrawler { private pattern filters = pattern.compile(".*(\\.(css|js|bmp|gif|jpe?g" + "|png|tiff?|mid|mp2|mp3|mp4" + "|wav|avi|mov|mpeg|ram|m4v|pdf" + "|rm|smil|wmv|swf|wma|zip|rar|gz))$"); /** * should implement function specify whether * given url should crawled or not (based on * crawling logic). */ @override public boolean shouldvisit(weburl url) { string href = url.geturl().tolowercase(); return !filters.matcher(href).matches() && href.startswith("http://www.ics.uci.edu/"); } /** * function called when page fetched , ready * processed program. */ @override public void visit(page page) { string url = page.getweburl().geturl(); system.out.println("url: " + url); if (page.getparsedata() instanceof htmlparsedata) { htmlparsedata htmlparsedata = (htmlparsedata) page.getparsedata(); string text = htmlparsedata.gettext(); string html = htmlparsedata.gethtml(); list<weburl> links = htmlparsedata.getoutgoingurls(); system.out.println("text length: " + text.length()); system.out.println("html length: " + html.length()); system.out.println("number of outgoing links: " + links.size()); } } }
This looks like a strange error since the code seems clean, but the real cause is that the statements (config.set..., controller.addSeed(...), etc.) sit directly in the class body instead of inside a method such as main() — Java only allows declarations there, hence "VariableDeclaratorId expected". If the error persists after fixing that, try starting Eclipse with the -clean option on the command line.
Comments
Post a Comment