在 Windows NT 下调用 Nutch 的脚本,可实现自动运行。

我现在正对搜索引擎感兴趣,
我的搜索基于 Nutch,并结合了 ICTCLAS,分词效果和速度都不错。
使用下面的脚本就可以不用 Cygwin 来模拟 Linux 环境。
下面是在 Windows NT 下调用 Nutch 的脚本,
你可以根据自己的需要修改,这样就可以很方便地实现自动运行了。
有兴趣的朋友可以试用一下,它大大方便了操作。


nutch.bat
@rem Wrapper: start nutch1.bat (located next to this file) in a child cmd with
@rem delayed variable expansion enabled (/V:ON), forwarding all arguments.
@rem The script path is quoted so an install directory containing spaces works;
@rem the extra outer quotes are the form cmd /C needs for a quoted command.
@cmd /V:ON /C ""%~dp0nutch1.bat" %*"

nutch1.bat
@echo on
rem *********************************************************************
rem * A script to launch nutch on Windows 2000/XP System.
rem *
rem * Written by babatu
rem * babatu@gmail.com blog: blog.babatu.com
rem *
rem * Because delayed environment variable expansion is used, cmd /V:on
rem * should be used to run this script.
rem *********************************************************************
rem Keep the variables set below local to this run of the script.
if "%OS%"=="Windows_NT" @setlocal
if "%OS%"=="WINNT" @setlocal

rem No command given: show the usage text. %~1 strips surrounding quotes so
rem a quoted first argument cannot break the comparison.
if "%~1"=="" goto :msg
goto :begin
:msg
rem Print the list of supported commands, wait for a key press, then exit.
echo "Usage: nutch COMMAND"
echo "where COMMAND is one of:"
echo "  crawl             one-step crawler for intranets"
echo "  readdb            read / dump crawl db"
echo "  readlinkdb        read / dump link db"
echo "  inject            inject new urls into the database"
echo "  generate          generate new segments to fetch"
echo "  fetch             fetch a segment's pages"
echo "  parse             parse a segment's pages"
echo "  segread           read / dump segment data"
echo "  updatedb          update crawl db from segments after fetching"
echo "  invertlinks       create a linkdb from parsed segments"
echo "  index             run the indexer on parsed segments and linkdb"
echo "  merge             merge several segment indexes"
echo "  dedup             remove duplicates from a set of segment indexes"
echo "  plugin            load a plugin and run one of its classes main()"
echo "  server            run a search server"
echo " or"
echo "  CLASSNAME         run the class named CLASSNAME"
echo "Most commands print help when invoked w/o parameters."
pause
goto :end

:begin
rem %~dp0 is the expanded drive and directory of the current script, so the
rem default Nutch home is the parent of the directory holding this script.
set "DEFAULT_NUTCH_HOME=%~dp0.."
rem set DEFAULT_NUTCH_HOME=..

rem Keep a NUTCH_HOME supplied by the caller; otherwise fall back to the default.
if "%NUTCH_HOME%"=="" set "NUTCH_HOME=%DEFAULT_NUTCH_HOME%"
rem Clear the helper variable (an empty quoted assignment undefines it).
set "DEFAULT_NUTCH_HOME="

echo %NUTCH_HOME%

rem set _USE_CLASSPATH=yes

rem Build the Java class path in CLASSPATH.
rem Start from the caller's CLASSPATH (if any) and add the JDK tools jar.
if "%CLASSPATH%"=="" (
    set "CLASSPATH=%JAVA_HOME%\lib\tools.jar"
) else (
    set "CLASSPATH=%CLASSPATH%;%JAVA_HOME%\lib\tools.jar"
)
rem Add the Nutch configuration directory.
set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%\conf"
echo %CLASSPATH%
echo before other

rem for developers, add plugins, job & test code to CLASSPATH
if exist "%NUTCH_HOME%\build\plugins" set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%\build"

rem !CLASSPATH! (delayed expansion) is needed inside FOR bodies so that each
rem iteration appends to the value left by the previous one.
rem The scan is skipped when the directory is absent.
if exist "%NUTCH_HOME%\build" for /R "%NUTCH_HOME%\build" %%i in (nutch*.job) do set "CLASSPATH=!CLASSPATH!;%%i"
if exist "%NUTCH_HOME%\build\test\classes" set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%\build\test\classes"

rem for releases, add Nutch job to CLASSPATH
for /R "%NUTCH_HOME%" %%i in (nutch*.job) do set "CLASSPATH=!CLASSPATH!;%%i"
rem add plugins to classpath
if exist "%NUTCH_HOME%\plugins" set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%"
rem add libs to CLASSPATH
if exist "%NUTCH_HOME%\lib" for /R "%NUTCH_HOME%\lib" %%f in (*.jar) do set "CLASSPATH=!CLASSPATH!;%%f"


echo %CLASSPATH%

rem translate command
rem Map the first argument to the Java class that implements it. CLASS is
rem cleared first so a value inherited from the environment cannot defeat the
rem fallback at the end.
set "CLASS="
if "%~1"=="crawl" set "CLASS=org.apache.nutch.crawl.Crawl"
if "%~1"=="inject" set "CLASS=org.apache.nutch.crawl.Injector"
if "%~1"=="generate" set "CLASS=org.apache.nutch.crawl.Generator"
if "%~1"=="fetch" set "CLASS=org.apache.nutch.fetcher.Fetcher"
if "%~1"=="parse" set "CLASS=org.apache.nutch.parse.ParseSegment"
if "%~1"=="readdb" set "CLASS=org.apache.nutch.crawl.CrawlDbReader"
if "%~1"=="readlinkdb" set "CLASS=org.apache.nutch.crawl.LinkDbReader"
if "%~1"=="segread" set "CLASS=org.apache.nutch.segment.SegmentReader"
if "%~1"=="updatedb" set "CLASS=org.apache.nutch.crawl.CrawlDb"
if "%~1"=="invertlinks" set "CLASS=org.apache.nutch.crawl.LinkDb"
if "%~1"=="index" set "CLASS=org.apache.nutch.indexer.Indexer"
if "%~1"=="dedup" set "CLASS=org.apache.nutch.indexer.DeleteDuplicates"
if "%~1"=="merge" set "CLASS=org.apache.nutch.indexer.IndexMerger"
if "%~1"=="plugin" set "CLASS=org.apache.nutch.plugin.PluginRepository"
if "%~1"=="server" set "CLASS=org.apache.nutch.searcher.DistributedSearch$Server"
rem Anything else is taken to be a class name to run directly.
if "%CLASS%"=="" set "CLASS=%~1"

rem The Java launcher is located through JAVA_HOME; stop early if it is unset.
if "%JAVA_HOME%"=="" (
    echo Error: JAVA_HOME is not set.
    goto :end
)

rem %* still contains the command word (the first argument), which has already
rem been translated into CLASS. Split it off and forward only the remaining
rem arguments to the Java class.
set "NUTCH_ARGS="
for /f "tokens=1*" %%a in ("%*") do set NUTCH_ARGS=%%b

rem Quote the launcher path and the class path so directories containing
rem spaces do not split the command line.
"%JAVA_HOME%\bin\java" -cp "%CLASSPATH%" %CLASS% %NUTCH_ARGS%


if "%OS%"=="Windows_NT" @endlocal
if "%OS%"=="WINNT" @endlocal

:end
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值