NodeFilter 部分设置方法

本文介绍了一种使用DOM过滤器的方法：通过组合标签名、属性等不同的过滤条件，精确选取网页元素。这些技术对于爬虫开发和数据抓取尤为重要。

 // Title filter (per the variable name): built from a tag-name filter for "h1".
 NodeFilter titleFilter = new TagNameFilter("h1");
            // Content filter: combines a "div" tag-name filter with an attribute filter for id="sohu_content".
            NodeFilter contentFilter = new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "sohu_content"));
            // News-date filter: combines a "span" tag-name filter with an attribute filter for class="c".
            NodeFilter newsdateFilter = new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "c"));
            // News-author filter: combines a "div" tag-name filter with an attribute filter for class="editUsr".
            NodeFilter newsauthorFilter = new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "editUsr"));

package com.heytap.rjkf; import android.speech.tts.UtteranceProgressListener; import android.os.Bundle; import android.speech.tts.TextToSpeech; import android.app.Activity; import android.webkit.WebView; import android.webkit.WebViewClient; import android.content.res.Configuration; import android.widget.Button; import android.widget.Toast; import android.webkit.ValueCallback; import android.app.AlertDialog; import android.content.DialogInterface; import android.content.SharedPreferences; import android.graphics.Color; import android.os.Build; import android.view.View; import android.view.WindowManager; import java.util.Locale; import java.util.UUID; import android.os.Handler; import java.util.regex.Pattern; public class MainActivity extends Activity implements TextToSpeech.OnInitListener { private WebView mWebView; private TextToSpeech tts; private StringBuilder textBuilder = new StringBuilder(); private boolean isSpeaking = false; private boolean isPaused = false; private int currentSentenceIndex = 0; private int currentCharIndex = 0; private String[] sentences; private Handler handler = new Handler(); private static final String PREFS_NAME = "ReadProgress"; private static final String KEY_SENTENCE_INDEX = "sentenceIndex"; private static final String KEY_CHAR_INDEX = "charIndex"; private boolean shouldHighlight = false; private static final Pattern BIBLE_REF_PATTERN = Pattern.compile( "\\b(?:创|亚|玛|太|可|启|但|何)\\s*?[一二百]*\\d+([、,]\\s*\\d+)*([~下]*\\d*[上]*)?[节]?\\b|" + "\\s*?[一二十百]*\\d+([、,]\\s*\\d+)*([~下]*\\d*[上]*)?[节]?|" + "(引用文|参\\s*(?:创|但何)\\s*?[一二三九十百]*\\d+([、,]\\s*\\d+)*([~下]*\\d*[上]*)?)" ); private static final Pattern SENTENCE_DELIMITER = Pattern.compile("(?<=[ 。!??!])"); @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); // 硬件加速设置 if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.HONEYCOMB) { // 启用硬件加速(如果支持的话) getWindow().setFlags( 
WindowManager.LayoutParams.FLAG_HARDWARE_ACCELERATED, WindowManager.LayoutParams.FLAG_HARDWARE_ACCELERATED ); } // 检查保存的进度 checkSavedProgress(); initTextToSpeech(); setupWebView(); loadWebContent("file:///android_asset/index.html"); setupButtons(); } private void initTextToSpeech() { tts = new TextToSpeech(this, this); setUtteranceProgressListener(); } private void checkSavedProgress() { SharedPreferences prefs = getSharedPreferences(PREFS_NAME, MODE_PRIVATE); int savedSentenceIndex = prefs.getInt(KEY_SENTENCE_INDEX, -1); int savedCharIndex = prefs.getInt(KEY_CHAR_INDEX, -1); if (savedSentenceIndex != -1 && savedCharIndex != -1) { new AlertDialog.Builder(this) .setTitle("继续朗读?") .setMessage("检测到上次未完成的朗读进度,是否继续?") .setPositiveButton("是", new DialogInterface.OnClickListener() { public void onClick(DialogInterface dialog, int which) { currentSentenceIndex = savedSentenceIndex; currentCharIndex = savedCharIndex; shouldHighlight = true; } }) .setNegativeButton("否", new DialogInterface.OnClickListener() { public void onClick(DialogInterface dialog, int which) { clearProgress(); } }) .setCancelable(false) .show(); } } private class JavaScriptInterface { @android.webkit.JavascriptInterface public void highlightSentence(int sentenceIndex, int charIndex) { runOnUiThread(() -> { try { String js = String.format("highlightSentence(%d, %d)", sentenceIndex, charIndex); mWebView.evaluateJavascript(js, null); } catch (Exception e) { Toast.makeText(MainActivity.this, "高亮错误: " + e.getMessage(), Toast.LENGTH_SHORT).show(); } }); } } private void setupWebView() { mWebView = findViewById(R.id.mWebView); // 设置背景色防止黑屏 mWebView.setBackgroundColor(Color.WHITE); // 确保WebView可见 mWebView.setVisibility(View.VISIBLE); mWebView.getSettings().setJavaScriptEnabled(true); mWebView.getSettings().setDomStorageEnabled(true); mWebView.addJavascriptInterface(new JavaScriptInterface(), "AndroidInterface"); // 硬件加速兼容处理 if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.KITKAT) { 
WebView.setWebContentsDebuggingEnabled(true); } // 对于某些设备,可能需要尝试软件渲染层 if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.HONEYCOMB) { mWebView.setLayerType(View.LAYER_TYPE_HARDWARE, null); // 如果黑屏问题仍然存在,尝试切换到软件渲染: // mWebView.setLayerType(View.LAYER_TYPE_SOFTWARE, null); } mWebView.setWebViewClient(new WebViewClient() { @Override public void onPageFinished(WebView view, String url) { super.onPageFinished(view, url); // 注入高亮JS injectHighlightJS(); // 文本提取(不修改DOM结构) extractTextFromWebPage(); // 如果需要恢复高亮 if (shouldHighlight) { highlightCurrentPosition(); shouldHighlight = false; } // 强制刷新WebView(解决一些渲染问题) view.postDelayed(() -> { if (view != null) { view.setVisibility(View.INVISIBLE); view.setVisibility(View.VISIBLE); } }, 100); } }); } private void injectHighlightJS() { String highlightJS = "javascript:(function() {" + " if (typeof window.highlightSentence !== 'function') {" + " window.highlightSentence = function(sentenceIndex, charIndex) {" + " console.log('Highlighting:', sentenceIndex, charIndex);" + " const allSpans = document.querySelectorAll('.sentence-highlight');" + " allSpans.forEach(span => {" + " span.classList.remove('sentence-highlight');" + " const text = span.textContent;" + " const parent = span.parentNode;" + " // 恢复原始文本节点" + " const textNode = document.createTextNode(text);" + " parent.replaceChild(textNode, span);" + " parent.normalize();" + " });" + "" + " const textNodes = [];" + " const treeWalker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null, false);" + " while (treeWalker.nextNode()) {" + " textNodes.push(treeWalker.currentNode);" + " }" + "" + " if (sentenceIndex < textNodes.length) {" + " const node = textNodes[sentenceIndex];" + " const text = node.textContent;" + " if (text && text.length > 0) {" + " const parent = node.parentNode;" + " const span = document.createElement('span');" + " span.className = 'sentence-highlight';" + " span.style.backgroundColor = '#FFF59D';" + " parent.replaceChild(span, node);" + " 
span.appendChild(node);" + " span.scrollIntoView({ behavior: 'smooth', block: 'center' });" + " }" + " }" + " };" + " }" + "})()"; mWebView.evaluateJavascript(highlightJS, null); } private void extractTextFromWebPage() { // 不再修改DOM,而是通过TreeWalker提取文本 mWebView.evaluateJavascript("(function() {" + " var textContent = '';" + " var textNodes = [];" + " var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, {" + " acceptNode: function(node) {" + " // 跳过脚本和样式等" + " if (node.parentNode.tagName === 'SCRIPT' || node.parentNode.tagName === 'STYLE') {" + " return NodeFilter.FILTER_REJECT;" + " }" + " return NodeFilter.FILTER_ACCEPT;" + " }" + " }, false);" + " while (walker.nextNode()) {" + " var text = walker.currentNode.nodeValue.trim();" + " if (text) {" + " textNodes.push(text);" + " }" + " }" + " return textNodes.join('|');" + "})()", new ValueCallback<String>() { @Override public void onReceiveValue(String value) { runOnUiThread(() -> { try { if (value != null && !value.isEmpty() && !"null".equals(value)) { textBuilder.setLength(0); String rawText = value.replaceAll("^\"|\"$", ""); textBuilder.append(rawText); String filteredText = BIBLE_REF_PATTERN.matcher(textBuilder.toString()).replaceAll(""); sentences = SENTENCE_DELIMITER.split(filteredText); currentSentenceIndex = 0; currentCharIndex = 0; if (shouldHighlight) { highlightCurrentPosition(); shouldHighlight = false; } } else { Toast.makeText(MainActivity.this, "页面内容为空", Toast.LENGTH_SHORT).show(); } } catch (Exception e) { Toast.makeText(MainActivity.this, "文本处理错误: " + e.getMessage(), Toast.LENGTH_SHORT).show(); } }); } }); } private void highlightCurrentPosition() { if (mWebView != null) { String js = String.format("highlightSentence(%d, %d)", currentSentenceIndex, currentCharIndex); mWebView.evaluateJavascript(js, null); } } private void saveProgress() { SharedPreferences.Editor editor = getSharedPreferences(PREFS_NAME, MODE_PRIVATE).edit(); editor.putInt(KEY_SENTENCE_INDEX, 
currentSentenceIndex); editor.putInt(KEY_CHAR_INDEX, currentCharIndex); editor.apply(); } private void clearProgress() { SharedPreferences.Editor editor = getSharedPreferences(PREFS_NAME, MODE_PRIVATE).edit(); editor.remove(KEY_SENTENCE_INDEX); editor.remove(KEY_CHAR_INDEX); editor.apply(); } private void setupButtons() { Button speakButton = findViewById(R.id.speakButton); speakButton.setOnClickListener(v -> { if (sentences == null || sentences.length == 0) { Toast.makeText(MainActivity.this, "请先加载内容", Toast.LENGTH_SHORT).show(); return; } if (isPaused) { isPaused = false; speakFromCurrentPosition(); Toast.makeText(MainActivity.this, "朗读继续", Toast.LENGTH_SHORT).show(); } else { if (isSpeaking) { Toast.makeText(MainActivity.this, "正在朗读中", Toast.LENGTH_SHORT).show(); } else { currentSentenceIndex = 0; currentCharIndex = 0; speakFromCurrentPosition(); Toast.makeText(MainActivity.this, "朗读开始", Toast.LENGTH_SHORT).show(); } } }); Button stopButton = findViewById(R.id.stopButton); stopButton.setOnClickListener(v -> { stopSpeaking(); Toast.makeText(MainActivity.this, "朗读已停止", Toast.LENGTH_SHORT).show(); }); Button pauseButton = findViewById(R.id.pauseButton); pauseButton.setOnClickListener(v -> { if (isSpeaking) { tts.stop(); isSpeaking = false; isPaused = true; Toast.makeText(MainActivity.this, "朗读已暂停", Toast.LENGTH_SHORT).show(); } }); } private void stopSpeaking() { if (tts != null) { tts.stop(); } isSpeaking = false; isPaused = false; } private void setUtteranceProgressListener() { tts.setOnUtteranceProgressListener(new UtteranceProgressListener() { @Override public void onStart(String utteranceId) { isSpeaking = true; isPaused = false; } @Override public void onDone(String utteranceId) { handler.post(() -> { if (currentSentenceIndex < sentences.length) { currentCharIndex = sentences[currentSentenceIndex].length(); } handler.postDelayed(MainActivity.this::speakFromCurrentPosition, 100); }); } @Override public void onError(String utteranceId) { isSpeaking = false; 
handler.post(() -> Toast.makeText(MainActivity.this, "朗读错误", Toast.LENGTH_SHORT).show() ); } }); } private void speakFromCurrentPosition() { if (tts == null || isPaused || sentences == null) return; try { while (currentSentenceIndex < sentences.length && (sentences[currentSentenceIndex] == null || sentences[currentSentenceIndex].trim().isEmpty())) { currentSentenceIndex++; currentCharIndex = 0; } if (currentSentenceIndex >= sentences.length) { isSpeaking = false; Toast.makeText(this, "播放完成", Toast.LENGTH_SHORT).show(); return; } String currentSentence = sentences[currentSentenceIndex]; if (currentCharIndex >= currentSentence.length()) { currentSentenceIndex++; currentCharIndex = 0; speakFromCurrentPosition(); return; } String partToSpeak = currentSentence.substring(currentCharIndex); if (!partToSpeak.trim().isEmpty()) { Bundle params = new Bundle(); String utteranceId = UUID.randomUUID().toString(); params.putCharSequence(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, utteranceId); tts.speak(partToSpeak, TextToSpeech.QUEUE_FLUSH, params, utteranceId); currentCharIndex = currentSentence.length(); } else { currentSentenceIndex++; currentCharIndex = 0; speakFromCurrentPosition(); } } catch (Exception e) { Toast.makeText(this, "朗读异常: " + e.getMessage(), Toast.LENGTH_SHORT).show(); } } private void loadWebContent(String url) { try { mWebView.loadUrl(url); } catch (Exception e) { Toast.makeText(this, "内容加载失败: " + e.getMessage(), Toast.LENGTH_SHORT).show(); } } @Override public void onConfigurationChanged(Configuration newConfig) { super.onConfigurationChanged(newConfig); } @Override public void onBackPressed() { if (mWebView.canGoBack()) { mWebView.goBack(); } else { super.onBackPressed(); } } @Override public void onInit(int status) { if (status == TextToSpeech.SUCCESS) { int result = tts.setLanguage(Locale.CHINESE); if (result == TextToSpeech.LANG_MISSING_DATA || result == TextToSpeech.LANG_NOT_SUPPORTED) { Toast.makeText(this, "语言不支持", Toast.LENGTH_SHORT).show(); } else 
{ tts.setPitch(1.0f); tts.setSpeechRate(1.0f); } } else { Toast.makeText(this, "TTS初始化失败", Toast.LENGTH_SHORT).show(); } } @Override protected void onResume() { super.onResume(); if (mWebView != null) { mWebView.onResume(); mWebView.resumeTimers(); } } @Override protected void onPause() { super.onPause(); if (mWebView != null) { mWebView.onPause(); mWebView.pauseTimers(); } // 如果正在朗读,保存进度 if (isSpeaking || isPaused) { saveProgress(); } } @Override protected void onDestroy() { if (isSpeaking || isPaused) { saveProgress(); } else { clearProgress(); } if (tts != null) { tts.stop(); tts.shutdown(); } if (mWebView != null) { mWebView.destroy(); mWebView = null; } super.onDestroy(); } }
10-10
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值