pub struct IndexerService {
config: Arc<Config>,
storage_service: Arc<StorageService>,
search_service: Arc<SearchService>,
client: Client,
force_process_flag: Arc<AtomicBool>,
}
Fields§
§config: Arc<Config>
§storage_service: Arc<StorageService>
§search_service: Arc<SearchService>
§client: Client
§force_process_flag: Arc<AtomicBool>
Implementations§
Source§impl IndexerService
impl IndexerService
Source fn url_to_id(url: &str) -> String
fn url_to_id(url: &str) -> String
Generate a deterministic ID from a URL to prevent duplicate crawl items
pub async fn new( config: Arc<Config>, storage_service: Arc<StorageService>, search_service: Arc<SearchService>, ) -> Result<Self>
Source pub async fn queue_domains(&self, domains: &[String]) -> Result<usize>
pub async fn queue_domains(&self, domains: &[String]) -> Result<usize>
Queue domains for crawling
Source pub async fn queue_domains_with_check(
&self,
domains: &[String],
check_last_indexed: bool,
) -> Result<usize>
pub async fn queue_domains_with_check( &self, domains: &[String], check_last_indexed: bool, ) -> Result<usize>
Queue domains for crawling with optional last-indexed time checking. If check_last_indexed is true, domains that were indexed recently will be skipped.
Source pub async fn start_periodic_indexing(&self) -> Result<()>
pub async fn start_periodic_indexing(&self) -> Result<()>
Start the periodic indexing service that checks for stale domains and re-indexes them. This runs in a loop, checking every 6 hours for domains that need re-indexing.
Source pub async fn start_periodic_duplicate_removal(&self) -> Result<()>
pub async fn start_periodic_duplicate_removal(&self) -> Result<()>
Start the periodic duplicate removal service. This runs in a loop, checking for and removing duplicates at configured intervals.
Source pub fn trigger_force_process_queue(&self) -> Result<()>
pub fn trigger_force_process_queue(&self) -> Result<()>
Trigger immediate queue processing by setting the force flag
async fn check_and_queue_stale_domains(&self) -> Result<usize>
pub async fn process_crawl_queue(&self) -> Result<()>
Source async fn log_crawl_queue_status(
&self,
items_processed: usize,
total_time: Duration,
)
async fn log_crawl_queue_status( &self, items_processed: usize, total_time: Duration, )
Log detailed crawl queue status for monitoring
async fn process_crawl_item( item: CrawlQueue, storage_service: Arc<StorageService>, search_service: Arc<SearchService>, config: Arc<Config>, client: Client, ) -> Result<()>
async fn crawl_and_index_page( item: &CrawlQueue, storage_service: &Arc<StorageService>, search_service: &Arc<SearchService>, config: &Arc<Config>, client: &Client, ) -> Result<()>
fn extract_text_content(document: &Html) -> String
fn extract_text_recursive(element: ElementRef<'_>, text_parts: &mut Vec<String>)
fn generate_snippet(content: &str, max_length: usize) -> String
async fn is_allowed_by_robots( url: &str, _user_agent: &str, client: &Client, ) -> Result<bool>
Auto Trait Implementations§
impl Freeze for IndexerService
impl !RefUnwindSafe for IndexerService
impl Send for IndexerService
impl Sync for IndexerService
impl Unpin for IndexerService
impl !UnwindSafe for IndexerService
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more