Matches in SemOpenAlex for { <https://semopenalex.org/work/W2101877405> ?p ?o ?g. }
- W2101877405 endingPage "437" @default.
- W2101877405 startingPage "429" @default.
- W2101877405 abstract "This paper presents a novel crawling strategy to locate bilingual sites. It does so by focusing on the Web graph neighborhood of these sites and exploring the patterns of the links in this region to guide its visitation policy. A sub-task in the problem of bilingual site discovery is the job of detecting bilingual sites, i.e., given a Web site, verify whether it is bilingual or not. We perform this task by combining supervised learning and language identification. Experimental results demonstrate that our crawler outperforms previous crawling approaches and produces a high-quality collection of bilingual sites, which we evaluate in the context of machine translation in the tourism and hospitality domain. The parallel text obtained using our novel crawling strategy results in a relative improvement of 22% in BLEU score (English-to-Spanish) over an out-of-domain seed translation model trained on the European parliamentary proceedings." @default.
- W2101877405 created "2016-06-24" @default.
- W2101877405 creator A5000683692 @default.
- W2101877405 creator A5061105309 @default.
- W2101877405 creator A5073713351 @default.
- W2101877405 date "2011-11-01" @default.
- W2101877405 modified "2023-09-26" @default.
- W2101877405 title "Crawling Back and Forth: Using Back and Out Links to Locate Bilingual Sites" @default.
- W2101877405 cites W1485536830 @default.
- W2101877405 cites W1489992655 @default.
- W2101877405 cites W1490123432 @default.
- W2101877405 cites W1533946607 @default.
- W2101877405 cites W1534714852 @default.
- W2101877405 cites W157432847 @default.
- W2101877405 cites W1618905105 @default.
- W2101877405 cites W1636405317 @default.
- W2101877405 cites W1807664792 @default.
- W2101877405 cites W1976232673 @default.
- W2101877405 cites W1996718000 @default.
- W2101877405 cites W200075660 @default.
- W2101877405 cites W2017726337 @default.
- W2101877405 cites W2047295649 @default.
- W2101877405 cites W2070150502 @default.
- W2101877405 cites W2101096097 @default.
- W2101877405 cites W2101105183 @default.
- W2101877405 cites W2107695330 @default.
- W2101877405 cites W2110034127 @default.
- W2101877405 cites W2116713744 @default.
- W2101877405 cites W2124673015 @default.
- W2101877405 cites W2124807415 @default.
- W2101877405 cites W2129765547 @default.
- W2101877405 cites W2149327368 @default.
- W2101877405 cites W2151626491 @default.
- W2101877405 cites W2155011048 @default.
- W2101877405 cites W2170296207 @default.
- W2101877405 cites W22168010 @default.
- W2101877405 cites W2394530649 @default.
- W2101877405 cites W2911388033 @default.
- W2101877405 hasPublicationYear "2011" @default.
- W2101877405 type Work @default.
- W2101877405 sameAs 2101877405 @default.
- W2101877405 citedByCount "2" @default.
- W2101877405 countsByYear W21018774052012 @default.
- W2101877405 crossrefType "proceedings-article" @default.
- W2101877405 hasAuthorship W2101877405A5000683692 @default.
- W2101877405 hasAuthorship W2101877405A5061105309 @default.
- W2101877405 hasAuthorship W2101877405A5073713351 @default.
- W2101877405 hasConcept C100368936 @default.
- W2101877405 hasConcept C105702510 @default.
- W2101877405 hasConcept C127413603 @default.
- W2101877405 hasConcept C132525143 @default.
- W2101877405 hasConcept C136764020 @default.
- W2101877405 hasConcept C13743948 @default.
- W2101877405 hasConcept C154945302 @default.
- W2101877405 hasConcept C166957645 @default.
- W2101877405 hasConcept C201995342 @default.
- W2101877405 hasConcept C203005215 @default.
- W2101877405 hasConcept C204321447 @default.
- W2101877405 hasConcept C205649164 @default.
- W2101877405 hasConcept C23123220 @default.
- W2101877405 hasConcept C2779343474 @default.
- W2101877405 hasConcept C2780451532 @default.
- W2101877405 hasConcept C41008148 @default.
- W2101877405 hasConcept C71924100 @default.
- W2101877405 hasConcept C80444323 @default.
- W2101877405 hasConceptScore W2101877405C100368936 @default.
- W2101877405 hasConceptScore W2101877405C105702510 @default.
- W2101877405 hasConceptScore W2101877405C127413603 @default.
- W2101877405 hasConceptScore W2101877405C132525143 @default.
- W2101877405 hasConceptScore W2101877405C136764020 @default.
- W2101877405 hasConceptScore W2101877405C13743948 @default.
- W2101877405 hasConceptScore W2101877405C154945302 @default.
- W2101877405 hasConceptScore W2101877405C166957645 @default.
- W2101877405 hasConceptScore W2101877405C201995342 @default.
- W2101877405 hasConceptScore W2101877405C203005215 @default.
- W2101877405 hasConceptScore W2101877405C204321447 @default.
- W2101877405 hasConceptScore W2101877405C205649164 @default.
- W2101877405 hasConceptScore W2101877405C23123220 @default.
- W2101877405 hasConceptScore W2101877405C2779343474 @default.
- W2101877405 hasConceptScore W2101877405C2780451532 @default.
- W2101877405 hasConceptScore W2101877405C41008148 @default.
- W2101877405 hasConceptScore W2101877405C71924100 @default.
- W2101877405 hasConceptScore W2101877405C80444323 @default.
- W2101877405 hasLocation W21018774051 @default.
- W2101877405 hasOpenAccess W2101877405 @default.
- W2101877405 hasPrimaryLocation W21018774051 @default.
- W2101877405 hasRelatedWork W2246212392 @default.
- W2101877405 hasRelatedWork W2250438750 @default.
- W2101877405 hasRelatedWork W2251104783 @default.
- W2101877405 hasRelatedWork W2251559475 @default.
- W2101877405 hasRelatedWork W2251841523 @default.
- W2101877405 hasRelatedWork W2740180825 @default.
- W2101877405 hasRelatedWork W2809054113 @default.
- W2101877405 hasRelatedWork W2947053256 @default.
- W2101877405 hasRelatedWork W2956506767 @default.
- W2101877405 hasRelatedWork W2970978814 @default.
- W2101877405 hasRelatedWork W2998174483 @default.
- W2101877405 hasRelatedWork W3039539959 @default.