Matches in SemOpenAlex for { <https://semopenalex.org/work/W2007695588> ?p ?o ?g. }
- W2007695588 endingPage "619" @default.
- W2007695588 startingPage "609" @default.
- W2007695588 abstract "Abstract The nature of the contents of academic Web sites is of direct relevance to the new field of scientific Web intelligence, and for search engine and topic‐specific crawler designers. We analyze word frequencies in national academic Webs using the Web sites of three English‐speaking nations: Australia, New Zealand, and the United Kingdom. Strong regularities were found in page size and word frequency distributions, but with significant anomalies. At least 26% of pages contain no words. High frequency words include university names and acronyms, Internet terminology, and computing product names: not always words in common usage away from the Web. A minority of low frequency words are spelling mistakes, with other common types including nonwords, proper names, foreign language terms or computer science variable names. Based upon these findings, recommendations for data cleansing and filtering are made, particularly for clustering applications." @default.
- W2007695588 created "2016-06-24" @default.
- W2007695588 creator A5034823602 @default.
- W2007695588 date "2005-02-04" @default.
- W2007695588 modified "2023-10-10" @default.
- W2007695588 title "Text characteristics of English language university Web sites" @default.
- W2007695588 cites W1596751695 @default.
- W2007695588 cites W1699349104 @default.
- W2007695588 cites W1832160134 @default.
- W2007695588 cites W1974339500 @default.
- W2007695588 cites W1983179002 @default.
- W2007695588 cites W1992419399 @default.
- W2007695588 cites W2008620264 @default.
- W2007695588 cites W2009405785 @default.
- W2007695588 cites W2025214515 @default.
- W2007695588 cites W2026551074 @default.
- W2007695588 cites W2038316408 @default.
- W2007695588 cites W2039842373 @default.
- W2007695588 cites W2045108252 @default.
- W2007695588 cites W2047295649 @default.
- W2007695588 cites W2069613886 @default.
- W2007695588 cites W2069979223 @default.
- W2007695588 cites W2081580037 @default.
- W2007695588 cites W2085600442 @default.
- W2007695588 cites W2098162425 @default.
- W2007695588 cites W2109856998 @default.
- W2007695588 cites W2112642950 @default.
- W2007695588 cites W2118009636 @default.
- W2007695588 cites W2118131693 @default.
- W2007695588 cites W2118996379 @default.
- W2007695588 cites W2124416056 @default.
- W2007695588 cites W2147152072 @default.
- W2007695588 cites W2149556349 @default.
- W2007695588 cites W2154589581 @default.
- W2007695588 cites W2170521549 @default.
- W2007695588 cites W2175110005 @default.
- W2007695588 cites W2479921454 @default.
- W2007695588 cites W3123181842 @default.
- W2007695588 cites W4235541403 @default.
- W2007695588 cites W4298364342 @default.
- W2007695588 cites W4377862659 @default.
- W2007695588 doi "https://doi.org/10.1002/asi.20126" @default.
- W2007695588 hasPublicationYear "2005" @default.
- W2007695588 type Work @default.
- W2007695588 sameAs 2007695588 @default.
- W2007695588 citedByCount "12" @default.
- W2007695588 crossrefType "journal-article" @default.
- W2007695588 hasAuthorship W2007695588A5034823602 @default.
- W2007695588 hasConcept C110875604 @default.
- W2007695588 hasConcept C136764020 @default.
- W2007695588 hasConcept C13743948 @default.
- W2007695588 hasConcept C138885662 @default.
- W2007695588 hasConcept C154945302 @default.
- W2007695588 hasConcept C158154518 @default.
- W2007695588 hasConcept C175293574 @default.
- W2007695588 hasConcept C17744445 @default.
- W2007695588 hasConcept C199539241 @default.
- W2007695588 hasConcept C202444582 @default.
- W2007695588 hasConcept C204321447 @default.
- W2007695588 hasConcept C21959979 @default.
- W2007695588 hasConcept C23123220 @default.
- W2007695588 hasConcept C2777530160 @default.
- W2007695588 hasConcept C2777801307 @default.
- W2007695588 hasConcept C33923547 @default.
- W2007695588 hasConcept C41008148 @default.
- W2007695588 hasConcept C41895202 @default.
- W2007695588 hasConcept C547195049 @default.
- W2007695588 hasConcept C73555534 @default.
- W2007695588 hasConcept C9652623 @default.
- W2007695588 hasConceptScore W2007695588C110875604 @default.
- W2007695588 hasConceptScore W2007695588C136764020 @default.
- W2007695588 hasConceptScore W2007695588C13743948 @default.
- W2007695588 hasConceptScore W2007695588C138885662 @default.
- W2007695588 hasConceptScore W2007695588C154945302 @default.
- W2007695588 hasConceptScore W2007695588C158154518 @default.
- W2007695588 hasConceptScore W2007695588C175293574 @default.
- W2007695588 hasConceptScore W2007695588C17744445 @default.
- W2007695588 hasConceptScore W2007695588C199539241 @default.
- W2007695588 hasConceptScore W2007695588C202444582 @default.
- W2007695588 hasConceptScore W2007695588C204321447 @default.
- W2007695588 hasConceptScore W2007695588C21959979 @default.
- W2007695588 hasConceptScore W2007695588C23123220 @default.
- W2007695588 hasConceptScore W2007695588C2777530160 @default.
- W2007695588 hasConceptScore W2007695588C2777801307 @default.
- W2007695588 hasConceptScore W2007695588C33923547 @default.
- W2007695588 hasConceptScore W2007695588C41008148 @default.
- W2007695588 hasConceptScore W2007695588C41895202 @default.
- W2007695588 hasConceptScore W2007695588C547195049 @default.
- W2007695588 hasConceptScore W2007695588C73555534 @default.
- W2007695588 hasConceptScore W2007695588C9652623 @default.
- W2007695588 hasIssue "6" @default.
- W2007695588 hasLocation W20076955881 @default.
- W2007695588 hasOpenAccess W2007695588 @default.
- W2007695588 hasPrimaryLocation W20076955881 @default.
- W2007695588 hasRelatedWork W1506122440 @default.
- W2007695588 hasRelatedWork W1999548128 @default.
- W2007695588 hasRelatedWork W2102475112 @default.
- W2007695588 hasRelatedWork W2113184419 @default.