-- @atlcompiler emftvm
-- @nsURI CSV=http://www.emftext.org/language/csv
-- @nsURI TEAMPUB=http://soft.vub.ac.be/teampub
-- Converts a VUB team_pub.php screenscrape to a CSV-formatted publication table.
-- $Id$
module teampub2csv;
create OUT : CSV from IN : TEAMPUB;

-- NOTE(review): sectionRange, isInRange, encode(), softAuthors and
-- softZapAuthors are not defined in this module; presumably they are
-- provided by teampublib -- confirm.
uses teampublib;

-- Maps the source document to a CSV document whose first row is a fixed
-- header row, followed by the publications of every section in
-- s.sectionRange.
rule Document {
	from
		s : TEAMPUB!Document
	to
		t : CSV!CSVDocument (
			-- Publication elements are assigned directly; presumably they are
			-- resolved to the rows created by rule Publication through ATL's
			-- implicit trace resolution.
			rows <- s.sectionRange
				->collect(sec | sec.publications)
				->flatten()
				->prepend(titlerow)
		),
		-- Header row: one cell per column, in the same order as the data
		-- rows produced by rule Publication.
		titlerow : CSV!Row (
			values <- Sequence{
				year, cat, title, authors, ref, url,
				auth_count, soft_auth_count, zap_auth_count
			}
		),
		year : CSV!Value (
			text <- 'Year'
		),
		cat : CSV!Value (
			text <- 'Category'
		),
		title : CSV!Value (
			text <- 'Title'
		),
		authors : CSV!Value (
			text <- 'Authors'
		),
		ref : CSV!Value (
			text <- 'Reference'
		),
		url : CSV!Value (
			text <- 'URL'
		),
		auth_count : CSV!Value (
			text <- 'Number of authors'
		),
		soft_auth_count : CSV!Value (
			text <- 'Number of SOFT authors'
		),
		zap_auth_count : CSV!Value (
			text <- 'Number of SOFT ZAP authors'
		)
}

-- Maps one publication to one CSV row. Only publications whose section
-- satisfies isInRange are matched.
rule Publication {
	from
		s : TEAMPUB!Publication (
			s.section.isInRange
		)
	using {
		-- Encoded author names, in source order (duplicates kept).
		encodedAuthors : Sequence(String) =
			s.authors->collect(name | name.encode());
		-- Same names with duplicates removed; used for the membership counts.
		distinctAuthors : Set(String) =
			encodedAuthors->asSet();
		-- Encoded names joined with ', '. The separator is added only once
		-- the accumulator is non-empty.
		authorList : String =
			encodedAuthors->iterate(name; joined : String = '' |
				if joined <> '' then
					joined + ', ' + name
				else
					name
				endif
			);
	}
	to
		t : CSV!Row (
			values <- Sequence{
				year, cat, title, authors, ref, url,
				auth_count, soft_auth_count, zap_auth_count
			}
		),
		year : CSV!Value (
			text <- s.section.year
		),
		cat : CSV!Value (
			text <- s.section.category
		),
		title : CSV!Value (
			text <- s.title.encode()
		),
		authors : CSV!Value (
			text <- authorList
		),
		ref : CSV!Value (
			text <- s.reference.encode()
		),
		url : CSV!Value (
			text <- s.url.encode()
		),
		-- Counts every listed author, duplicates included.
		auth_count : CSV!Value (
			text <- s.authors->size().toString()
		),
		-- Distinct authors found in either softAuthors or softZapAuthors.
		soft_auth_count : CSV!Value (
			text <- distinctAuthors
				->intersection(
					thisModule.softAuthors->union(thisModule.softZapAuthors))
				->size().toString()
		),
		-- Distinct authors found in softZapAuthors only.
		zap_auth_count : CSV!Value (
			text <- distinctAuthors
				->intersection(thisModule.softZapAuthors)
				->size().toString()
		)
}