-- @atlcompiler emftvm
-- @nsURI CSV=http://www.emftext.org/language/csv
-- @nsURI TEAMPUB=http://soft.vub.ac.be/teampub
-- Converts a VUB team_pub.php screenscrape to a CSV-formatted publication table.
-- $Id$
module teampub2authorcsv;
create OUT : CSV from IN : TEAMPUB;

uses teampublib;

-- Transforms the TEAMPUB document into a single CSV document.
--
-- The first row is a fixed header row (Author, Year, Category, Title,
-- Authors, Reference, URL). It is followed by one row per
-- (year, author, publication) combination: for every year in
-- thisModule.years and every author name in thisModule.softZapAuthors
-- followed by thisModule.softAuthors, each publication whose section year
-- equals that year and whose encoded author list contains that name yields
-- one row created by the AuthorPublication called rule.
--
-- NOTE(review): years, softZapAuthors, softAuthors, sectionRange and encode()
-- are not defined in this module; presumably they come from teampublib.
-- NOTE(review): the two author collections are joined as sequences, so a name
-- present in both would presumably produce duplicate rows; confirm against
-- teampublib.
rule Document {
	from
		s : TEAMPUB!Document
	using {
		-- All publications of the sections in s.sectionRange, as one flat sequence.
		allPublications : Sequence(TEAMPUB!Publication) =
			s.sectionRange->collect(s1 | s1.publications)->flatten();
	}
	to
		t : CSV!CSVDocument (
			rows <- thisModule.years->asSequence()
				->collect(y |
					thisModule.softZapAuthors->asSequence()
						->union(thisModule.softAuthors->asSequence())
						->collect(sa |
							allPublications
								-- The section year is converted with toInteger() before
								-- being compared with the year taken from thisModule.years.
								->select(p |
									p.section.year.toInteger() = y and
									p.authors->exists(a | a.encode() = sa))
								->collect(p1 | thisModule.AuthorPublication(sa, p1)))
						->flatten())
				->flatten()
				-- The header row always comes first.
				->prepend(titlerow)),
		titlerow : CSV!Row (
			values <- Sequence{author, year, cat, title, authors, ref, url}),
		author : CSV!Value (text <- 'Author'),
		year : CSV!Value (text <- 'Year'),
		cat : CSV!Value (text <- 'Category'),
		title : CSV!Value (text <- 'Title'),
		authors : CSV!Value (text <- 'Authors'),
		ref : CSV!Value (text <- 'Reference'),
		url : CSV!Value (text <- 'URL')
}

-- Creates one CSV row describing publication 'pub' on behalf of 'author'.
--
-- Parameters:
--   author - the author name written to the first column, exactly as passed in
--   pub    - the publication supplying the remaining columns
--
-- Columns, in order: author, section year, section category, encoded title,
-- encoded author names joined with ', ', encoded reference, encoded URL.
-- The created row is handed back to the caller through the do block.
rule AuthorPublication(author : String, pub : TEAMPUB!Publication) {
	using {
		-- Encoded form of every author of the publication, in original order.
		authors_enc : Sequence(String) = pub.authors->collect(a | a.encode());
	}
	to
		t : CSV!Row (
			values <- Sequence{auth, year, cat, title, authors, ref, url}),
		auth : CSV!Value (
			text <- author),
		year : CSV!Value (
			text <- pub.section.year),
		cat : CSV!Value (
			text <- pub.section.category),
		title : CSV!Value (
			text <- pub.title.encode()),
		authors : CSV!Value (
			-- Join the encoded names with ', '; no separator is emitted while the
			-- accumulator is still the empty string.
			text <- authors_enc->iterate(a; acc : String = '' |
				if acc = '' then a else acc + ', ' + a endif)),
		ref : CSV!Value (
			text <- pub.reference.encode()),
		url : CSV!Value (
			text <- pub.url.encode())
	do {
		-- Hand the new row back; the Document rule collects these call results
		-- into the rows of the CSV document.
		t;
	}
}