C# - How to copy only highlighted text from a PDF using the iTextSharp library?
I have a problem: I cannot extract the highlighted text from a PDF file. The `str` variable is always empty. Can anyone help me?
My code:
/// <summary>
/// Collects the page text that lies under every Highlight annotation of a PDF.
/// </summary>
/// <remarks>
/// A Highlight annotation does not carry the highlighted text itself; it only
/// records where on the page the highlight is drawn. The text is therefore
/// re-extracted from the page content, restricted to the annotation's /Rect.
/// NOTE(review): /Rect is the bounding box of the whole highlight, so for a
/// highlight that spans several lines the result may include neighbouring
/// text; filtering per /QuadPoints entry would be tighter - confirm whether
/// that precision is needed.
/// </remarks>
/// <param name="file">Path of the PDF file to read.</param>
/// <param name="page">
/// Not used: every page of the document is scanned, as in the original code.
/// NOTE(review): presumably meant as a single-page filter - confirm with callers.
/// </param>
/// <returns>
/// The extracted highlight texts concatenated without a separator, or an
/// empty string when the document has no Highlight annotations.
/// </returns>
private static string getpdfhighlightext(string file, int page)
{
    System.Text.StringBuilder nv = new System.Text.StringBuilder();
    PdfReader reader = new PdfReader(file);
    try
    {
        // Page numbers are 1-based and NumberOfPages is inclusive; the
        // original "x < NumberOfPages" silently skipped the last page.
        for (int x = 1; x <= reader.NumberOfPages; x++)
        {
            PdfDictionary pageDict = reader.GetPageN(x);
            PdfArray annots = pageDict.GetAsArray(PdfName.ANNOTS);
            if (annots == null)
            {
                continue;
            }

            // PdfArray is 0-based; the original 1..Size loop skipped the
            // first annotation and read one element past the end.
            for (int i = 0; i < annots.Size; i++)
            {
                // GetAsDict resolves indirect references and yields null
                // when the entry is not a dictionary.
                PdfDictionary annotationDic = annots.GetAsDict(i);
                if (annotationDic == null)
                {
                    continue;
                }

                // Comparing from the constant side tolerates a missing /Subtype.
                PdfName subType = annotationDic.GetAsName(PdfName.SUBTYPE);
                if (!PdfName.HIGHLIGHT.Equals(subType))
                {
                    continue;
                }

                PdfArray rect = annotationDic.GetAsArray(PdfName.RECT);
                if (rect == null || rect.Size < 4)
                {
                    continue;
                }

                PdfNumber x1 = rect.GetAsNumber(0);
                PdfNumber y1 = rect.GetAsNumber(1);
                PdfNumber x2 = rect.GetAsNumber(2);
                PdfNumber y2 = rect.GetAsNumber(3);
                if (x1 == null || y1 == null || x2 == null || y2 == null)
                {
                    continue;
                }

                iTextSharp.text.Rectangle area = new iTextSharp.text.Rectangle(
                    x1.FloatValue, y1.FloatValue, x2.FloatValue, y2.FloatValue);
                // /Rect corners may be stored in either order.
                area.Normalize();

                // Extract only the text rendered inside the highlight area.
                iTextSharp.text.pdf.parser.RenderFilter[] filters =
                {
                    new iTextSharp.text.pdf.parser.RegionTextRenderFilter(area)
                };
                iTextSharp.text.pdf.parser.ITextExtractionStrategy strategy =
                    new iTextSharp.text.pdf.parser.FilteredTextRenderListener(
                        new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy(),
                        filters);

                nv.Append(iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, x, strategy));
            }
        }
    }
    finally
    {
        // Release the file handle even when parsing fails.
        reader.Close();
    }

    return nv.ToString();
}
I'm using the iTextSharp library. The PDF library classes come from the `iTextSharp.text.pdf` namespace.
I want to sweep through the pages of the PDF and extract the highlighted text; because the file has 245 pages, I put in a filter per page. I can identify the highlight annotations, but the highlighted text is not returned as a string.
I managed to solve the problem with the following code:
/// <summary>
/// Collects the target of every URI link annotation found on a range of
/// pages and appends them to the <c>resultado</c> field.
/// </summary>
/// <remarks>
/// Only link annotations whose action is of type URI are reported; other
/// action types (for example JavaScript) are ignored. <c>progresso</c> is
/// reset to 0 and incremented once for each page processed.
/// </remarks>
/// <param name="file">Path of the PDF file to read.</param>
/// <param name="pgini">First page to scan (1-based, inclusive).</param>
/// <param name="pgfim">Last page to scan (1-based, inclusive).</param>
/// <returns>
/// The value of <c>resultado</c> after each link found has been appended to
/// it, followed by "\n ".
/// </returns>
public string getpdflinks(string file, int pgini, int pgfim)
{
    progresso = 0;
    List<string> ret = new List<string>();

    PdfReader r = new PdfReader(file);
    try
    {
        for (int i = pgini; i <= pgfim; i++)
        {
            PdfDictionary pageDictionary = r.GetPageN(i);
            PdfArray annots = pageDictionary.GetAsArray(PdfName.ANNOTS);

            // A page without annotations is simply skipped. The original
            // returned null here, which discarded the links already found
            // on earlier pages and stopped the scan.
            if (annots != null)
            {
                foreach (PdfObject a in annots.ArrayList)
                {
                    // Resolve a possible indirect reference; ignore entries
                    // that are not dictionaries instead of failing the cast.
                    PdfDictionary annotationDictionary = PdfReader.GetPdfObject(a) as PdfDictionary;
                    if (annotationDictionary == null)
                    {
                        continue;
                    }

                    // Comparing from the constant side tolerates a missing /Subtype.
                    if (!PdfName.LINK.Equals(annotationDictionary.GetAsName(PdfName.SUBTYPE)))
                    {
                        continue;
                    }

                    // GetAsDict also resolves an action stored as an indirect
                    // reference, and yields null when there is no action.
                    PdfDictionary annotationAction = annotationDictionary.GetAsDict(PdfName.A);
                    if (annotationAction == null)
                    {
                        continue;
                    }

                    if (PdfName.URI.Equals(annotationAction.GetAsName(PdfName.S)))
                    {
                        PdfString destination = annotationAction.GetAsString(PdfName.URI);
                        if (destination != null)
                        {
                            ret.Add(destination.ToString());
                        }
                    }
                }
            }

            progresso++;
        }
    }
    finally
    {
        // Release the file handle even when parsing fails.
        r.Close();
    }

    foreach (string link in ret)
    {
        resultado = resultado + link + "\n ";
    }

    return resultado;
}
Comments
Post a Comment