(* \part{CamlPDF}\chaptertitle{PDF}{Representing PDF files} *)

(* This module declares a data type which represents an Adobe PDF document,
and defines various simple operations on it. *)
open Utility
open Pdfio

(* \section{Data Type for Representing PDF Documents} *)

(* Predicate on characters delimiting entities. *)
let is_delimiter = function
  | '(' | ')' | '<' | '>' | '[' | ']' | '{' | '}' | '%' | '/' -> true
  | _ -> false

(* \intf Streams of binary data, byte-addressable, can either be in memory ([Got]) or
still in an input channel ([ToGet]). *)
type stream =
  | Got of bytestream
  | ToGet of input * int64 * int64 (*r input, position, length *)

(* \intf Type for individual PDF objects. A [Name] includes the initial `/'. A
[Stream] consists of a reference to a pair of the stream dictionary (another
[pdfobject]) and a [stream]. Thus a [pdfobject] is technically mutable.  However,
at the user level, it is intended to be immutable: changes should be limited to
encoding and decoding of the stream.

Note that pdfobjects are not always amenable to polymorphic equality testing,
since the [Io.input] in the [ToGet] part of a [stream] contains functional
values. *)
type pdfobject =
  | Null
  | Boolean of bool
  | Integer of int
  | Real of float
  | String of string
  | Name of string 
  | Array of pdfobject list
  | Dictionary of (string * pdfobject) list
  | Stream of (pdfobject * stream) ref
  | Indirect of int

(*IF-OCAML*)
(* Pdf objects are stored in an efficient map structure. *)
module PdfObjMap =
  Map.Make
    (struct
       type t = int
       let compare = compare
    end)

let pdfobjmap_find = PdfObjMap.find
let pdfobjmap_mapi = PdfObjMap.mapi
let pdfobjmap_iter = PdfObjMap.iter
let pdfobjmap_remove = PdfObjMap.remove
let pdfobjmap_add = PdfObjMap.add
let pdfobjmap_empty = PdfObjMap.empty

(* An object is either lexed, or needs to be lexed from a position in the
input. *)
type objectdata =
  | Parsed of pdfobject
  | ToParse

(* We hold the maximum object number in use, [maxobjnum] to allow easy
production of new keys for the map. *)
type pdfobjects =
  {mutable maxobjnum : int;
   mutable parse : (PdfObjMap.key -> pdfobject) option;
   mutable objectsfromstream : int list;
   mutable pdfobjects : (objectdata ref * int) PdfObjMap.t} (*r int is generation *)
(*ENDIF-OCAML*)

(*i*)(*F#

(* An object is either lexed, or needs to be lexed from a position in the
input. *)
type objectdata =
  | Parsed of pdfobject
  | ToParse

let PdfObjMap : (int, objectdata ref * int) Map.Provider = Map.Make compare

let pdfobjmap_find = PdfObjMap.find
let pdfobjmap_mapi = PdfObjMap.mapi
let pdfobjmap_iter = PdfObjMap.iter
let pdfobjmap_remove = PdfObjMap.remove
let pdfobjmap_add = PdfObjMap.add
let pdfobjmap_empty = PdfObjMap.empty

type pdfobjects =
  {mutable maxobjnum : int;
   mutable parse : (int -> pdfobject) option;
   mutable objectsfromstream : int list;
   mutable pdfobjects : Tagged.Map<int, (objectdata ref * int), System.Collections.Generic.IComparer<int>>} (*r int is generation *)

F#*)(*i*)


(* \intf PDF Document. The major and minor version numbers, the root object number,
the list of objects and the trailer dictionary.

This represents the contents of a PDF file's user objects (object streams and
other mechanisms involved only in reading and writing are abstracted away). *)
type pdfdoc =
  {mutable major : int; 
   mutable minor : int;
   mutable root : int;
   mutable objects : pdfobjects; 
   mutable trailerdict : pdfobject} 

let set_streamobjects pdf os =
  pdf.objects.objectsfromstream <- os

let get_streamobjects pdf =
  pdf.objects.objectsfromstream

(* \intf The null PDF document. *)
let empty () =
  {major = 1;
   minor = 0;
   root = 0;
   objects = {maxobjnum = 0; parse = None; objectsfromstream = []; pdfobjects = pdfobjmap_empty};
   trailerdict = Dictionary []}

(* \intf General exception for low-level errors. *)
exception PDFError of string

(* \section{Utility functions} *)

(* \intf Predicate on those characters considered whitespace in PDF files. *)
let is_whitespace = function
  | '\000' | '\009' | '\010' | '\012' | ' ' | '\013' -> true
  | _ -> false

(* \intf Get a stream from disk if it hasn't already been got. *)
let getstream = function
  | Stream ({contents = (d, ToGet (i, o, l))} as stream) ->
      if l = 0L then stream := (d, Got (mkstream 0)) else
        let s = mkstream (i64toi l) in
          begin try
            (*IF-OCAML*)i.seek_in o; (*ENDIF-OCAML*)
            (*i*)(*F#i.seek_in (i64toi o); F#*)(*i*)
            for c = 0 to i64toi l - 1 do
              match i.input_byte () with
              | b when b = Pdfio.no_more -> dpr "H"; raise End_of_file
              | b -> sset s c b
            done;
            stream := (d, Got s)
          with
            End_of_file ->
              raise (PDFError "Pdf.getstream: can't read stream.")
          end
  | Stream _ -> ()
  | _ -> raise (PDFError "Pdf.getstream: not a stream")

let recurse_array (f : pdfobject -> pdfobject) elts =
  Array (map f elts)

(* \intf Similarly for dictionaries. *)
let recurse_dict (f : pdfobject -> pdfobject) elts =
  let names, objects = split elts in
    let objects' = map f objects in
      Dictionary (combine names objects')

(* \intf Return a float from a PDF number. *)
let getnum = function
  | Real a -> a
  | Integer a -> float a
  | _ -> raise (PDFError "Pdf.getnum: not a number")

(* \intf Parse a PDF rectangle data structure. Returns min x, min y, max x, max y. *)
let parse_rectangle = function
  | Array [a; b; c; d] ->
      begin try
        let x, y, x', y' =
          getnum a, getnum b, getnum c, getnum d
        in
          fmin x x', fmin y y', fmax x x', fmax y y'
      with
        PDFError _ -> raise (PDFError "Pdf.parse_rectangle: bad rectangle")
      end
  | _ -> raise (PDFError "Pdf.parse_rectangle: not a rectangle")

let change_obj doc i obj =
  match fst (pdfobjmap_find i doc.objects.pdfobjects) with
  | {contents = Parsed _} -> assert false
  | {contents = ToParse} as r -> r := Parsed obj

(* Parse an object [n] in document [pdf], updating the object in the document so
it is ready-parsed should it be required again. *)
let parse_lazy pdf n =
  match pdf.objects.parse with
  | None -> raise (Assert_failure ("Pdf.parse_lazy", 0, 0))
  | Some f ->
      let obj = f n in
        change_obj pdf n obj;
        obj

(* \intf Look up an object. On an error return [Pdf.Null] *)
let lookup_obj doc i =
  try
    match fst (pdfobjmap_find i doc.objects.pdfobjects) with
    | {contents = Parsed obj} -> obj
    | {contents = ToParse} -> parse_lazy doc i
  with
    Not_found -> dpr "2H"; Null

let catalog_of_pdf pdf =
  try lookup_obj pdf pdf.root with
    Not_found -> raise (PDFError "No catalog")

(* \intf Given any pdf document and object, follow indirections to yield a
direct object. A hanging indirect is defined as [Null]. *)
let rec direct pdf = function
  | Indirect i ->
      begin try
        match fst (pdfobjmap_find i pdf.objects.pdfobjects) with
        | {contents = Parsed pdfobject} -> direct pdf pdfobject
        | {contents = ToParse} -> parse_lazy pdf i
      with
        Not_found -> dpr "2I"; Null
      end
  | obj -> obj

(* \intf Apply a function on Stream objects to all streams in a PDF document. We
assume stream dictionaries don't have indirect references to an object which
itself contains a stream. *)
let map_stream f pdf =
  let rec map_stream_inner f i = function
    | {contents = Parsed (Stream _ as stream)}, g -> ref (Parsed (f stream)), g
    | {contents = Parsed obj}, g -> ref (Parsed (obj)), g
    | {contents = ToParse}, g -> map_stream_inner f i (ref (Parsed (parse_lazy pdf i)), g)
  in
    let objects' =
      {pdf.objects with
         pdfobjects = pdfobjmap_mapi (map_stream_inner f) pdf.objects.pdfobjects}
    in
      {pdf with objects = objects'}

(* \intf Iterate over a stream. *)
let iter_stream f pdf =
  let rec iter_stream_inner f i = function
    | {contents = Parsed (Stream _ as stream)}, g -> f stream
    | {contents = ToParse} as r, g ->
        r := Parsed (parse_lazy pdf i);
        iter_stream_inner f i (r, g)
    | _ -> ()
  in
    pdfobjmap_iter (iter_stream_inner f) pdf.objects.pdfobjects

(* \intf Lookup a key in a dictionary, following indirect references,  returning
[None] on any failure. This works on both plain dictionaries and streams. *)
let lookup_direct pdf key dict =
  match direct pdf dict with
  | Dictionary d | Stream {contents = (Dictionary d, _)} ->
      begin match lookup key d with
      | None -> None
      | Some o -> Some (direct pdf o)
      end
  | _ -> None

(* \intf Look up under a key and its alternate. Return the value associated with the key that worked, or [None] if neither did. *)
let lookup_direct_orelse pdf k k' d =
  match lookup_direct pdf k d with
  | None -> lookup_direct pdf k' d
  | result -> result

(* \intf Look something up in a dictionary, failing with given exception if not
found. We make direct both the dictionary and the result of the lookup. This
also allows us to look things up in a stream dictionary transparently. *)
let lookup_exception (exp : exn) pdf key dict =
  let dict' =
    match direct pdf dict with
    | Dictionary d | Stream {contents = Dictionary d, _} -> d
    | o -> raise (PDFError "not a dictionary")
  in
    match lookup key dict' with
    | None -> dpr "G"; raise exp
    | Some v -> direct pdf v

(* \intf A specialised one raising [PDFError]. *)
let lookup_fail text =
  lookup_exception (PDFError text)

(* \intf Parse a matrix. *)
let parse_matrix pdf name dict =
  match lookup_direct pdf name dict with
  | None -> Transform.i_matrix
  | Some (Array [a; b; c; d; e; f]) ->
      let a = getnum a and b = getnum b and c = getnum c
      and d = getnum d and e = getnum e and f = getnum f in
        {Transform.a = a; Transform.b = b; Transform.c = c;
         Transform.d = d; Transform.e = e; Transform.f = f}
  | _ -> raise (PDFError "Malformed matrix")

(* \intf Make a matrix *)
let make_matrix tr =
  Array
    [Real tr.Transform.a; Real tr.Transform.b; Real tr.Transform.c;
     Real tr.Transform.d; Real tr.Transform.e; Real tr.Transform.f]

(* \intf Iterate over the objects in a document, in order of increasing object
number. *)
let objiter f doc =
  let f' k v =
    match v with
    | {contents = Parsed obj}, _ -> f k obj
    | {contents = ToParse}, _ -> f k (parse_lazy doc k)
  in
    pdfobjmap_iter f' doc.objects.pdfobjects

(* \intf Same, but also pass generation number. *)
let objiter_gen f doc =
  let f' k v =
    match v with
    | {contents = Parsed obj}, g -> f k g obj
    | {contents = ToParse}, g -> f k g (parse_lazy doc k)
  in
    pdfobjmap_iter f' doc.objects.pdfobjects

(* \intf Map on objects. *)
let objmap f doc =
  let f' i = function
    | {contents = Parsed obj}, g -> ref (Parsed (f obj)), g
    | {contents = ToParse}, g -> ref (Parsed (parse_lazy doc i)), g
  in
    doc.objects <-
       {doc.objects with
          pdfobjects = pdfobjmap_mapi f' doc.objects.pdfobjects}

let maxobjnum pdf =
  pdf.objects.maxobjnum

(* Return a list of object numbers. *)
let objnumbers pdf =
  let keys = ref [] in
    objiter (fun k _ -> keys =| k) pdf;
    rev !keys

(* \intf Cardinality of object set. O(n). *)
let objcard pdf =
  let card = ref 0 in
    objiter (fun _ _ -> incr card) pdf;
    !card

(* Remove an object. *)
let removeobj doc o =
  {doc with objects =
    {doc.objects with pdfobjects = pdfobjmap_remove o doc.objects.pdfobjects}}

(* Return a list of (k, v) pairs. *)
let list_of_objs doc =
  let objs = ref [] in
    objiter (fun k v -> objs =| (k, Parsed v)) doc;
    !objs

(* \intf Add an object, given an object number. *)
let addobj_given_num doc (num, obj) =
  doc.objects.maxobjnum <- max doc.objects.maxobjnum num;
  doc.objects.pdfobjects <- pdfobjmap_add num (ref (Parsed obj), 0) doc.objects.pdfobjects

(* \intf Add an object. We use the first number larger than the maxobjnum, and update that. *)
let addobj doc obj =
  let num = doc.objects.maxobjnum + 1 in
    addobj_given_num doc (num, obj);
    num

(* Make a objects entry from a list of (number, object) pairs. *)
let objects_of_list parse l =
  let maxobj = ref 0
  and map = ref pdfobjmap_empty in
    iter
      (fun (k, v) ->
         maxobj := max !maxobj k;
         map := pdfobjmap_add k v !map)
      l;
    {parse = parse; pdfobjects = !map; objectsfromstream = []; maxobjnum = !maxobj}

(* Find the page reference numbers, given the top level node of the page tree *)
let rec page_reference_numbers_inner pdf pages_node node_number =
  match lookup_direct pdf "/Type" pages_node with
  | Some (Name "/Pages") | None ->
      begin match lookup_direct pdf "/Kids" pages_node with
      | Some (Array elts) ->
          flatten
            (map
              (function
               | Indirect i ->
                   page_reference_numbers_inner
                     pdf (direct pdf (Indirect i)) i
               | _ -> raise (PDFError "badly formed page tree A"))
              elts)
      | _ -> raise (PDFError "badly formed page tree B")
      end
  | Some (Name "/Page") -> [node_number]
  | _ -> raise (PDFError "badly formed page tree C")

let page_reference_numbers pdf =
  let root = lookup_obj pdf pdf.root in
    let pages_node =
        match lookup_direct pdf "/Pages" root with
        | Some p -> p
        | None -> raise (PDFError "badly formed page tree D")
    in
      page_reference_numbers_inner pdf pages_node ~-1

(* Renumber an object given a change table (A hash table mapping old to new
numbers). *)
let rec renumber_object_parsed (pdf : pdfdoc) changes obj =
  match obj with
  | Indirect i ->
      let i' =
        match tryfind changes i with
        | Some x -> x
        | None -> i (*r A dangling indirect is valid. *)
      in
        Indirect i'
  | Array a ->
      recurse_array (renumber_object_parsed pdf changes) a
  | Dictionary d ->
      recurse_dict (renumber_object_parsed pdf changes) d
  | Stream {contents = (p, s)} ->
      Stream {contents = renumber_object_parsed pdf changes p, s}
  | pdfobject -> pdfobject

let renumber_object pdf changes objnum = function
  | ToParse ->
      renumber_object_parsed pdf changes (parse_lazy pdf objnum)
  | Parsed obj ->
      renumber_object_parsed pdf changes obj

(* Renumber a PDF's objects to [1]\ldots [n]. *)

(* Calculate the substitutions required to renumber the document. *)
let changes pdf =
  let card = objcard pdf in
    let order = ilist_fail_null 1 card
    and change_table = Hashtbl.create card in
      List.iter2 (Hashtbl.add change_table) (objnumbers pdf) order;
      change_table
      
(* Perform all renumberings given by a change table. *)
let renumber change_table pdf =
  let root' =
    match tryfind change_table pdf.root with Some x -> x | None -> pdf.root
  and trailerdict' =
    renumber_object pdf change_table 0 (Parsed pdf.trailerdict)
  and objects' =
    let nums, objs = split (list_of_objs pdf) in
      let objs' =
        map2 (renumber_object pdf change_table) nums objs
      and nums' =
        map (function k -> match tryfind change_table k with Some x -> x | None -> k) nums
      in
        objects_of_list
          pdf.objects.parse
          (combine nums' (map (fun x -> ref (Parsed x), 0) objs'))
  in
    {pdf with
     root = root';
     objects = objects';
     trailerdict = trailerdict'}
 
(* \intf Renumber the objects (including root and trailer dictionary) in a list of
pdfs so they are mutually exclusive. We iterate over the key lists to build
a list of change tables which are applied to the input PDFs. NOTE: This can't
be used on PDFs where the generation numbers still matter (i.e before
decryption). *)
let renumber_pdfs pdfs =
  let keylists = map objnumbers pdfs
  and bse = ref 1
  and tables = ref [] in
    iter
      (fun k ->
         let length = length k in
           let table = Hashtbl.create length in
             List.iter2 (Hashtbl.add table) k (ilist !bse (!bse + length - 1));
             tables =| table;
             bse += length)
      keylists;
    map2 renumber (rev !tables) pdfs

(* Used for sets of object numbers. *)

(*IF-OCAML*)
module RefSet =
  Set.Make
    (struct
       type t = int
       let compare = compare
    end)

let refset_add = RefSet.add
let refset_empty = RefSet.empty
let refset_elements = RefSet.elements
(*ENDIF-OCAML*)

(*i*)(*F#
let RefSet : int Set.Provider = Set.Make compare

let refset_add = RefSet.add
let refset_empty = RefSet.empty
let refset_elements = RefSet.elements
F#*)(*i*)

(* Give a list of object numbers referenced in a given [pdfobject] *)
let rec referenced no_follow_entries no_follow_contains pdf found i = function
  | Parsed (Indirect i) ->
      if not (RefSet.mem i !found) then
        begin
          let obj = 
            try lookup_obj pdf i with
              Not_found -> dpr "2M"; Null
          in
            match obj with
            | Dictionary d ->
                if not (mem true (map (mem' no_follow_contains) d)) then
                  begin
                  found := RefSet.add i !found;
                  referenced no_follow_entries no_follow_contains pdf found i (Parsed obj)
                  end
            | _ ->
              found := RefSet.add i !found;
              referenced no_follow_entries no_follow_contains pdf found i (Parsed obj)
        end
  | Parsed (Array a) ->
      iter
        (referenced no_follow_entries no_follow_contains pdf found i)
        (map (fun x -> Parsed x) a)
  | Parsed (Dictionary d) ->
      iter
        (referenced no_follow_entries no_follow_contains pdf found i)
        (map
          (fun x -> Parsed (snd x))
          (lose (fun (k, _) -> mem k no_follow_entries) d))
  | Parsed (Stream s) ->
      referenced no_follow_entries no_follow_contains pdf found i (Parsed (fst !s))
  | Parsed _ ->
      ()
  | ToParse ->
      referenced no_follow_entries no_follow_contains pdf found i (Parsed (parse_lazy pdf i))

(* Nullify all references to page objects which are no longer in the page tree.
This prevents (for instance) annotations on a page referencing a deleted page,
thus preventing the deleted page's objects from being recovered during garbage
collection. *)
let nullify_deleted_page_references pdf =
  let rec nullify numbers = function
    | Indirect i when mem i numbers -> Null
    | Array elts -> recurse_array (nullify numbers) elts
    | Dictionary elts -> recurse_dict (nullify numbers) elts
    | Stream {contents = (p, s)} -> Stream {contents = nullify numbers p, s}
    | x -> x
  and page_object_numbers =
    let nums = ref [] in
      objiter
        (function objnum ->
           function
             | Dictionary d when lookup "/Type" d  = Some (Name "/Page") ->
                 nums := objnum :: !nums
             | x -> ())
        pdf;
      !nums
  in
    objmap (nullify (setminus page_object_numbers (page_reference_numbers pdf))) pdf

(* \intf Remove any unreferenced objects. *)
let remove_unreferenced pdf =
  nullify_deleted_page_references pdf;
  let found = ref RefSet.empty in
    referenced [] [] pdf found pdf.root (Parsed (lookup_obj pdf pdf.root));
    referenced [] [] pdf found 0 (Parsed pdf.trailerdict);
    found := RefSet.add pdf.root !found;
    let eltnumbers = RefSet.elements !found in
      (* If not found, just ignore. *)
      let elements =
        map
          (fun n -> try lookup_obj pdf n with Not_found -> dpr "2N"; Null)
          eltnumbers
      in
        pdf.objects <-
          {maxobjnum = 0;
           parse = pdf.objects.parse;
           objectsfromstream = pdf.objects.objectsfromstream;
           pdfobjects = pdfobjmap_empty};
        iter (addobj_given_num pdf) (combine eltnumbers elements)

(* \intf Objects referenced from a given one. *)
let objects_referenced no_follow_entries no_follow_contains pdf pdfobject =
  let set = ref RefSet.empty in
    referenced no_follow_entries no_follow_contains pdf set 0 (Parsed pdfobject);
    RefSet.elements !set

(* \intf The same, but return the objects too. *)
let objects_referenced_and_objects no_follow_entries no_follow_contains pdf pdfobject =
  let nums =
    objects_referenced no_follow_entries no_follow_contains pdf pdfobject
  in
    combine nums (map (lookup_obj pdf) nums)

(* \intf Remove a dictionary entry. Also works for streams. *)
let rec remove_dict_entry dict key =
  match dict with
  | Dictionary d -> Dictionary (remove key d)
  | Stream ({contents = (dict', stream)} as s) ->
      s := (remove_dict_entry dict' key, stream);
      Stream s
  | _ -> raise (PDFError "remove_dict_entry: not a dictionary")

(* \intf Replace dict entry, raising [Not_found] if it's not there. Also works
for streams.*)
let rec replace_dict_entry dict key value =
  match dict with
  | Dictionary d -> Dictionary (replace key value d)
  | Stream ({contents = (dict', stream)} as s) ->
      s := (replace_dict_entry dict' key value, stream);
      Stream s
  | _ -> raise (PDFError "replace_dict_entry: not a dictionary.")

(* \intf Add a dict entry, replacing if there. Also works for streams. *)
let rec add_dict_entry dict key value =
  match dict with
  | Dictionary d -> Dictionary (add key value d)
  | Stream ({contents = (dict', stream)} as s) ->
      s := (add_dict_entry dict' key value, stream);
      Stream s
  | _ -> raise (PDFError "add_dict_entry: not a dictionary.")

(* Find the contents of a stream as a bytestream. *)
let rec bigarray_of_stream s =
  getstream s;
  match s with
  | Stream {contents = _, Got bytestream} -> bytestream
  | _ -> raise (PDFError "couldn't extract raw stream")

(* \intf Given a dictionary and a prefix (e.g gs), return a name, starting with the
prefix, which is not already in the dictionary (e.g /gs0). *)
let unique_key prefix obj =
  let elts = match obj with
    | Dictionary es
    | Stream {contents = Dictionary es, _} -> es
    | _ -> raise (PDFError "unique_key: Not a dictionary or stream")
  in
    let names = fst (split elts)
    and name_of_num n = "/" ^ prefix ^ string_of_int n
    and num = ref 0 in
      while mem (name_of_num !num) names do incr num done;
      name_of_num !num

(* \intf Given a PDF and potential filename, calculate an MD5 string and build a
suitable /ID entry from it. *)
let generate_id (pdf : pdfdoc) (path : string) =
  (*IF-OCAML*)
  let gettime () = Unix.gettimeofday () in
  (*ENDIF-OCAML*)
  (*F#
  let gettime () = Sys.time () in
  F#*)
  let d =
    digest (path ^ string_of_float (gettime ()))
  in
    Array [String d; String d]

(* Find all the indirect numbers reachable from an entry in a dictionary,
including the indirect of that dictionary entry, if it's an indirect. *)
let reference_numbers_of_dict_entry pdf dict entry =
  match dict with
  | Dictionary d ->
      begin match lookup entry d with
      | Some x -> objects_referenced [] [] pdf x
      | None ->
          raise (PDFError "reference_numbers_of_dict_entry: no entry")
      end
  | _ ->
      raise (PDFError "reference_numbers_of_dict_entry: not a dictionary")

(* Find the indirect reference given by the value associated with a key in a
dictionary. *)
let find_indirect key dict =
  match dict with
  | Dictionary d ->
      begin match lookup key d with
      | Some (Indirect i) -> Some i
      | _ -> None
      end
  | _ -> raise (PDFError "find_indirect: not a dictionary")

(* Name tree functionality *)

(* Look something up in a name tree. *)
let rec nametree_lookup_kids pdf k = function
  | Array (h::t) ->
      begin match nametree_lookup pdf k h with
      | None ->
          nametree_lookup_kids pdf k (Array t)
      | Some result -> Some result
      end
  | Array [] -> None
  | _ -> raise (PDFError "malformed name tree")

and array_lookup pdf k = function
  | Array elts ->
      lookup k (pairs_of_list elts)
  | _ -> raise (PDFError "Bad lookup array")

and nametree_lookup pdf k dict =
  match lookup_direct pdf "/Limits" dict with
  | Some (Array [l;r]) ->
      if k < l || k > r then None else
      begin match lookup_direct pdf "/Kids" dict with
      | Some kids ->
          (* Intermediate node *)
          nametree_lookup_kids pdf k kids
      | None ->
          match lookup_direct pdf "/Names" dict with
          | Some names ->
              (* leaf node *)
              array_lookup pdf k names
          | None ->
              raise (PDFError "Malformed name tree entry")
      end
  | None ->
      begin match lookup_direct pdf "/Kids" dict with
      | Some kids ->
          (* Root node with kids *)
          nametree_lookup_kids pdf k kids
      | None ->
          match lookup_direct pdf "/Names" dict with
          | Some names ->
              (* Root node with names *)
              array_lookup pdf k names
          | None ->
              raise (PDFError "Missing name tree entry")
      end
  | _ -> raise (PDFError "Malformed name tree")

(* Return an ordered list of all the (k, v) pairs in a tree *)
let rec contents_of_nametree pdf tree =
  match lookup_direct pdf "/Names" tree with
  | Some (Array names) ->
      let rec pairs_of_list prev = function
      | [] -> rev prev
      | [_] -> raise (PDFError "contents_of_nametree: bad /Names")
      | k::v::r -> pairs_of_list ((k, v)::prev) r
      in
        pairs_of_list [] names
  | _ ->
      match lookup_direct pdf "/Kids" tree with
      | Some (Array kids) ->
          flatten (map (contents_of_nametree pdf) kids)
      | _ -> raise (PDFError "contents_of_nametree: neither names nor kids")

let copy_pdf from =
  {major = from.major;
   minor = from.minor;
   root = from.root;
   objects = from.objects;
   trailerdict = from.trailerdict}

let deep_copy from =
  let pdf = copy_pdf from in
    objmap
      (function
       | Stream {contents = (dict, Got stream)} -> Stream (ref (dict, Got (copystream stream)))
       | x -> x)
      pdf;
    pdf

