5. Photographers at the Museum

The V&A began acquiring photographs in 1852, and its collection is now one of the largest and most important in the world. Let’s take a look at which photographers are held in the (catalogued) collection.

To query the photographers in the V&A collections, we need to query object types of “photograph” and “photographs” (this is due to variations in cataloguing names) and cluster the results by ‘maker’. We then show a treemap visualisation of the top 50 results for each.

For a record of the results, we also generate a sample of the photographers and some of their works as a PDF.

The following code (click to expand) sets up the treemap drawing.

from IPython.display import display

def Vega(spec):
    """Display a Vega specification as notebook output.

    The spec is wrapped in a one-entry MIME bundle keyed by
    ``application/vnd.vega.v5+json`` and passed to ``display`` with
    ``raw=True``.

    Args:
        spec: the Vega specification (a dict in this notebook).
    """
    display({'application/vnd.vega.v5+json': spec}, raw=True)

def treemap(clusters, cluster_name):
    """Draw cluster results as a single-level Vega treemap.

    Args:
        clusters: iterable of dicts; each one is read for its ``"value"``
            (used as the tile label) and ``"count"`` (used as the tile size
            and shown in the label as "<count> objects").
        cluster_name: name stored on the root node that parents every tile.
    """

    def corner_fields():
        # Fresh dict per mark: rectangle corners come from the treemap layout.
        return {
            "x": {"field": "x0"},
            "y": {"field": "y0"},
            "x2": {"field": "x1"},
            "y2": {"field": "y1"},
        }

    # Node 0 is the root; each cluster becomes one of its children, numbered
    # from 1 in the order received.
    nodes = [{"id": 0, "value": 0, "name": cluster_name}]
    for position, cluster in enumerate(clusters, start=1):
        nodes.append({
            "id": position,
            "name": [cluster["value"], "%d objects" % cluster["count"]],
            "parent": '0',
            "value": cluster["count"],
        })

    datasets = [
        {
            "name": "tree",
            "values": nodes,
            "transform": [
                {"type": "stratify", "key": "id", "parentKey": "parent"},
                {
                    "type": "treemap",
                    "field": "value",
                    "sort": {"field": "value", "order": "descending"},
                    "round": True,
                    "size": [{"signal": "width"}, {"signal": "height"}],
                },
            ],
        },
        {
            # Rows that have children (the root in this one-level tree).
            "name": "nodes",
            "source": "tree",
            "transform": [{"type": "filter", "expr": "datum.children"}],
        },
        {
            # Rows whose parent is node 0, i.e. one per cluster.
            "name": "leaves",
            "source": "tree",
            "transform": [{"type": "filter", "expr": "datum.parent == 0"}],
        },
    ]

    scales = [
        {
            "name": "color",
            "type": "ordinal",
            "domain": {"data": "nodes", "field": "name"},
            "range": [
                "#3182bd", "#6baed6", "#9ecae1", "#c6dbef", "#e6550d",
                "#fd8d3c", "#fdae6b", "#fdd0a2", "#31a354", "#74c476",
                "#a1d99b", "#c7e9c0", "#756bb1", "#9e9ac8", "#bcbddc",
                "#dadaeb", "#636363", "#969696", "#bdbdbd", "#d9d9d9",
            ],
        },
        {
            "name": "size",
            "type": "ordinal",
            "domain": [0, 1, 2, 3],
            "range": [256, 10, 20, 14],
        },
        {
            "name": "opacity",
            "type": "ordinal",
            "domain": [0, 1, 2, 3],
            "range": [0.15, 0.5, 0.8, 1.0],
        },
    ]

    # Coloured background rectangle(s) for the parent node(s).
    background_mark = {
        "type": "rect",
        "from": {"data": "nodes"},
        "interactive": False,
        "encode": {
            "enter": {"fill": {"scale": "color", "field": "name"}},
            "update": corner_fields(),
        },
    }

    # One outlined, clickable tile per cluster; turns green on hover.
    tile_update = corner_fields()
    tile_update["fill"] = {"value": "transparent"}
    tile_update["href"] = {"value": "https://collections.vam.ac.uk/"}
    tile_mark = {
        "type": "rect",
        "from": {"data": "leaves"},
        "encode": {
            "enter": {"stroke": {"value": "#fff"}},
            "update": tile_update,
            "hover": {"fill": {"value": "green"}},
        },
    }

    # Label centred on each tile.
    label_mark = {
        "type": "text",
        "from": {"data": "leaves"},
        "interactive": False,
        "encode": {
            "enter": {
                "font": {"value": "Helvetica Neue, Arial"},
                "align": {"value": "center"},
                "baseline": {"value": "middle"},
                "fill": {"value": "#000"},
                "text": {"field": "name"},
                "fontSize": {"scale": "size", "field": "depth"},
                "fillOpacity": {"scale": "opacity", "field": "depth"},
            },
            "update": {
                "x": {"signal": "0.5 * (datum.x0 + datum.x1)"},
                "y": {"signal": "0.5 * (datum.y0 + datum.y1)"},
            },
        },
    }

    Vega({
        "$schema": "https://vega.github.io/schema/vega/v5.json",
        "description": "An example of treemap layout for hierarchical data.",
        "width": 1200,
        "height": 800,
        "padding": 2.5,
        "autosize": "none",
        "data": datasets,
        "scales": scales,
        "marks": [background_mark, tile_mark, label_mark],
    })

Data Visualisation

Now we query the API for the object types and show them as a treemap

Photograph

import requests

# Cluster "Photograph" records by maker (top 50) and draw them as a treemap.
req = requests.get(
    'https://api.vam.ac.uk/v2/objects/clusters/maker/search?kw_object_type=Photograph&cluster_size=50',
    timeout=30,  # do not let a stalled connection hang the notebook
)
# Fail here on an HTTP error rather than passing an error payload to treemap().
req.raise_for_status()
treemap(req.json(), "Photographers")

Photographs

import requests

# Same query for the plural object type "Photographs" (a cataloguing variant).
req = requests.get(
    'https://api.vam.ac.uk/v2/objects/clusters/maker/search?kw_object_type=Photographs&cluster_size=50',
    timeout=30,  # do not let a stalled connection hang the notebook
)
# Fail here on an HTTP error rather than passing an error payload to treemap().
req.raise_for_status()
treemap(req.json(), "Photographer")

Saving as CSV

At present we do not have a custom CSV response for cluster endpoints; this is something that might be added in a future version of the API. For now, the response returns the identifier (‘id’), a descriptive term (‘value’) and the count of matching object records (‘count’).

import pandas as pd

# Load the top 100 "Photograph" maker clusters and add, for each maker, a
# collections-site search link built from the maker id.
df_photograph = pd.read_json(
    'https://api.vam.ac.uk/v2/objects/clusters/maker/search?kw_object_type=Photograph&cluster_size=100',
    orient='records',
).assign(
    link=lambda frame: "https://collections.vam.ac.uk/search/?id_person=" + frame['id']
)
df_photograph.to_csv("photograph-top-100-link.csv")
# Last expression in the cell, so the notebook shows the first ten rows.
df_photograph.head(10)
id value count count_max_error link
0 A1848 Unknown 11846 0 https://collections.vam.ac.uk/search/?id_perso...
1 A6403 Frith, Francis 4153 0 https://collections.vam.ac.uk/search/?id_perso...
2 AUTH334543 K.A.C. Creswell 3335 0 https://collections.vam.ac.uk/search/?id_perso...
3 N3687 Thompson, Charles Thurston 2244 0 https://collections.vam.ac.uk/search/?id_perso...
4 A5970 London Stereoscopic and Photographic Company 1703 0 https://collections.vam.ac.uk/search/?id_perso...
5 A4801 Stone, Benjamin Sir 1532 0 https://collections.vam.ac.uk/search/?id_perso...
6 AUTH325233 Parker, John Henry 1449 0 https://collections.vam.ac.uk/search/?id_perso...
7 AUTH335751 Thompson, Stephen 1265 0 https://collections.vam.ac.uk/search/?id_perso...
8 A4798 Scamell, George Mr 1035 0 https://collections.vam.ac.uk/search/?id_perso...
9 A5902 Beaton, Cecil (Sir) 959 0 https://collections.vam.ac.uk/search/?id_perso...
import pandas as pd
import requests

# Load the top 100 "photographs" maker clusters, add a collections-site
# search link for each maker, and save the table as CSV.
df_photographs = pd.read_json('https://api.vam.ac.uk/v2/objects/clusters/maker/search?kw_object_type=photographs&cluster_size=100', orient='records')
df_photographs["link"] = "https://collections.vam.ac.uk/search/?id_person=" + df_photographs['id']
df_photographs.to_csv("photographs-top-100-link.csv")

# Fetch the maker clusters used for the PDF sample.
req = requests.get(
    'https://api.vam.ac.uk/v2/objects/clusters/maker/search?kw_object_type=Photograph&cluster_size=50',
    timeout=30,  # do not let a stalled connection hang the notebook
)
# Fail here on an HTTP error rather than storing an error payload.
req.raise_for_status()
# NOTE(review): the name says 100 but the query asks for cluster_size=50 - confirm which is intended.
top100_photographers = req.json()

Generating PDF sample

A possibly useful way to look at the data, for those who don’t want to just see spreadsheets and data tables, is to construct a PDF with a sample of objects for each photographer. This code (drawn from reportlab documentation) generates a very simple PDF with images of 5 objects for the first 10 photographers.

from reportlab.pdfgen import canvas
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.rl_config import defaultPageSize
from reportlab.lib.units import inch
import PIL
from io import BytesIO
import requests
import pandas as pd

# Page geometry taken from reportlab's default page size (index 0 is read as
# the width, index 1 as the height).
PAGE_WIDTH, PAGE_HEIGHT = defaultPageSize[0], defaultPageSize[1]
styles = getSampleStyleSheet()
# NOTE(review): not read by any visible code (the PDF builder uses
# top100_photographers) - confirm whether this is still needed.
top100_photographs = ""

# Text drawn on the pages by the page callbacks.
Title = "V&A Photographers - Top 10"
pageinfo = "vam-api-data-exploration-5"

def myFirstPage(canvas, doc):
    """Draw the title and footer on the first page of the PDF.

    Passed to ``doc.build`` as the ``onFirstPage`` callback.

    Args:
        canvas: the canvas to draw on.
        doc: the document being built (not used here).
    """
    title_x = PAGE_WIDTH / 2.0
    title_y = PAGE_HEIGHT - 108
    footer = "First Page / %s" % pageinfo

    # Save and restore the canvas state so the font changes stay local.
    canvas.saveState()
    canvas.setFont('Times-Bold', 16)
    canvas.drawCentredString(title_x, title_y, Title)
    canvas.setFont('Times-Roman', 9)
    canvas.drawString(inch, 0.75 * inch, footer)
    canvas.restoreState()

def myLaterPages(canvas, doc):
    """Draw the page-number footer on every page after the first.

    Passed to ``doc.build`` as the ``onLaterPages`` callback.

    Args:
        canvas: the canvas to draw on.
        doc: the document being built; ``doc.page`` supplies the page number.
    """
    footer = "Page %d %s" % (doc.page, pageinfo)

    # Save and restore the canvas state so the font change stays local.
    canvas.saveState()
    canvas.setFont('Times-Roman', 9)
    canvas.drawString(inch, 0.75 * inch, footer)
    canvas.restoreState()

chart_style = TableStyle([('ALIGN', (0, 0), (-1, -1), 'CENTER')])

# The PDF is titled "Top 10" and shows five sample objects per photographer.
_PHOTOGRAPHER_LIMIT = 10
_SAMPLES_PER_PHOTOGRAPHER = 5

# Templates for the thumbnail image of an object and for its collections page.
_IIIF_THUMBNAIL_URL = "https://framemark.vam.ac.uk/collections/%s/full/!100,100/0/default.jpg"
_OBJECT_LINK_MARKUP = '<link href="https://collections.vam.ac.uk/item/%s">%s</link>'


def _thumbnail(image_id):
    """Download the thumbnail for ``image_id`` and wrap it as a 1-inch Image."""
    response = requests.get(_IIIF_THUMBNAIL_URL % image_id, timeout=30)
    # A failed download would otherwise surface later as an image-decoding
    # error while the document is being built.
    response.raise_for_status()
    return Image(BytesIO(response.content), width=inch, height=inch)


def _object_caption(record):
    """Return a linked caption Paragraph for one object row.

    The caption text is the object's title, or its object type when the
    title is missing.
    """
    label = record["_primaryTitle"]
    # pandas reads an empty CSV cell as NaN, never as the string "nan", so
    # the missing-title test has to use pd.isna.
    if pd.isna(label):
        label = record["objectType"]
    return Paragraph(_OBJECT_LINK_MARKUP % (record["systemNumber"], label))


def build_doc():
    """Write ``photographers-samples.pdf`` with sample objects per photographer.

    Reads the module-level ``top100_photographers`` list (dicts with ``"id"``
    and ``"value"`` keys). For each of the first ten entries it adds the
    photographer's name followed by a table of up to five thumbnails, each
    with its own linked caption underneath. Objects are fetched from the V&A
    API as CSV, so the function needs network access.
    """
    doc = SimpleDocTemplate("photographers-samples.pdf")
    story = [Spacer(1, 2 * inch)]
    style = styles["Normal"]

    # Slicing caps the output at exactly ten photographers, matching the title.
    for photographer in top100_photographers[:_PHOTOGRAPHER_LIMIT]:
        story.append(Paragraph(photographer['value'], style))
        story.append(Spacer(1, 0.4 * inch))

        # Objects by this maker that have images, returned as CSV.
        query_url = "https://api.vam.ac.uk/v2/objects/search?id_maker=%s&images_exist=1&response_format=csv&page_size=20" % photographer['id']
        # head() tolerates makers with fewer than five matching objects.
        samples = pd.read_csv(query_url).head(_SAMPLES_PER_PHOTOGRAPHER)

        if not samples.empty:
            thumbnails = [_thumbnail(image_id) for image_id in samples["_primaryImageId"]]
            # One caption per object, so every link points at its own record.
            captions = [_object_caption(record) for _, record in samples.iterrows()]
            story.append(Table([thumbnails, captions],
                               colWidths=[inch] * len(thumbnails),
                               rowHeights=[0.75 * inch, 0.25 * inch], style=chart_style))

        story.append(Spacer(1, 0.2 * inch))

    doc.build(story, onFirstPage=myFirstPage, onLaterPages=myLaterPages)
 
# Fetch the maker clusters for "Photograph" records and build the PDF from them.
req = requests.get(
    'https://api.vam.ac.uk/v2/objects/clusters/maker/search?kw_object_type=Photograph&cluster_size=50',
    timeout=30,  # do not let a stalled connection hang the notebook
)
# Fail here on an HTTP error rather than building a PDF from an error payload.
req.raise_for_status()
top100_photographers = req.json()

build_doc()
    
# DO NOT REMOVE This code is just here to force Jupyterbook to generate the diagrams as PNGs above. It's hidden from the output

import altair as alt
# `px` is the conventional alias for plotly.express, which provides the
# `data.iris()` sample dataset used below.
import plotly.express as px

data = px.data.iris()
data.head()

# Scatter plot of the iris sample data; kept as the last expression so the
# notebook renders it.
alt.Chart(data=data).mark_point().encode(
    x="sepal_width",
    y="sepal_length",
    color="species",
    size='sepal_length'
)
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-8-3f247e18ef93> in <module>
      2 
      3 import altair as alt
----> 4 import plotly as px
      5 
      6 data = px.data.iris()

ModuleNotFoundError: No module named 'plotly'