{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "53755bb3", "metadata": {}, "outputs": [], "source": [ "# Import the delta names explicitly instead of `from delta import *`; these are the\n", "# two names that package exports, so later cells using either one keep working.\n", "from delta import DeltaTable, configure_spark_with_delta_pip\n", "import pyspark\n", "\n", "# Enable Delta Lake's SQL extension and catalog on the session builder.\n", "builder = (\n", "    pyspark.sql.SparkSession.builder.appName(\"MyApp\")\n", "    .config(\"spark.sql.extensions\", \"io.delta.sql.DeltaSparkSessionExtension\")\n", "    .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n", ")\n", "\n", "spark = configure_spark_with_delta_pip(builder).getOrCreate()" ] }, { "cell_type": "code", "execution_count": 2, "id": "a9f65e85", "metadata": {}, "outputs": [], "source": [ "# Read the JSON file into a Spark DataFrame.\n", "df1 = spark.read.json('/home/celine/ipython-in-depth/btd2.json')" ] }, { "cell_type": "code", "execution_count": 3, "id": "73570cd7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "root\n", " |-- image: struct (nullable = true)\n", " | |-- origin: string (nullable = true)\n", " | |-- height: integer (nullable = true)\n", " | |-- width: integer (nullable = true)\n", " | |-- nChannels: integer (nullable = true)\n", " | |-- mode: integer (nullable = true)\n", " | |-- data: binary (nullable = true)\n", "\n" ] } ], "source": [ "def _load_png_images(directory):\n", "    \"\"\"Read the PNG files under a directory with Spark's image data source.\n", "\n", "    Args:\n", "        directory: Path handed to the reader's load(); it is searched recursively\n", "            and only files matching *.png are picked up.\n", "\n", "    Returns:\n", "        The DataFrame produced by the image reader.\n", "    \"\"\"\n", "    return (\n", "        spark.read.format(\"image\")\n", "        .option(\"recursiveFileLookup\", \"true\")\n", "        .option(\"pathGlobFilter\", \"*.png\")\n", "        .load(directory)\n", "    )\n", "\n", "\n", "# Both image sets are read with identical options; only the directory differs.\n", "image1 = _load_png_images(\"/home/celine/Bilder/zwischendatein/pc\")\n", "image2 = _load_png_images(\"/home/celine/Bilder/zwischendatein/p\")\n", "image1.printSchema()" ] }, { "cell_type": "code", "execution_count": 4, "id": "1ff444a6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Bike # | \n", "Duration | \n", "End Date | \n", "End Station | \n", "End Terminal | \n", "Start Date | \n", "Start Station | \n", "Start Terminal | \n", "Subscription Type | \n", "Trip ID | \n", "Zip Code | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "520 | \n", "63 | \n", "8/29/13 14:14 | \n", "South Van Ness at Market | \n", "66 | \n", "8/29/13 14:13 | \n", "South Van Ness at Market | \n", "66 | \n", "Subscriber | \n", "4576 | \n", "94127 | \n", "
1 | \n", "661 | \n", "70 | \n", "8/29/13 14:43 | \n", "San Jose City Hall | \n", "10 | \n", "8/29/13 14:42 | \n", "San Jose City Hall | \n", "10 | \n", "Subscriber | \n", "4607 | \n", "95138 | \n", "
2 | \n", "48 | \n", "71 | \n", "8/29/13 10:17 | \n", "Mountain View City Hall | \n", "27 | \n", "8/29/13 10:16 | \n", "Mountain View City Hall | \n", "27 | \n", "Subscriber | \n", "4130 | \n", "97214 | \n", "
3 | \n", "26 | \n", "77 | \n", "8/29/13 11:30 | \n", "San Jose City Hall | \n", "10 | \n", "8/29/13 11:29 | \n", "San Jose City Hall | \n", "10 | \n", "Subscriber | \n", "4251 | \n", "95060 | \n", "
4 | \n", "319 | \n", "83 | \n", "8/29/13 12:04 | \n", "Market at 10th | \n", "67 | \n", "8/29/13 12:02 | \n", "South Van Ness at Market | \n", "66 | \n", "Subscriber | \n", "4299 | \n", "94103 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
144010 | \n", "483 | \n", "385 | \n", "2/28/14 22:22 | \n", "South Van Ness at Market | \n", "66 | \n", "2/28/14 22:15 | \n", "Powell Street BART | \n", "53 | \n", "Subscriber | \n", "198771 | \n", "94404 | \n", "
144011 | \n", "425 | \n", "145 | \n", "2/28/14 22:40 | \n", "Davis at Jackson | \n", "42 | \n", "2/28/14 22:38 | \n", "Commercial at Montgomery | \n", "45 | \n", "Subscriber | \n", "198772 | \n", "94111 | \n", "
144012 | \n", "438 | \n", "677 | \n", "2/28/14 22:56 | \n", "Market at 4th | \n", "76 | \n", "2/28/14 22:45 | \n", "Embarcadero at Sansome | \n", "60 | \n", "Subscriber | \n", "198773 | \n", "94102 | \n", "
144013 | \n", "414 | \n", "64128 | \n", "3/1/14 16:50 | \n", "Harry Bridges Plaza (Ferry Building) | \n", "50 | \n", "2/28/14 23:01 | \n", "Civic Center BART (7th at Market) | \n", "72 | \n", "Customer | \n", "198774 | \n", "94124 | \n", "
144014 | \n", "577 | \n", "570 | \n", "2/28/14 23:30 | \n", "Townsend at 7th | \n", "65 | \n", "2/28/14 23:20 | \n", "2nd at South Park | \n", "64 | \n", "Subscriber | \n", "198775 | \n", "94107 | \n", "
144015 rows × 11 columns
\n", "\n", " | origin | \n", "height | \n", "width | \n", "nChannels | \n", "mode | \n", "data | \n", "
---|---|---|---|---|---|---|
0 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "1753 | \n", "2480 | \n", "4 | \n", "24 | \n", "[158, 106, 82, 165, 158, 106, 82, 165, 158, 10... | \n", "
1 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "1753 | \n", "2480 | \n", "4 | \n", "24 | \n", "[179, 239, 255, 4, 179, 239, 255, 4, 179, 239,... | \n", "
2 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "2480 | \n", "1753 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
3 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "2480 | \n", "1753 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
4 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "2480 | \n", "1753 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
5 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "2480 | \n", "1753 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
6 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "2480 | \n", "1753 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
7 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "2480 | \n", "1753 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
8 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "2480 | \n", "1753 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
9 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "1753 | \n", "2480 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
10 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "2480 | \n", "1753 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
11 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "2480 | \n", "1753 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
12 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "1753 | \n", "2480 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
13 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "2480 | \n", "1753 | \n", "4 | \n", "24 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "
14 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "1753 | \n", "2480 | \n", "3 | \n", "16 | \n", "[148, 95, 74, 148, 95, 74, 148, 95, 74, 148, 9... | \n", "
15 | \n", "file:///home/celine/Bilder/zwischendatein/pc/e... | \n", "2480 | \n", "1753 | \n", "3 | \n", "16 | \n", "[227, 249, 255, 227, 249, 255, 227, 249, 255, ... | \n", "