donb-hf committed
Commit e66e19e
1 Parent(s): 79cf287

add slider for page size

Files changed (2):
  1. app.py +3 -1
  2. arxiv.ipynb +119 -0
app.py CHANGED
@@ -38,6 +38,7 @@ def handle_dataset_view(page: int = 1, page_size: int = 10) -> Dict[str, Any]:
    result = {
        "total_records": total_records,
        "current_page": page,
+       "page_size": page_size,
        "records": records
    }
    logging.info(f"Returning result: {result}")
@@ -70,12 +71,13 @@ with gr.Blocks() as demo:

    with gr.Tab("View Dataset"):
        page_number = gr.Number(value=1, label="Page Number", precision=0)
+       page_size = gr.Slider(minimum=5, maximum=50, value=10, step=5, label="Page Size")
        refresh_button = gr.Button("Refresh Dataset View")
        dataset_info = gr.JSON(label="Dataset Info")

        refresh_button.click(
            fn=handle_dataset_view,
-           inputs=[page_number],
+           inputs=[page_number, page_size],
            outputs=dataset_info
        )
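The diff above only threads the new page_size value from the slider into handle_dataset_view and its result payload. As a minimal, hypothetical sketch (not code from this commit), the pagination inside the handler presumably slices the records roughly as below; the paginate helper and the plain-list dataset are illustrative assumptions, and only the result keys mirror the diff.

from typing import Any, Dict, List

def paginate(records: List[Any], page: int = 1, page_size: int = 10) -> Dict[str, Any]:
    # Hypothetical stand-in for the slicing handle_dataset_view is assumed to do.
    total_records = len(records)
    start = (page - 1) * page_size   # first record on the requested page
    end = start + page_size          # one past the last record on the page
    return {
        "total_records": total_records,
        "current_page": page,
        "page_size": page_size,      # field added by this commit
        "records": records[start:end],
    }

# Example: 23 dummy records, page 2 with the slider at 10 per page -> records 10-19.
print(paginate([f"record {i}" for i in range(23)], page=2, page_size=10))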
 
arxiv.ipynb ADDED
@@ -0,0 +1,119 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import arxiv\n",
+ "\n",
+ "client = arxiv.Client(delay_seconds=3, num_retries=3)\n",
+ "\n",
+ "\n",
+ "max_results: int = 10\n",
+ "\n",
+ "search = arxiv.Search(\n",
+ " query=\"2304.08485\", \n",
+ " max_results=max_results, \n",
+ " sort_by=arxiv.SortCriterion.SubmittedDate\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "arxiv.Search(query='2304.08485', id_list=[], max_results=10, sort_by=<SortCriterion.SubmittedDate: 'submittedDate'>, sort_order=<SortOrder.Descending: 'descending'>)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(search)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "entry_id: http://arxiv.org/abs/2304.08485v2\n",
+ "updated: 2023-12-11 17:46:14+00:00\n",
+ "published: 2023-04-17 17:59:25+00:00\n",
+ "title: Visual Instruction Tuning\n",
+ "authors: [arxiv.Result.Author('Haotian Liu'), arxiv.Result.Author('Chunyuan Li'), arxiv.Result.Author('Qingyang Wu'), arxiv.Result.Author('Yong Jae Lee')]\n",
+ "summary: Instruction tuning large language models (LLMs) using machine-generated\n",
+ "instruction-following data has improved zero-shot capabilities on new tasks,\n",
+ "but the idea is less explored in the multimodal field. In this paper, we\n",
+ "present the first attempt to use language-only GPT-4 to generate multimodal\n",
+ "language-image instruction-following data. By instruction tuning on such\n",
+ "generated data, we introduce LLaVA: Large Language and Vision Assistant, an\n",
+ "end-to-end trained large multimodal model that connects a vision encoder and\n",
+ "LLM for general-purpose visual and language understanding.Our early experiments\n",
+ "show that LLaVA demonstrates impressive multimodel chat abilities, sometimes\n",
+ "exhibiting the behaviors of multimodal GPT-4 on unseen images/instructions, and\n",
+ "yields a 85.1% relative score compared with GPT-4 on a synthetic multimodal\n",
+ "instruction-following dataset. When fine-tuned on Science QA, the synergy of\n",
+ "LLaVA and GPT-4 achieves a new state-of-the-art accuracy of 92.53%. We make\n",
+ "GPT-4 generated visual instruction tuning data, our model and code base\n",
+ "publicly available.\n",
+ "comment: NeurIPS 2023 Oral; project page: https://llava-vl.github.io/\n",
+ "journal_ref: None\n",
+ "doi: None\n",
+ "primary_category: cs.CV\n",
+ "categories: ['cs.CV', 'cs.AI', 'cs.CL', 'cs.LG']\n",
+ "links: [arxiv.Result.Link('http://arxiv.org/abs/2304.08485v2', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/2304.08485v2', title='pdf', rel='related', content_type=None)]\n",
+ "pdf_url: http://arxiv.org/pdf/2304.08485v2\n",
+ "_raw: {'id': 'http://arxiv.org/abs/2304.08485v2', 'guidislink': True, 'link': 'http://arxiv.org/abs/2304.08485v2', 'updated': '2023-12-11T17:46:14Z', 'updated_parsed': time.struct_time(tm_year=2023, tm_mon=12, tm_mday=11, tm_hour=17, tm_min=46, tm_sec=14, tm_wday=0, tm_yday=345, tm_isdst=0), 'published': '2023-04-17T17:59:25Z', 'published_parsed': time.struct_time(tm_year=2023, tm_mon=4, tm_mday=17, tm_hour=17, tm_min=59, tm_sec=25, tm_wday=0, tm_yday=107, tm_isdst=0), 'title': 'Visual Instruction Tuning', 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': 'Visual Instruction Tuning'}, 'summary': 'Instruction tuning large language models (LLMs) using machine-generated\\ninstruction-following data has improved zero-shot capabilities on new tasks,\\nbut the idea is less explored in the multimodal field. In this paper, we\\npresent the first attempt to use language-only GPT-4 to generate multimodal\\nlanguage-image instruction-following data. By instruction tuning on such\\ngenerated data, we introduce LLaVA: Large Language and Vision Assistant, an\\nend-to-end trained large multimodal model that connects a vision encoder and\\nLLM for general-purpose visual and language understanding.Our early experiments\\nshow that LLaVA demonstrates impressive multimodel chat abilities, sometimes\\nexhibiting the behaviors of multimodal GPT-4 on unseen images/instructions, and\\nyields a 85.1% relative score compared with GPT-4 on a synthetic multimodal\\ninstruction-following dataset. When fine-tuned on Science QA, the synergy of\\nLLaVA and GPT-4 achieves a new state-of-the-art accuracy of 92.53%. We make\\nGPT-4 generated visual instruction tuning data, our model and code base\\npublicly available.', 'summary_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': 'Instruction tuning large language models (LLMs) using machine-generated\\ninstruction-following data has improved zero-shot capabilities on new tasks,\\nbut the idea is less explored in the multimodal field. In this paper, we\\npresent the first attempt to use language-only GPT-4 to generate multimodal\\nlanguage-image instruction-following data. By instruction tuning on such\\ngenerated data, we introduce LLaVA: Large Language and Vision Assistant, an\\nend-to-end trained large multimodal model that connects a vision encoder and\\nLLM for general-purpose visual and language understanding.Our early experiments\\nshow that LLaVA demonstrates impressive multimodel chat abilities, sometimes\\nexhibiting the behaviors of multimodal GPT-4 on unseen images/instructions, and\\nyields a 85.1% relative score compared with GPT-4 on a synthetic multimodal\\ninstruction-following dataset. When fine-tuned on Science QA, the synergy of\\nLLaVA and GPT-4 achieves a new state-of-the-art accuracy of 92.53%. 
We make\\nGPT-4 generated visual instruction tuning data, our model and code base\\npublicly available.'}, 'authors': [{'name': 'Haotian Liu'}, {'name': 'Chunyuan Li'}, {'name': 'Qingyang Wu'}, {'name': 'Yong Jae Lee'}], 'author_detail': {'name': 'Yong Jae Lee'}, 'author': 'Yong Jae Lee', 'arxiv_comment': 'NeurIPS 2023 Oral; project page: https://llava-vl.github.io/', 'links': [{'href': 'http://arxiv.org/abs/2304.08485v2', 'rel': 'alternate', 'type': 'text/html'}, {'title': 'pdf', 'href': 'http://arxiv.org/pdf/2304.08485v2', 'rel': 'related', 'type': 'application/pdf'}], 'arxiv_primary_category': {'term': 'cs.CV', 'scheme': 'http://arxiv.org/schemas/atom'}, 'tags': [{'term': 'cs.CV', 'scheme': 'http://arxiv.org/schemas/atom', 'label': None}, {'term': 'cs.AI', 'scheme': 'http://arxiv.org/schemas/atom', 'label': None}, {'term': 'cs.CL', 'scheme': 'http://arxiv.org/schemas/atom', 'label': None}, {'term': 'cs.LG', 'scheme': 'http://arxiv.org/schemas/atom', 'label': None}]}\n"
+ ]
+ }
+ ],
+ "source": [
+ "results = []\n",
+ "for result in client.results(search):\n",
+ " results.append(result)\n",
+ " # print all key value pairs in \"key: value\" format\n",
+ " for key, value in vars(result).items():\n",
+ " print(f\"{key}: {value}\")\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
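The notebook above looks up a single paper (arXiv ID 2304.08485) and dumps every attribute of each arxiv.Result. As a rough, hedged sketch of where this could feed into the dataset view (not part of this commit), the same results can be flattened into plain dicts; the field selection below is an assumption based on the attributes printed in the notebook output.

import arxiv

# Same client/search setup as in the notebook.
client = arxiv.Client(delay_seconds=3, num_retries=3)
search = arxiv.Search(
    query="2304.08485",
    max_results=10,
    sort_by=arxiv.SortCriterion.SubmittedDate,
)

# Flatten each arxiv.Result into a JSON-friendly record (assumed field selection).
records = []
for result in client.results(search):
    records.append({
        "entry_id": result.entry_id,
        "title": result.title,
        "authors": [author.name for author in result.authors],
        "published": result.published.isoformat(),
        "primary_category": result.primary_category,
        "pdf_url": result.pdf_url,
        "summary": result.summary,
    })

print(f"fetched {len(records)} record(s)")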